Merge pull request #2569 from ArrisLee/toil/time-metrics-for-cluster-update

Toil/time metrics for cluster update
This commit is contained in:
Spencer Amann 2023-03-10 11:41:39 -05:00 коммит произвёл GitHub
Родитель 75d3908986 8734325163
Коммит 9d742187a9
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 40 добавлений и 19 удалений

Просмотреть файл

@ -31,7 +31,7 @@ import (
// AdminUpdate performs an admin update of an ARO cluster
func (m *manager) AdminUpdate(ctx context.Context) error {
toRun := m.adminUpdate()
return m.runSteps(ctx, toRun, false)
return m.runSteps(ctx, toRun, "adminUpdate")
}
func (m *manager) adminUpdate() []steps.Step {
@ -189,7 +189,7 @@ func (m *manager) Update(ctx context.Context) error {
)
}
return m.runSteps(ctx, s, false)
return m.runSteps(ctx, s, "update")
}
func (m *manager) runIntegratedInstaller(ctx context.Context) error {
@ -337,21 +337,21 @@ func (m *manager) Install(ctx context.Context) error {
return fmt.Errorf("unrecognised phase %s", m.doc.OpenShiftCluster.Properties.Install.Phase)
}
m.log.Printf("starting phase %s", m.doc.OpenShiftCluster.Properties.Install.Phase)
return m.runSteps(ctx, steps[m.doc.OpenShiftCluster.Properties.Install.Phase], true)
return m.runSteps(ctx, steps[m.doc.OpenShiftCluster.Properties.Install.Phase], "install")
}
func (m *manager) runSteps(ctx context.Context, s []steps.Step, emitMetrics bool) error {
func (m *manager) runSteps(ctx context.Context, s []steps.Step, metricsTopic string) error {
var err error
if emitMetrics {
if metricsTopic != "" {
var stepsTimeRun map[string]int64
stepsTimeRun, err = steps.Run(ctx, m.log, 10*time.Second, s, m.now)
if err == nil {
var totalInstallTime int64
for topic, duration := range stepsTimeRun {
m.metricsEmitter.EmitGauge(fmt.Sprintf("backend.openshiftcluster.installtime.%s", topic), duration, nil)
for stepName, duration := range stepsTimeRun {
m.metricsEmitter.EmitGauge(fmt.Sprintf("backend.openshiftcluster.%s.%s.duration.seconds", metricsTopic, stepName), duration, nil)
totalInstallTime += duration
}
m.metricsEmitter.EmitGauge("backend.openshiftcluster.installtime.total", totalInstallTime, nil)
m.metricsEmitter.EmitGauge(fmt.Sprintf("backend.openshiftcluster.%s.duration.total.seconds", metricsTopic), totalInstallTime, nil)
}
} else {
_, err = steps.Run(ctx, m.log, 10*time.Second, s, nil)

Просмотреть файл

@ -176,7 +176,7 @@ func TestStepRunnerWithInstaller(t *testing.T) {
now: func() time.Time { return time.Now() },
}
err := m.runSteps(ctx, tt.steps, false)
err := m.runSteps(ctx, tt.steps, "")
if err != nil && err.Error() != tt.wantErr ||
err == nil && tt.wantErr != "" {
t.Error(err)
@ -239,28 +239,49 @@ func TestInstallationTimeMetrics(t *testing.T) {
for _, tt := range []struct {
name string
metricsTopic string
timePerStep int64
steps []steps.Step
wantedMetrics map[string]int64
}{
{
name: "Failed step run will not generate any install time metrics",
metricsTopic: "install",
steps: []steps.Step{
steps.Action(successfulActionStep),
steps.Action(failingFunc),
},
},
{
name: "Succeeded step run will generate a valid install time metrics",
name: "Succeeded step run for cluster installation will generate a valid install time metrics",
metricsTopic: "install",
timePerStep: 2,
steps: []steps.Step{
steps.Action(successfulActionStep),
steps.Condition(successfulConditionStep, 30*time.Minute, true),
steps.AuthorizationRefreshingAction(nil, steps.Action(successfulActionStep)),
},
wantedMetrics: map[string]int64{
"backend.openshiftcluster.installtime.total": 6,
"backend.openshiftcluster.installtime.action.successfulActionStep": 2,
"backend.openshiftcluster.installtime.condition.successfulConditionStep": 2,
"backend.openshiftcluster.installtime.refreshing.successfulActionStep": 2,
"backend.openshiftcluster.install.duration.total.seconds": 6,
"backend.openshiftcluster.install.action.successfulActionStep.duration.seconds": 2,
"backend.openshiftcluster.install.condition.successfulConditionStep.duration.seconds": 2,
"backend.openshiftcluster.install.refreshing.successfulActionStep.duration.seconds": 2,
},
},
{
name: "Succeeded step run for cluster update will generate a valid install time metrics",
metricsTopic: "update",
timePerStep: 3,
steps: []steps.Step{
steps.Action(successfulActionStep),
steps.Condition(successfulConditionStep, 30*time.Minute, true),
steps.AuthorizationRefreshingAction(nil, steps.Action(successfulActionStep)),
},
wantedMetrics: map[string]int64{
"backend.openshiftcluster.update.duration.total.seconds": 9,
"backend.openshiftcluster.update.action.successfulActionStep.duration.seconds": 3,
"backend.openshiftcluster.update.condition.successfulConditionStep.duration.seconds": 3,
"backend.openshiftcluster.update.refreshing.successfulActionStep.duration.seconds": 3,
},
},
} {
@ -269,10 +290,10 @@ func TestInstallationTimeMetrics(t *testing.T) {
m := &manager{
log: log,
metricsEmitter: fm,
now: func() time.Time { return time.Now().Add(2 * time.Second) },
now: func() time.Time { return time.Now().Add(time.Duration(tt.timePerStep) * time.Second) },
}
err := m.runSteps(ctx, tt.steps, true)
err := m.runSteps(ctx, tt.steps, tt.metricsTopic)
if err != nil {
if len(fm.Metrics) != 0 {
t.Error("fake metrics obj should be empty when run steps failed")
@ -282,10 +303,10 @@ func TestInstallationTimeMetrics(t *testing.T) {
for k, v := range tt.wantedMetrics {
time, ok := fm.Metrics[k]
if !ok {
t.Errorf("unexpected metrics topic: %s", k)
t.Errorf("unexpected metrics key: %s", k)
}
if time != v {
t.Errorf("incorrect fake metrics obj, want: %d, got: %d", v, time)
t.Errorf("incorrect fake metrics value, want: %d, got: %d", v, time)
}
}
}