2021-02-24 15:34:40 +03:00
|
|
|
package cluster
|
|
|
|
|
|
|
|
// Copyright (c) Microsoft Corporation.
|
|
|
|
// Licensed under the Apache License 2.0.
|
|
|
|
|
|
|
|
import (
|
2023-11-28 18:45:00 +03:00
|
|
|
"bytes"
|
2021-02-24 15:34:40 +03:00
|
|
|
"context"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
mgmtauthorization "github.com/Azure/azure-sdk-for-go/services/preview/authorization/mgmt/2018-09-01-preview/authorization"
|
|
|
|
"github.com/ghodss/yaml"
|
2023-11-28 18:45:00 +03:00
|
|
|
corev1 "k8s.io/api/core/v1"
|
2021-02-26 20:53:16 +03:00
|
|
|
kerrors "k8s.io/apimachinery/pkg/api/errors"
|
2021-02-24 15:34:40 +03:00
|
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
2023-08-28 22:23:42 +03:00
|
|
|
applyv1 "k8s.io/client-go/applyconfigurations/core/v1"
|
2021-02-24 15:34:40 +03:00
|
|
|
"k8s.io/client-go/util/retry"
|
|
|
|
|
|
|
|
"github.com/Azure/ARO-RP/pkg/util/arm"
|
2023-08-28 22:23:42 +03:00
|
|
|
"github.com/Azure/ARO-RP/pkg/util/clusterauthorizer"
|
2021-02-24 15:34:40 +03:00
|
|
|
"github.com/Azure/ARO-RP/pkg/util/rbac"
|
|
|
|
"github.com/Azure/ARO-RP/pkg/util/stringutils"
|
|
|
|
)
|
|
|
|
|
|
|
|
func (m *manager) createOrUpdateClusterServicePrincipalRBAC(ctx context.Context) error {
|
|
|
|
resourceGroupID := m.doc.OpenShiftCluster.Properties.ClusterProfile.ResourceGroupID
|
|
|
|
resourceGroup := stringutils.LastTokenByte(resourceGroupID, '/')
|
|
|
|
clusterSPObjectID := m.doc.OpenShiftCluster.Properties.ServicePrincipalProfile.SPObjectID
|
|
|
|
|
|
|
|
roleAssignments, err := m.roleAssignments.ListForResourceGroup(ctx, resourceGroup, "")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// We are interested in Resource group scope only (inherited are returned too).
|
|
|
|
var toDelete []mgmtauthorization.RoleAssignment
|
|
|
|
var found bool
|
|
|
|
for _, assignment := range roleAssignments {
|
2021-03-17 17:56:56 +03:00
|
|
|
if !strings.EqualFold(*assignment.Scope, resourceGroupID) ||
|
|
|
|
strings.HasSuffix(strings.ToLower(*assignment.RoleDefinitionID), strings.ToLower(rbac.RoleOwner)) /* should only matter in development */ {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if strings.EqualFold(*assignment.PrincipalID, clusterSPObjectID) &&
|
|
|
|
strings.HasSuffix(strings.ToLower(*assignment.RoleDefinitionID), strings.ToLower(rbac.RoleContributor)) {
|
|
|
|
found = true
|
|
|
|
} else {
|
|
|
|
toDelete = append(toDelete, assignment)
|
2021-02-24 15:34:40 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, assignment := range toDelete {
|
2021-02-25 04:50:28 +03:00
|
|
|
m.log.Infof("deleting role assignment %s", *assignment.Name)
|
2021-02-24 15:34:40 +03:00
|
|
|
_, err := m.roleAssignments.Delete(ctx, *assignment.Scope, *assignment.Name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-17 17:56:56 +03:00
|
|
|
err = m.deleteRoleDefinition(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-02-24 15:34:40 +03:00
|
|
|
if !found {
|
2021-02-25 04:50:28 +03:00
|
|
|
m.log.Info("creating cluster service principal role assignment")
|
2021-02-24 15:34:40 +03:00
|
|
|
t := &arm.Template{
|
|
|
|
Schema: "https://schema.management.azure.com/schemas/2015-01-01/deploymentTemplate.json#",
|
|
|
|
ContentVersion: "1.0.0.0",
|
2021-03-15 15:32:38 +03:00
|
|
|
Resources: []*arm.Resource{m.clusterServicePrincipalRBAC()},
|
2021-02-24 15:34:40 +03:00
|
|
|
}
|
2022-06-17 03:50:43 +03:00
|
|
|
err = arm.DeployTemplate(ctx, m.log, m.deployments, resourceGroup, "clustersp", t, nil)
|
2021-02-24 15:34:40 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-11-28 18:45:00 +03:00
|
|
|
// cloudConfigSecretFromChanges takes in the kube-system/azure-cloud-provider Secret and a map
|
|
|
|
// containing cloud-config data. If the cloud-config data in cf is different from what's currently
|
|
|
|
// in the Secret, cloudConfigSecretFromChanges updates and returns the Secret. Otherwise, it returns nil.
|
|
|
|
func cloudConfigSecretFromChanges(secret *corev1.Secret, cf map[string]interface{}) (*corev1.Secret, error) {
|
|
|
|
data, err := yaml.Marshal(cf)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if !bytes.Equal(secret.Data["cloud-config"], data) {
|
|
|
|
secret.Data["cloud-config"] = data
|
|
|
|
return secret, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// servicePrincipalUpdated checks whether the CSP has been updated by comparing the cluster doc's
|
|
|
|
// ServicePrincipalProfile to the contents of the kube-system/azure-cloud-provider Secret. If the CSP
|
|
|
|
// has changed, it returns a new corev1.Secret to use to update the Secret to match
|
|
|
|
// what's in the cluster doc.
|
|
|
|
func (m *manager) servicePrincipalUpdated(ctx context.Context) (*corev1.Secret, error) {
|
2021-02-24 15:34:40 +03:00
|
|
|
spp := m.doc.OpenShiftCluster.Properties.ServicePrincipalProfile
|
2023-11-28 18:45:00 +03:00
|
|
|
//data:
|
|
|
|
// cloud-config: <base64 map[string]string with keys 'aadClientId' and 'aadClientSecret'>
|
|
|
|
secret, err := m.kubernetescli.CoreV1().Secrets("kube-system").Get(ctx, "azure-cloud-provider", metav1.GetOptions{})
|
|
|
|
if err != nil {
|
|
|
|
if kerrors.IsNotFound(err) { // we are not in control if secret is not present
|
|
|
|
return nil, nil
|
2021-02-24 15:34:40 +03:00
|
|
|
}
|
2023-11-28 18:45:00 +03:00
|
|
|
return nil, err
|
|
|
|
}
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2023-11-28 18:45:00 +03:00
|
|
|
var cf map[string]interface{}
|
|
|
|
if secret != nil && secret.Data != nil {
|
|
|
|
err = yaml.Unmarshal(secret.Data["cloud-config"], &cf)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if val, ok := cf["aadClientId"].(string); ok {
|
|
|
|
if val != spp.ClientID {
|
|
|
|
cf["aadClientId"] = spp.ClientID
|
2021-02-24 15:34:40 +03:00
|
|
|
}
|
2023-11-28 18:45:00 +03:00
|
|
|
}
|
|
|
|
if val, ok := cf["aadClientSecret"].(string); ok {
|
|
|
|
if val != string(spp.ClientSecret) {
|
|
|
|
cf["aadClientSecret"] = spp.ClientSecret
|
2021-02-24 15:34:40 +03:00
|
|
|
}
|
|
|
|
}
|
2023-11-28 18:45:00 +03:00
|
|
|
}
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2023-11-28 18:45:00 +03:00
|
|
|
return cloudConfigSecretFromChanges(secret, cf)
|
|
|
|
}
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2023-11-28 18:45:00 +03:00
|
|
|
func (m *manager) updateAROSecret(ctx context.Context) error {
|
|
|
|
var changed bool
|
|
|
|
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
|
|
|
|
secret, err := m.servicePrincipalUpdated(ctx)
|
|
|
|
changed = secret != nil
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if changed {
|
2021-02-24 15:34:40 +03:00
|
|
|
_, err = m.kubernetescli.CoreV1().Secrets("kube-system").Update(ctx, secret, metav1.UpdateOptions{})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-02-24 18:40:32 +03:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2021-02-24 18:40:32 +03:00
|
|
|
// return early if not changed
|
|
|
|
if !changed {
|
|
|
|
return nil
|
|
|
|
}
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2021-02-24 18:40:32 +03:00
|
|
|
// If secret change we need to trigger kube-api-server and kube-controller-manager restarts
|
|
|
|
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
|
|
|
|
kAPIServer, err := m.operatorcli.OperatorV1().KubeAPIServers().Get(ctx, "cluster", metav1.GetOptions{})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
kAPIServer.Spec.ForceRedeploymentReason = "Credential rotation " + time.Now().UTC().String()
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2021-02-24 18:40:32 +03:00
|
|
|
_, err = m.operatorcli.OperatorV1().KubeAPIServers().Update(ctx, kAPIServer, metav1.UpdateOptions{})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
2021-02-24 18:53:07 +03:00
|
|
|
// Log the error and continue. This code is inherently edge triggered;
|
|
|
|
// if we fail and the user retries, we won't re-trigger this code anyway,
|
|
|
|
// so it doesn't really help anyone to make this a hard failure
|
2021-02-24 18:40:32 +03:00
|
|
|
m.log.Error(err)
|
|
|
|
}
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2021-02-24 18:40:32 +03:00
|
|
|
err = retry.RetryOnConflict(retry.DefaultRetry, func() error {
|
|
|
|
kManager, err := m.operatorcli.OperatorV1().KubeControllerManagers().Get(ctx, "cluster", metav1.GetOptions{})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
kManager.Spec.ForceRedeploymentReason = "Credential rotation " + time.Now().UTC().String()
|
|
|
|
|
|
|
|
_, err = m.operatorcli.OperatorV1().KubeControllerManagers().Update(ctx, kManager, metav1.UpdateOptions{})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2021-02-24 15:34:40 +03:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
2021-02-24 18:40:32 +03:00
|
|
|
if err != nil {
|
|
|
|
m.log.Error(err)
|
|
|
|
}
|
|
|
|
return nil
|
2021-02-24 15:34:40 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
func (m *manager) updateOpenShiftSecret(ctx context.Context) error {
|
2023-08-28 22:23:42 +03:00
|
|
|
resourceGroupID := m.doc.OpenShiftCluster.Properties.ClusterProfile.ResourceGroupID
|
2021-02-24 15:34:40 +03:00
|
|
|
spp := m.doc.OpenShiftCluster.Properties.ServicePrincipalProfile
|
2023-08-28 22:23:42 +03:00
|
|
|
//data:
|
|
|
|
// azure_client_id: secret_id
|
|
|
|
// azure_client_secret: secret_value
|
|
|
|
// azure_tenant_id: tenant_id
|
|
|
|
desiredData := map[string][]byte{
|
|
|
|
"azure_subscription_id": []byte(m.subscriptionDoc.ID),
|
|
|
|
"azure_resource_prefix": []byte(m.doc.OpenShiftCluster.Properties.InfraID),
|
|
|
|
"azure_resourcegroup": []byte(resourceGroupID[strings.LastIndex(resourceGroupID, "/")+1:]),
|
|
|
|
"azure_region": []byte(m.doc.OpenShiftCluster.Location),
|
|
|
|
"azure_client_id": []byte(spp.ClientID),
|
|
|
|
"azure_client_secret": []byte(spp.ClientSecret),
|
|
|
|
"azure_tenant_id": []byte(m.subscriptionDoc.Subscription.Properties.TenantID),
|
|
|
|
}
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2023-08-28 22:23:42 +03:00
|
|
|
secretApplyConfig := applyv1.Secret(clusterauthorizer.AzureCredentialSecretName, clusterauthorizer.AzureCredentialSecretNameSpace).WithData(desiredData)
|
|
|
|
_, err := m.kubernetescli.CoreV1().Secrets(clusterauthorizer.AzureCredentialSecretNameSpace).Apply(ctx, secretApplyConfig, metav1.ApplyOptions{FieldManager: "aro-rp", Force: true})
|
2021-02-24 18:40:32 +03:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-02-24 15:34:40 +03:00
|
|
|
|
2021-02-24 18:40:32 +03:00
|
|
|
// restart cloud credentials operator to trigger rotation
|
|
|
|
err = m.kubernetescli.CoreV1().Pods("openshift-cloud-credential-operator").DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{
|
|
|
|
LabelSelector: "app=cloud-credential-operator",
|
2021-02-24 15:34:40 +03:00
|
|
|
})
|
2021-02-24 18:40:32 +03:00
|
|
|
if err != nil {
|
2021-02-24 18:53:07 +03:00
|
|
|
// Log the error and continue. This code is inherently edge triggered;
|
|
|
|
// if we fail and the user retries, we won't re-trigger this code anyway,
|
|
|
|
// so it doesn't really help anyone to make this a hard failure
|
2021-02-24 18:40:32 +03:00
|
|
|
m.log.Error(err)
|
|
|
|
}
|
|
|
|
return nil
|
2021-02-24 15:34:40 +03:00
|
|
|
}
|