Fix storage account controller vnet rule additions

Fixed such that we continue to add vnet rules for cluster subnets if Microsoft.Storage service endpoint present on subnet(s)

Some context for this fix:

"""
Unbeknownst to us, as of OCP 4.11, the default StorageClass object for azurefile-csi was automatically selecting the cluster Storage Account we use to store things like ignition config to also back all PVs created with this storage class, that is, customer data.

As of this RP Release / PUCM, ARO SRE added functionality to ARO to disallow requirements on Service Endpoints on cluster subnets (seen as an attack vector for some highly security-conscious customers), and in doing so, we added some NACL rules to not allow nodes to access the cluster storage account, because we figured from a service perspective it was unused.

These two things are now in conflict. We now have some customers who used the default storage class for Azure File, and can no longer mount volumes due to our new NACL configuration. We need to modify our stack to accommodate these customers (estimated to be a few dozen).
"""
This commit is contained in:
kimorris27 2023-10-12 10:55:12 -05:00 коммит произвёл bennerv
Родитель d346dd760c
Коммит b75bc878d1
3 изменённых файлов: 211 добавлений и 14 удалений

Просмотреть файл

@ -48,6 +48,7 @@ type reconcileManager struct {
client client.Client
kubeSubnets subnet.KubeManager
mgmtSubnets subnet.Manager
storage storage.AccountsClient
}
@ -103,6 +104,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, request ctrl.Request) (ctrl.
client: r.client,
kubeSubnets: subnet.NewKubeManager(r.client, resource.SubscriptionID),
mgmtSubnets: subnet.NewManager(&azEnv, resource.SubscriptionID, authorizer),
storage: storage.NewAccountsClient(&azEnv, resource.SubscriptionID, authorizer),
}

Просмотреть файл

@ -22,21 +22,39 @@ func (r *reconcileManager) reconcileAccounts(ctx context.Context) error {
serviceSubnets := r.instance.Spec.ServiceSubnets
// Only include the master and worker subnets in the storage accounts' virtual
// network rules if egress lockdown is not enabled.
if !operator.GatewayEnabled(r.instance) {
subnets, err := r.kubeSubnets.List(ctx)
subnets, err := r.kubeSubnets.List(ctx)
if err != nil {
return err
}
// Include the master and worker subnets in the storage accounts' virtual
// network rules if either of the following is true:
// 1) The Microsoft.Storage service endpoint is still present on the subnet(s)
// 2) Egress lockdown is not enabled
for _, subnet := range subnets {
hasStorageEndpoint := false
mgmtSubnet, err := r.mgmtSubnets.Get(ctx, subnet.ResourceID)
if err != nil {
return err
}
for _, subnet := range subnets {
if mgmtSubnet.SubnetPropertiesFormat != nil && mgmtSubnet.SubnetPropertiesFormat.ServiceEndpoints != nil {
for _, serviceEndpoint := range *mgmtSubnet.SubnetPropertiesFormat.ServiceEndpoints {
if serviceEndpoint.Service != nil && *serviceEndpoint.Service == "Microsoft.Storage" {
hasStorageEndpoint = true
}
}
}
if hasStorageEndpoint || !operator.GatewayEnabled(r.instance) {
serviceSubnets = append(serviceSubnets, subnet.ResourceID)
}
}
rc := &imageregistryv1.Config{}
err := r.client.Get(ctx, types.NamespacedName{Name: "cluster"}, rc)
err = r.client.Get(ctx, types.NamespacedName{Name: "cluster"}, rc)
if err != nil {
return err
}

Просмотреть файл

@ -8,6 +8,7 @@ import (
"strconv"
"testing"
mgmtnetwork "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2020-08-01/network"
mgmtstorage "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2019-06-01/storage"
"github.com/Azure/go-autorest/autorest/to"
"github.com/golang/mock/gomock"
@ -16,6 +17,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"github.com/Azure/ARO-RP/pkg/api"
apisubnet "github.com/Azure/ARO-RP/pkg/api/util/subnet"
arov1alpha1 "github.com/Azure/ARO-RP/pkg/operator/apis/aro.openshift.io/v1alpha1"
mock_storage "github.com/Azure/ARO-RP/pkg/util/mocks/azureclient/mgmt/storage"
mock_subnet "github.com/Azure/ARO-RP/pkg/util/mocks/subnet"
@ -27,10 +30,12 @@ var (
subscriptionId = "0000000-0000-0000-0000-000000000000"
clusterResourceGroupName = "aro-iljrzb5a"
clusterResourceGroupId = "/subscriptions/" + subscriptionId + "/resourcegroups/" + clusterResourceGroupName
infraId = "abcd"
vnetResourceGroup = "vnet-rg"
vnetName = "vnet"
subnetNameWorker = "worker"
subnetNameMaster = "master"
nsgv1MasterResourceId = clusterResourceGroupId + "/providers/Microsoft.Network/networkSecurityGroups/" + infraId + apisubnet.NSGControlPlaneSuffixV1
storageSuffix = "random-suffix"
clusterStorageAccountName = "cluster" + storageSuffix
@ -70,12 +75,31 @@ func getValidAccount(virtualNetworkResourceIDs []string) *mgmtstorage.Account {
return account
}
func getValidSubnet(resourceId string) *mgmtnetwork.Subnet {
s := &mgmtnetwork.Subnet{
ID: to.StringPtr(resourceId),
SubnetPropertiesFormat: &mgmtnetwork.SubnetPropertiesFormat{
NetworkSecurityGroup: &mgmtnetwork.SecurityGroup{
ID: to.StringPtr(nsgv1MasterResourceId),
},
ServiceEndpoints: &[]mgmtnetwork.ServiceEndpointPropertiesFormat{},
},
}
for _, endpoint := range api.SubnetsEndpoints {
*s.SubnetPropertiesFormat.ServiceEndpoints = append(*s.SubnetPropertiesFormat.ServiceEndpoints, mgmtnetwork.ServiceEndpointPropertiesFormat{
Service: to.StringPtr(endpoint),
ProvisioningState: mgmtnetwork.Succeeded,
})
}
return s
}
func TestReconcileManager(t *testing.T) {
log := logrus.NewEntry(logrus.StandardLogger())
for _, tt := range []struct {
name string
mocks func(*mock_storage.MockAccountsClient, *mock_subnet.MockKubeManager)
mocks func(*mock_storage.MockAccountsClient, *mock_subnet.MockKubeManager, *mock_subnet.MockManager)
instance func(*arov1alpha1.Cluster)
operatorFlag bool
wantErr error
@ -83,7 +107,14 @@ func TestReconcileManager(t *testing.T) {
{
name: "Operator Flag enabled - nothing to do",
operatorFlag: true,
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager) {
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager, mgmtSubnet *mock_subnet.MockManager) {
// Azure subnets
masterSubnet := getValidSubnet(resourceIdMaster)
workerSubnet := getValidSubnet(resourceIdWorker)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdMaster).Return(masterSubnet, nil)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdWorker).Return(workerSubnet, nil)
// cluster subnets
kubeSubnet.EXPECT().List(gomock.Any()).Return([]subnet.Subnet{
{
@ -105,7 +136,14 @@ func TestReconcileManager(t *testing.T) {
{
name: "Operator Flag disabled - nothing to do",
operatorFlag: false,
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager) {
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager, mgmtSubnet *mock_subnet.MockManager) {
// Azure subnets
masterSubnet := getValidSubnet(resourceIdMaster)
workerSubnet := getValidSubnet(resourceIdWorker)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdMaster).Return(masterSubnet, nil)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdWorker).Return(workerSubnet, nil)
// cluster subnets
kubeSubnet.EXPECT().List(gomock.Any()).Return([]subnet.Subnet{
{
@ -125,9 +163,22 @@ func TestReconcileManager(t *testing.T) {
},
},
{
name: "Operator Flag enabled - all rules to all accounts",
name: "Operator Flag enabled - all rules to all accounts because egress lockdown not enabled",
operatorFlag: true,
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager) {
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager, mgmtSubnet *mock_subnet.MockManager) {
// Azure subnets
masterSubnet := getValidSubnet(resourceIdMaster)
workerSubnet := getValidSubnet(resourceIdWorker)
// Service endpoints should be there if egress lockdown hasn't yet been enabled, but let's
// pessimistically assume they have somehow been removed so we have the opportunity to
// test a messy edge case.
masterSubnet.ServiceEndpoints = nil
workerSubnet.ServiceEndpoints = nil
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdMaster).Return(masterSubnet, nil)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdWorker).Return(workerSubnet, nil)
// cluster subnets
kubeSubnet.EXPECT().List(gomock.Any()).Return([]subnet.Subnet{
{
@ -165,12 +216,136 @@ func TestReconcileManager(t *testing.T) {
},
},
{
name: "Operator Flag enabled - nothing to do because egress lockdown is enabled",
name: "Operator flag and egress lockdown enabled - all rules to all accounts because storage service endpoint on subnets",
operatorFlag: true,
instance: func(cluster *arov1alpha1.Cluster) {
cluster.Spec.GatewayDomains = []string{"somegatewaydomain.com"}
},
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager) {
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager, mgmtSubnet *mock_subnet.MockManager) {
// Azure subnets
masterSubnet := getValidSubnet(resourceIdMaster)
workerSubnet := getValidSubnet(resourceIdWorker)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdMaster).Return(masterSubnet, nil)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdWorker).Return(workerSubnet, nil)
// cluster subnets
kubeSubnet.EXPECT().List(gomock.Any()).Return([]subnet.Subnet{
{
ResourceID: resourceIdMaster,
IsMaster: true,
},
{
ResourceID: resourceIdWorker,
IsMaster: false,
},
}, nil)
// storage objects in azure
result := getValidAccount([]string{})
updated := mgmtstorage.AccountUpdateParameters{
AccountPropertiesUpdateParameters: &mgmtstorage.AccountPropertiesUpdateParameters{
NetworkRuleSet: getValidAccount([]string{resourceIdMaster, resourceIdWorker}).NetworkRuleSet,
},
}
storage.EXPECT().GetProperties(gomock.Any(), clusterResourceGroupName, clusterStorageAccountName, gomock.Any()).Return(*result, nil)
storage.EXPECT().Update(gomock.Any(), clusterResourceGroupName, clusterStorageAccountName, updated)
// we can't reuse these from above due to fact how gomock handles objects.
// they are modified by the functions so they are not the same anymore
result = getValidAccount([]string{})
updated = mgmtstorage.AccountUpdateParameters{
AccountPropertiesUpdateParameters: &mgmtstorage.AccountPropertiesUpdateParameters{
NetworkRuleSet: getValidAccount([]string{resourceIdMaster, resourceIdWorker}).NetworkRuleSet,
},
}
storage.EXPECT().GetProperties(gomock.Any(), clusterResourceGroupName, registryStorageAccountName, gomock.Any()).Return(*result, nil)
storage.EXPECT().Update(gomock.Any(), clusterResourceGroupName, registryStorageAccountName, updated)
},
},
{
name: "Operator flag and egress lockdown enabled - worker subnet rule to all accounts because storage service endpoint on worker subnet",
operatorFlag: true,
instance: func(cluster *arov1alpha1.Cluster) {
cluster.Spec.GatewayDomains = []string{"somegatewaydomain.com"}
},
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager, mgmtSubnet *mock_subnet.MockManager) {
// Azure subnets
masterSubnet := getValidSubnet(resourceIdMaster)
workerSubnet := getValidSubnet(resourceIdWorker)
masterSubnet.ServiceEndpoints = nil
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdMaster).Return(masterSubnet, nil)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdWorker).Return(workerSubnet, nil)
// cluster subnets
kubeSubnet.EXPECT().List(gomock.Any()).Return([]subnet.Subnet{
{
ResourceID: resourceIdMaster,
IsMaster: true,
},
{
ResourceID: resourceIdWorker,
IsMaster: false,
},
}, nil)
// storage objects in azure
result := getValidAccount([]string{})
updated := mgmtstorage.AccountUpdateParameters{
AccountPropertiesUpdateParameters: &mgmtstorage.AccountPropertiesUpdateParameters{
NetworkRuleSet: getValidAccount([]string{resourceIdWorker}).NetworkRuleSet,
},
}
storage.EXPECT().GetProperties(gomock.Any(), clusterResourceGroupName, clusterStorageAccountName, gomock.Any()).Return(*result, nil)
storage.EXPECT().Update(gomock.Any(), clusterResourceGroupName, clusterStorageAccountName, updated)
// we can't reuse these from above due to fact how gomock handles objects.
// they are modified by the functions so they are not the same anymore
result = getValidAccount([]string{})
updated = mgmtstorage.AccountUpdateParameters{
AccountPropertiesUpdateParameters: &mgmtstorage.AccountPropertiesUpdateParameters{
NetworkRuleSet: getValidAccount([]string{resourceIdWorker}).NetworkRuleSet,
},
}
storage.EXPECT().GetProperties(gomock.Any(), clusterResourceGroupName, registryStorageAccountName, gomock.Any()).Return(*result, nil)
storage.EXPECT().Update(gomock.Any(), clusterResourceGroupName, registryStorageAccountName, updated)
},
},
{
name: "Operator Flag and egress lockdown enabled - nothing to do because there are no service endpoints",
operatorFlag: true,
instance: func(cluster *arov1alpha1.Cluster) {
cluster.Spec.GatewayDomains = []string{"somegatewaydomain.com"}
},
mocks: func(storage *mock_storage.MockAccountsClient, kubeSubnet *mock_subnet.MockKubeManager, mgmtSubnet *mock_subnet.MockManager) {
// Azure subnets
masterSubnet := getValidSubnet(resourceIdMaster)
workerSubnet := getValidSubnet(resourceIdWorker)
masterSubnet.ServiceEndpoints = nil
workerSubnet.ServiceEndpoints = nil
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdMaster).Return(masterSubnet, nil)
mgmtSubnet.EXPECT().Get(gomock.Any(), resourceIdWorker).Return(workerSubnet, nil)
// cluster subnets
kubeSubnet.EXPECT().List(gomock.Any()).Return([]subnet.Subnet{
{
ResourceID: resourceIdMaster,
IsMaster: true,
},
{
ResourceID: resourceIdWorker,
IsMaster: false,
},
}, nil)
// storage objects in azure
result := getValidAccount([]string{})
@ -185,9 +360,10 @@ func TestReconcileManager(t *testing.T) {
storage := mock_storage.NewMockAccountsClient(controller)
kubeSubnet := mock_subnet.NewMockKubeManager(controller)
mgmtSubnet := mock_subnet.NewMockManager(controller)
if tt.mocks != nil {
tt.mocks(storage, kubeSubnet)
tt.mocks(storage, kubeSubnet, mgmtSubnet)
}
instance := getValidClusterInstance(tt.operatorFlag)
@ -214,6 +390,7 @@ func TestReconcileManager(t *testing.T) {
instance: instance,
subscriptionID: subscriptionId,
storage: storage,
mgmtSubnets: mgmtSubnet,
kubeSubnets: kubeSubnet,
client: clientFake,
}