From f94384934a04080a98c21ae372a40755037d8609 Mon Sep 17 00:00:00 2001 From: Anat Balzam <13421451+anatbal@users.noreply.github.com> Date: Tue, 14 Mar 2023 14:26:13 +0200 Subject: [PATCH] Support Airlock in GOV cloud (#3338) * hardcoded storage endpoint * fix unit tests, api hardcoded value * bump api version * support arm env in airlock processor * rename --------- Co-authored-by: Anat Balzam --- airlock_processor/_version.py | 2 +- airlock_processor/local.settings.json-sample | 3 ++- airlock_processor/requirements.txt | 1 + .../shared_code/blob_operations.py | 5 ++-- airlock_processor/shared_code/cloud.py | 26 +++++++++++++++++++ .../tests/shared_code/test_blob_operations.py | 11 +++++--- .../tests/test_data_deletion_trigger.py | 10 ++++--- .../tests/test_status_change_queue_trigger.py | 2 +- api_app/core/cloud.py | 4 +++ api_app/services/airlock.py | 9 ++++--- core/terraform/airlock/airlock_processor.tf | 1 + core/terraform/airlock/variables.tf | 2 ++ core/terraform/main.tf | 1 + core/version.txt | 2 +- 14 files changed, 63 insertions(+), 16 deletions(-) create mode 100644 airlock_processor/shared_code/cloud.py diff --git a/airlock_processor/_version.py b/airlock_processor/_version.py index 4b2ce7df3..f658d0a64 100644 --- a/airlock_processor/_version.py +++ b/airlock_processor/_version.py @@ -1 +1 @@ -__version__ = "0.4.13" +__version__ = "0.4.14" diff --git a/airlock_processor/local.settings.json-sample b/airlock_processor/local.settings.json-sample index 499c9ed9c..b7dadb213 100644 --- a/airlock_processor/local.settings.json-sample +++ b/airlock_processor/local.settings.json-sample @@ -8,6 +8,7 @@ "BLOB_CREATED_TOPIC_NAME": "", "TOPIC_SUBSCRIPTION_NAME":"", "TRE_ID": "", - "ENABLE_MALWARE_SCANNING": "false" + "ENABLE_MALWARE_SCANNING": "false", + "ARM_ENVIRONMENT": "public" } } diff --git a/airlock_processor/requirements.txt b/airlock_processor/requirements.txt index de9f1552c..422a8a334 100644 --- a/airlock_processor/requirements.txt +++ b/airlock_processor/requirements.txt @@ -6,3 +6,4 @@ azure-identity azure-mgmt-storage azure-mgmt-resource pydantic +azure-cli-core diff --git a/airlock_processor/shared_code/blob_operations.py b/airlock_processor/shared_code/blob_operations.py index 231a42ccc..511b7d7e4 100644 --- a/airlock_processor/shared_code/blob_operations.py +++ b/airlock_processor/shared_code/blob_operations.py @@ -4,6 +4,7 @@ import logging import json import re from typing import Tuple +from shared_code.cloud import get_storage_endpoint_suffix from azure.core.exceptions import ResourceExistsError from azure.identity import DefaultAzureCredential @@ -13,7 +14,7 @@ from exceptions import NoFilesInRequestException, TooManyFilesInRequestException def get_account_url(account_name: str) -> str: - return f"https://{account_name}.blob.core.windows.net/" + return f"https://{account_name}.blob.{get_storage_endpoint_suffix()}/" def get_blob_client_from_blob_info(storage_account_name: str, container_name: str, blob_name: str): @@ -120,7 +121,7 @@ def get_blob_info_from_topic_and_subject(topic: str, subject: str): def get_blob_info_from_blob_url(blob_url: str) -> Tuple[str, str, str]: # Example of blob url: https://stalimappws663d.blob.core.windows.net/50866a82-d13a-4fd5-936f-deafdf1022ce/test_blob.txt - return re.search(r'https://(.*?).blob.core.windows.net/(.*?)/(.*?)$', blob_url).groups() + return re.search(rf'https://(.*?).blob.{get_storage_endpoint_suffix()}/(.*?)/(.*?)$', blob_url).groups() def get_blob_url(account_name: str, container_name: str, blob_name='') -> str: diff --git a/airlock_processor/shared_code/cloud.py b/airlock_processor/shared_code/cloud.py new file mode 100644 index 000000000..d8396f698 --- /dev/null +++ b/airlock_processor/shared_code/cloud.py @@ -0,0 +1,26 @@ +import logging +import os +from azure.cli.core import cloud + + +def _get_arm_environment(): + try: + arm_environment = os.environ["ARM_ENVIRONMENT"].lower() + except KeyError as e: + logging.error(f'Missing environment variable: {e}') + raise + return arm_environment + + +# Get active cloud information such as endpoints and suffixes +def get_cloud() -> cloud.Cloud: + arm_env = _get_arm_environment() + supported_clouds = {"public": cloud.AZURE_PUBLIC_CLOUD, "usgovernment": cloud.AZURE_US_GOV_CLOUD} + if arm_env in supported_clouds: + return supported_clouds[arm_env] + raise ValueError( + f"Invalid arm environment. Got: {arm_env}. Supported envs are: {', '.join(supported_clouds.keys())}.") + + +def get_storage_endpoint_suffix() -> str: + return get_cloud().suffixes.storage_endpoint diff --git a/airlock_processor/tests/shared_code/test_blob_operations.py b/airlock_processor/tests/shared_code/test_blob_operations.py index 6f49f38df..ab4061851 100644 --- a/airlock_processor/tests/shared_code/test_blob_operations.py +++ b/airlock_processor/tests/shared_code/test_blob_operations.py @@ -1,16 +1,19 @@ from collections import namedtuple import json +import os import pytest from mock import MagicMock, patch from shared_code.blob_operations import get_blob_info_from_topic_and_subject, get_blob_info_from_blob_url, copy_data, get_blob_url from exceptions import TooManyFilesInRequestException, NoFilesInRequestException +from shared_code.cloud import get_storage_endpoint_suffix def get_test_blob(): return namedtuple("Blob", "name") +@patch.dict(os.environ, {'ARM_ENVIRONMENT': 'public'}) class TestBlobOperations(): def test_get_blob_info_from_topic_and_subject(self): @@ -24,7 +27,7 @@ class TestBlobOperations(): assert blob_name == "BLOB" def test_get_blob_info_from_url(self): - url = "https://stalimextest.blob.core.windows.net/c144728c-3c69-4a58-afec-48c2ec8bfd45/test_dataset.txt" + url = f"https://stalimextest.blob.{get_storage_endpoint_suffix()}/c144728c-3c69-4a58-afec-48c2ec8bfd45/test_dataset.txt" storage_account_name, container_name, blob_name = get_blob_info_from_blob_url(blob_url=url) @@ -49,7 +52,7 @@ class TestBlobOperations(): @patch("shared_code.blob_operations.BlobServiceClient") @patch("shared_code.blob_operations.generate_container_sas", return_value="sas") def test_copy_data_adds_copied_from_metadata(self, _, mock_blob_service_client): - source_url = "http://storageacct.blob.core.windows.net/container/blob" + source_url = f"http://storageacct.blob.{get_storage_endpoint_suffix()}/container/blob" # Check for two scenarios: when there's no copied_from history in metadata, and when there is some for source_metadata, dest_metadata in [ @@ -84,11 +87,11 @@ class TestBlobOperations(): blob_name = "blob" blob_url = get_blob_url(account_name, container_name, blob_name) - assert blob_url == f"https://{account_name}.blob.core.windows.net/{container_name}/{blob_name}" + assert blob_url == f"https://{account_name}.blob.{get_storage_endpoint_suffix()}/{container_name}/{blob_name}" def test_get_blob_url_without_blob_name_should_return_container_url(self): account_name = "account" container_name = "container" blob_url = get_blob_url(account_name, container_name) - assert blob_url == f"https://{account_name}.blob.core.windows.net/{container_name}/" + assert blob_url == f"https://{account_name}.blob.{get_storage_endpoint_suffix()}/{container_name}/" diff --git a/airlock_processor/tests/test_data_deletion_trigger.py b/airlock_processor/tests/test_data_deletion_trigger.py index 4cd34a48e..de535ceca 100644 --- a/airlock_processor/tests/test_data_deletion_trigger.py +++ b/airlock_processor/tests/test_data_deletion_trigger.py @@ -1,12 +1,16 @@ +import os from mock import patch, MagicMock from DataDeletionTrigger import delete_blob_and_container_if_last_blob +from shared_code.cloud import get_storage_endpoint_suffix +@patch.dict(os.environ, {'ARM_ENVIRONMENT': 'public'}) class TestDataDeletionTrigger(): + @patch("DataDeletionTrigger.BlobServiceClient") def test_delete_blob_and_container_if_last_blob_deletes_container(self, mock_blob_service_client): - blob_url = "https://stalimextest.blob.core.windows.net/c144728c-3c69-4a58-afec-48c2ec8bfd45/test_dataset.txt" + blob_url = f"https://stalimextest.blob.{get_storage_endpoint_suffix()}/c144728c-3c69-4a58-afec-48c2ec8bfd45/test_dataset.txt" mock_blob_service_client().get_container_client().list_blobs = MagicMock(return_value=["blob"]) @@ -16,7 +20,7 @@ class TestDataDeletionTrigger(): @patch("DataDeletionTrigger.BlobServiceClient") def test_delete_blob_and_container_if_last_blob_doesnt_delete_container(self, mock_blob_service_client): - blob_url = "https://stalimextest.blob.core.windows.net/c144728c-3c69-4a58-afec-48c2ec8bfd45/test_dataset.txt" + blob_url = f"https://stalimextest.blob.{get_storage_endpoint_suffix()}/c144728c-3c69-4a58-afec-48c2ec8bfd45/test_dataset.txt" mock_blob_service_client().get_container_client().list_blobs = MagicMock(return_value=["blob1", "blob2"]) @@ -26,6 +30,6 @@ class TestDataDeletionTrigger(): @patch("DataDeletionTrigger.BlobServiceClient") def test_delete_blob_and_container_if_last_blob_deletes_container_if_no_blob_specified(self, mock_blob_service_client): - blob_url = "https://stalimextest.blob.core.windows.net/c144728c-3c69-4a58-afec-48c2ec8bfd45/" + blob_url = f"https://stalimextest.blob.{get_storage_endpoint_suffix()}/c144728c-3c69-4a58-afec-48c2ec8bfd45/" delete_blob_and_container_if_last_blob(blob_url) mock_blob_service_client().get_container_client().delete_container.assert_called_once() diff --git a/airlock_processor/tests/test_status_change_queue_trigger.py b/airlock_processor/tests/test_status_change_queue_trigger.py index 65bac890a..0da47c580 100644 --- a/airlock_processor/tests/test_status_change_queue_trigger.py +++ b/airlock_processor/tests/test_status_change_queue_trigger.py @@ -111,7 +111,7 @@ class TestFileEnumeration(): class TestFilesDeletion(): @patch("StatusChangedQueueTrigger.set_output_event_to_trigger_container_deletion") - @patch.dict(os.environ, {"TRE_ID": "tre-id"}, clear=True) + @patch.dict(os.environ, {"TRE_ID": "tre-id", 'ARM_ENVIRONMENT': 'public'}, clear=True) def test_delete_request_files_should_be_called_on_cancel_stage(self, mock_set_output_event_to_trigger_container_deletion): message_body = "{ \"data\": { \"request_id\":\"123\",\"new_status\":\"cancelled\" ,\"previous_status\":\"draft\" , \"type\":\"export\", \"workspace_id\":\"ws1\" }}" message = _mock_service_bus_message(body=message_body) diff --git a/api_app/core/cloud.py b/api_app/core/cloud.py index 82fdaa94a..a59f327d0 100644 --- a/api_app/core/cloud.py +++ b/api_app/core/cloud.py @@ -31,3 +31,7 @@ def get_resource_manager_credential_scopes(): def get_microsoft_graph_url() -> str: return get_cloud().endpoints.microsoft_graph_resource_id.strip("/") + + +def get_storage_endpoint_suffix() -> str: + return get_cloud().suffixes.storage_endpoint diff --git a/api_app/services/airlock.py b/api_app/services/airlock.py index a0d4ed40f..33f08d2a7 100644 --- a/api_app/services/airlock.py +++ b/api_app/services/airlock.py @@ -3,6 +3,7 @@ import logging from azure.storage.blob import generate_container_sas, ContainerSasPermissions, BlobServiceClient from fastapi import HTTPException, status +from core.cloud import get_storage_endpoint_suffix from core import config, credentials from models.domain.airlock_request import AirlockRequest, AirlockRequestStatus, AirlockRequestType, AirlockReviewUserResource, AirlockReviewDecision, AirlockActions, AirlockFile, AirlockReview from models.domain.authentication import User @@ -33,6 +34,8 @@ from db.repositories.resources_history import ResourceHistoryRepository from collections import defaultdict from event_grid.event_sender import send_status_changed_event, send_airlock_notification_event +STORAGE_ENDPOINT = get_storage_endpoint_suffix() + def get_account_by_request(airlock_request: AirlockRequest, workspace: Workspace) -> str: tre_id = config.TRE_ID @@ -115,12 +118,12 @@ def get_airlock_request_container_sas_token(account_name: str, permission=required_permission, expiry=expiry) - return "https://{}.blob.core.windows.net/{}?{}" \ - .format(account_name, airlock_request.id, token) + return "https://{}.blob.{}/{}?{}" \ + .format(account_name, STORAGE_ENDPOINT, airlock_request.id, token) def get_account_url(account_name: str) -> str: - return f"https://{account_name}.blob.core.windows.net/" + return f"https://{account_name}.blob.{STORAGE_ENDPOINT}/" async def review_airlock_request(airlock_review_input: AirlockReviewInCreate, airlock_request: AirlockRequest, user: User, workspace: Workspace, diff --git a/core/terraform/airlock/airlock_processor.tf b/core/terraform/airlock/airlock_processor.tf index 24fa4f44e..786bb5760 100644 --- a/core/terraform/airlock/airlock_processor.tf +++ b/core/terraform/airlock/airlock_processor.tf @@ -60,6 +60,7 @@ resource "azurerm_linux_function_app" "airlock_function_app" { "AIRLOCK_SCAN_RESULT_QUEUE_NAME" = local.scan_result_queue_name "AIRLOCK_DATA_DELETION_QUEUE_NAME" = local.data_deletion_queue_name "ENABLE_MALWARE_SCANNING" = var.enable_malware_scanning + "ARM_ENVIRONMENT" = var.arm_environment "MANAGED_IDENTITY_CLIENT_ID" = azurerm_user_assigned_identity.airlock_id.client_id "TRE_ID" = var.tre_id "WEBSITE_CONTENTOVERVNET" = 1 diff --git a/core/terraform/airlock/variables.tf b/core/terraform/airlock/variables.tf index 2b23b44b7..3a98198c9 100644 --- a/core/terraform/airlock/variables.tf +++ b/core/terraform/airlock/variables.tf @@ -44,6 +44,8 @@ variable "enable_malware_scanning" { description = "If False, Airlock requests will skip the malware scanning stage" } +variable "arm_environment" {} + variable "log_analytics_workspace_id" {} variable "blob_core_dns_zone_id" {} diff --git a/core/terraform/main.tf b/core/terraform/main.tf index 3672a2c86..e00b04456 100644 --- a/core/terraform/main.tf +++ b/core/terraform/main.tf @@ -115,6 +115,7 @@ module "airlock_resources" { airlock_servicebus = azurerm_servicebus_namespace.sb applicationinsights_connection_string = module.azure_monitor.app_insights_connection_string enable_malware_scanning = var.enable_airlock_malware_scanning + arm_environment = var.arm_environment tre_core_tags = local.tre_core_tags log_analytics_workspace_id = module.azure_monitor.log_analytics_workspace_id blob_core_dns_zone_id = module.network.blob_core_dns_zone_id diff --git a/core/version.txt b/core/version.txt index 6de11f480..1746246f4 100644 --- a/core/version.txt +++ b/core/version.txt @@ -1 +1 @@ -__version__ = "0.7.14" +__version__ = "0.7.15"