diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py
index b3c76882c85..b20027f6797 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py
@@ -65,11 +65,10 @@ async def ensure_service_availability(rai_svc_url: str, token: str, capability:
     headers = get_common_headers(token)
     svc_liveness_url = rai_svc_url + "/checkannotation"
 
-    client = get_async_http_client()
-
-    response = await client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
-        svc_liveness_url, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
-    )
+    async with get_async_http_client() as client:
+        response = await client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+            svc_liveness_url, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
+        )
 
     if response.status_code != 200:
         raise Exception(  # pylint: disable=broad-exception-raised
@@ -143,11 +142,10 @@ async def submit_request(question: str, answer: str, metric: str, rai_svc_url: s
     url = rai_svc_url + "/submitannotation"
     headers = get_common_headers(token)
 
-    client = get_async_http_client()
-
-    response = await client.post(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
-        url, json=payload, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
-    )
+    async with get_async_http_client() as client:
+        response = await client.post(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+            url, json=payload, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
+        )
 
     if response.status_code != 202:
         print("Fail evaluating '%s' with error message: %s" % (payload["UserTextList"], response.text))
@@ -180,11 +178,10 @@ async def fetch_result(operation_id: str, rai_svc_url: str, credential: TokenCre
         token = await fetch_or_reuse_token(credential, token)
         headers = get_common_headers(token)
 
-        client = get_async_http_client()
-
-        response = await client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
-            url, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
-        )
+        async with get_async_http_client() as client:
+            response = await client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+                url, headers=headers, timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT
+            )
 
         if response.status_code == 200:
             return response.json()
@@ -342,16 +339,15 @@ async def _get_service_discovery_url(azure_ai_project: dict, token: str) -> str:
     """
     headers = get_common_headers(token)
 
-    client = get_async_http_client()
-
-    response = await client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
-        f"https://management.azure.com/subscriptions/{azure_ai_project['subscription_id']}/"
-        f"resourceGroups/{azure_ai_project['resource_group_name']}/"
-        f"providers/Microsoft.MachineLearningServices/workspaces/{azure_ai_project['project_name']}?"
-        f"api-version=2023-08-01-preview",
-        headers=headers,
-        timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT,
-    )
+    async with get_async_http_client() as client:
+        response = await client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+            f"https://management.azure.com/subscriptions/{azure_ai_project['subscription_id']}/"
+            f"resourceGroups/{azure_ai_project['resource_group_name']}/"
+            f"providers/Microsoft.MachineLearningServices/workspaces/{azure_ai_project['project_name']}?"
+            f"api-version=2023-08-01-preview",
+            headers=headers,
+            timeout=CommonConstants.DEFAULT_HTTP_TIMEOUT,
+        )
 
     if response.status_code != 200:
         raise Exception("Failed to retrieve the discovery service URL")  # pylint: disable=broad-exception-raised
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
index c627ee206da..1088820d385 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py
@@ -321,7 +321,7 @@ class AdversarialSimulator:
             )
         )
 
-        async with semaphore:
+        async with semaphore, session:
             _, conversation_history = await simulate_conversation(
                 bots=bots,
                 session=session,
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py
index 181a798fece..6b23e9bb6b7 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_proxy_completion_model.py
@@ -196,17 +196,16 @@ class ProxyChatCompletionsModel(OpenAIChatCompletionsModel):
             retry_mode=RetryMode.Exponential,
         )
 
-        exp_retry_client = get_async_http_client().with_policies(retry_policy=retry_policy)
-
         # initial 15 seconds wait before attempting to fetch result
         # Need to wait both in this thread and in the async thread for some reason?
         # Someone not under a crunch and with better async understandings should dig into this more.
         await asyncio.sleep(15)
         time.sleep(15)
 
-        response = await exp_retry_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
-            self.result_url, headers=proxy_headers
-        )
+        async with get_async_http_client().with_policies(retry_policy=retry_policy) as exp_retry_client:
+            response = await exp_retry_client.get(  # pylint: disable=too-many-function-args,unexpected-keyword-arg
+                self.result_url, headers=proxy_headers
+            )
 
         response.raise_for_status()
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py
index ad7a306d792..99b06ef19c4 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py
@@ -122,7 +122,9 @@ class RAIClient:
         }
 
         session = self._create_async_client()
-        response = await session.get(url=url, headers=headers)  # pylint: disable=unexpected-keyword-arg
+
+        async with session:
+            response = await session.get(url=url, headers=headers)  # pylint: disable=unexpected-keyword-arg
 
         if response.status_code == 200:
             return response.json()
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/conftest.py b/sdk/evaluation/azure-ai-evaluation/tests/conftest.py
index d980db319e0..8e2ced92211 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/conftest.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/conftest.py
@@ -471,3 +471,28 @@ def pytest_collection_modifyitems(items):
                 # If item's parent was marked as 'localtest', mark the child as such, but not if
                 # it was marked as 'azuretest'.
                 item.add_marker(pytest.mark.localtest)
+
+
+def pytest_sessionfinish() -> None:
+
+    def stop_promptflow_service() -> None:
+        """Ensure that the promptflow service is stopped when pytest exits.
+
+        .. note::
+
+            The azure-sdk-for-python CI performs a cleanup step that deletes
+            the python environment that the tests run in.
+
+            At time of writing, at least one test starts the promptflow service
+            (served from `waitress-serve`). The promptflow service is a separate
+            process that gets orphaned by pytest.
+
+            Crucially, that process has handles on files in the python environment.
+            On Windows, this causes the cleanup step to fail with a permission issue
+            since the OS disallows deletion of files in use by a process.
+        """
+        from promptflow._cli._pf._service import stop_service
+
+        stop_service()
+
+    stop_promptflow_service()
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_synthetic_conversation_bot.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_synthetic_conversation_bot.py
index 4d336018a81..fb8b1188d9d 100644
--- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_synthetic_conversation_bot.py
+++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_synthetic_conversation_bot.py
@@ -84,9 +84,11 @@ class TestConversationBot:
             )
         )
 
-        parsed_response, req, time_taken, full_response = await bot.generate_response(
-            session=client, conversation_history=[], max_history=0, turn_number=0
-        )
+        async with client:
+            parsed_response, req, time_taken, full_response = await bot.generate_response(
+                session=client, conversation_history=[], max_history=0, turn_number=0
+            )
+
         assert (
             parsed_response["samples"][0]
             == bot_invalid_jinja_params["instantiation_parameters"]["conversation_starter"]
diff --git a/sdk/evaluation/ci.yml b/sdk/evaluation/ci.yml
index 31a571d0c8a..826ed23c3e6 100644
--- a/sdk/evaluation/ci.yml
+++ b/sdk/evaluation/ci.yml
@@ -28,13 +28,6 @@ extends:
     ServiceDirectory: evaluation
     ValidateFormatting: true
     TestProxy: true
-    # This custom matrix config should be dropped once:
-    #  * Resolve the issue of windows runners crashing because a file isn't deletable
-    MatrixConfigs: 
-      - Name: ai_ci_matrix
-        Path: sdk/evaluation/platform-matrix.json
-        Selection: sparse
-        GenerateVMJobs: true
     Artifacts:
     - name: azure-ai-evaluation
       safeName: azureaievaluation
diff --git a/sdk/evaluation/platform-matrix.json b/sdk/evaluation/platform-matrix.json
deleted file mode 100644
index 92e3a60c726..00000000000
--- a/sdk/evaluation/platform-matrix.json
+++ /dev/null
@@ -1,51 +0,0 @@
-{
-  "displayNames": {
-    "--disablecov": "",
-    "false": "",
-    "true": ""
-  },
-  "matrix": {
-    "Agent": {
-      "macos-latest": { "OSVmImage": "env:MACVMIMAGE", "Pool": "env:MACPOOL" },
-      "ubuntu-20.04": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" }
-    },
-    "PythonVersion": [ "3.8", "3.11", "3.10" ],
-    "CoverageArg": "--disablecov",
-    "TestSamples": "false"
-  },
-  "include": [
-    {
-      "CoverageConfig": {
-        "ubuntu2004_39_coverage": {
-          "OSVmImage": "env:LINUXVMIMAGE",
-          "Pool": "env:LINUXPOOL",
-          "PythonVersion": "3.9",
-          "CoverageArg": "",
-          "TestSamples": "false"
-        }
-      }
-    },
-    {
-      "CoverageConfig": {
-        "ubuntu2004_pypy39": {
-          "OSVmImage": "env:LINUXVMIMAGE",
-          "Pool": "env:LINUXPOOL",
-          "PythonVersion": "pypy3.9",
-          "CoverageArg": "",
-          "TestSamples": "false"
-        }
-      }
-    },
-    {
-      "Config": {
-        "Ubuntu2004_312": {
-          "OSVmImage": "env:LINUXVMIMAGE",
-          "Pool": "env:LINUXPOOL",
-          "PythonVersion": "3.12",
-          "CoverageArg": "--disablecov",
-          "TestSamples": "false"
-        }
-      }
-    }
-  ]
-}