Fix DBCopilot environment Vulnerabilities (#3277)

* Fix DBCopilot environment Vulnerabilities

* fix

* fix

* fix

* FIX

* fix

* fix

* fix

* fix
This commit is contained in:
XiangRao 2024-08-26 14:29:13 +08:00 коммит произвёл GitHub
Родитель 7c566f9458
Коммит 07e93b42dd
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
12 изменённых файлов: 99 добавлений и 55 удалений

Просмотреть файл

@ -4,7 +4,7 @@ tags:
Preview: ""
name: llm_ingest_db_to_acs
display_name: LLM - SQL Datastore to ACS Pipeline
version: 0.0.93
version: 0.0.94
description: Single job pipeline to chunk data from AzureML sql data store, and create ACS embeddings index
settings:
default_compute: serverless
@ -154,7 +154,7 @@ jobs:
output_grounding_context_file: ${{parent.outputs.db_context}}
environment_variables:
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
component: "azureml:llm_dbcopilot_grounding:0.0.67"
component: "azureml:llm_dbcopilot_grounding:0.0.68"
type: command
generate_meta_embeddings:
type: command
@ -164,7 +164,7 @@ jobs:
properties:
compute_specification:
automatic: true
component: "azureml:llm_rag_generate_embeddings:0.0.60"
component: "azureml:llm_rag_generate_embeddings:0.0.64"
inputs:
chunks_source:
type: uri_folder
@ -216,12 +216,12 @@ jobs:
path: ${{parent.inputs.acs_config}}
outputs:
index: ${{parent.outputs.grounding_index}}
component: "azureml:llm_rag_update_acs_index:0.0.62"
component: "azureml:llm_rag_update_acs_index:0.0.68"
type: command
#########################################
db_sample_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.42"
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.43"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -241,7 +241,7 @@ jobs:
#########################################
generate_sample_embeddings:
type: command
component: "azureml:llm_rag_generate_embeddings:0.0.60"
component: "azureml:llm_rag_generate_embeddings:0.0.64"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -262,7 +262,7 @@ jobs:
#########################################
create_sample_acs_index_job:
type: command
component: "azureml:llm_rag_update_acs_index:0.0.62"
component: "azureml:llm_rag_update_acs_index:0.0.68"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -296,7 +296,7 @@ jobs:
outputs:
asset_id:
type: uri_file
component: "azureml:llm_rag_register_mlindex_asset:0.0.62"
component: "azureml:llm_rag_register_mlindex_asset:0.0.68"
type: command
create_prompt_flow:
environment_variables:
@ -336,5 +336,5 @@ jobs:
path: ${{parent.inputs.include_views}}
instruct_template:
path: ${{parent.inputs.instruct_template}}
component: "azureml:llm_dbcopilot_create_promptflow:0.0.67"
component: "azureml:llm_dbcopilot_create_promptflow:0.0.68"
type: command

Просмотреть файл

@ -4,7 +4,7 @@ tags:
Preview: ""
name: llm_ingest_db_to_faiss
display_name: LLM - SQL Datastore to FAISS Pipeline
version: 0.0.93
version: 0.0.94
description: Single job pipeline to chunk data from AzureML sql data store, and create FAISS embeddings index
settings:
default_compute: serverless
@ -144,7 +144,7 @@ jobs:
output_grounding_context_file: ${{parent.outputs.db_context}}
environment_variables:
MANAGED_IDENTITY_ENABLED: ${{parent.inputs.managed_identity_enabled}}
component: "azureml:llm_dbcopilot_grounding:0.0.67"
component: "azureml:llm_dbcopilot_grounding:0.0.68"
type: command
generate_meta_embeddings:
type: command
@ -203,13 +203,13 @@ jobs:
path: ${{parent.jobs.generate_meta_embeddings.outputs.embeddings}}
outputs:
index: ${{parent.outputs.grounding_index}}
component: "azureml:llm_rag_create_faiss_index:0.0.65"
component: "azureml:llm_rag_create_faiss_index:0.0.69"
type: command
#########################################
db_sample_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.42"
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.43"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -229,7 +229,7 @@ jobs:
#########################################
generate_sample_embeddings:
type: command
component: "azureml:llm_rag_generate_embeddings:0.0.60"
component: "azureml:llm_rag_generate_embeddings:0.0.64"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -250,7 +250,7 @@ jobs:
#########################################
create_sample_faiss_index_job:
type: command
component: "azureml:llm_rag_create_faiss_index:0.0.65"
component: "azureml:llm_rag_create_faiss_index:0.0.69"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -280,7 +280,7 @@ jobs:
outputs:
asset_id:
type: uri_file
component: "azureml:llm_rag_register_mlindex_asset:0.0.62"
component: "azureml:llm_rag_register_mlindex_asset:0.0.68"
type: command
create_prompt_flow:
environment_variables:
@ -320,5 +320,5 @@ jobs:
path: ${{parent.inputs.include_views}}
instruct_template:
path: ${{parent.inputs.instruct_template}}
component: "azureml:llm_dbcopilot_create_promptflow:0.0.67"
component: "azureml:llm_dbcopilot_create_promptflow:0.0.68"
type: command

Просмотреть файл

@ -2,7 +2,7 @@ $schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.js
type: pipeline
name: llm_ingest_dbcopilot_acs_e2e
version: 0.0.62
version: 0.0.63
display_name: Data Ingestion for DB Data Output to ACS E2E Deployment
description: Single job pipeline to chunk data from AzureML DB Datastore and create acs embeddings index
@ -141,7 +141,7 @@ jobs:
#########################################
db_meta_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding:0.0.67"
component: "azureml:llm_dbcopilot_grounding:0.0.68"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -170,7 +170,7 @@ jobs:
#########################################
generate_meta_embeddings:
type: command
component: "azureml:llm_rag_generate_embeddings:0.0.60"
component: "azureml:llm_rag_generate_embeddings:0.0.64"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -191,7 +191,7 @@ jobs:
#########################################
create_meta_acs_index_job:
type: command
component: "azureml:llm_rag_update_acs_index:0.0.62"
component: "azureml:llm_rag_update_acs_index:0.0.68"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -213,7 +213,7 @@ jobs:
#########################################
db_sample_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.42"
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.43"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -233,7 +233,7 @@ jobs:
#########################################
generate_sample_embeddings:
type: command
component: "azureml:llm_rag_generate_embeddings:0.0.60"
component: "azureml:llm_rag_generate_embeddings:0.0.64"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -254,7 +254,7 @@ jobs:
#########################################
create_sample_acs_index_job:
type: command
component: "azureml:llm_rag_update_acs_index:0.0.62"
component: "azureml:llm_rag_update_acs_index:0.0.68"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -275,7 +275,7 @@ jobs:
#########################################
endpoint_deployment_job:
type: command
component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.43"
component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.44"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}

Просмотреть файл

@ -2,7 +2,7 @@ $schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.js
type: pipeline
name: llm_ingest_dbcopilot_faiss_e2e
version: 0.0.62
version: 0.0.63
display_name: Data Ingestion for DB Data Output to FAISS E2E Deployment
description: Single job pipeline to chunk data from AzureML DB Datastore and create faiss embeddings index
@ -131,7 +131,7 @@ jobs:
#########################################
db_meta_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding:0.0.67"
component: "azureml:llm_dbcopilot_grounding:0.0.68"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -160,7 +160,7 @@ jobs:
#########################################
generate_meta_embeddings:
type: command
component: "azureml:llm_rag_generate_embeddings:0.0.60"
component: "azureml:llm_rag_generate_embeddings:0.0.64"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -181,7 +181,7 @@ jobs:
#########################################
create_meta_faiss_index_job:
type: command
component: "azureml:llm_rag_create_faiss_index:0.0.65"
component: "azureml:llm_rag_create_faiss_index:0.0.69"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -201,7 +201,7 @@ jobs:
#########################################
db_sample_loading_generator:
type: command
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.42"
component: "azureml:llm_dbcopilot_grounding_ground_samples:0.0.43"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -221,7 +221,7 @@ jobs:
#########################################
generate_sample_embeddings:
type: command
component: "azureml:llm_rag_generate_embeddings:0.0.60"
component: "azureml:llm_rag_generate_embeddings:0.0.64"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -242,7 +242,7 @@ jobs:
#########################################
create_sample_faiss_index_job:
type: command
component: "azureml:llm_rag_create_faiss_index:0.0.65"
component: "azureml:llm_rag_create_faiss_index:0.0.69"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}
@ -259,7 +259,7 @@ jobs:
#########################################
endpoint_deployment_job:
type: command
component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.43"
component: "azureml:llm_dbcopilot_deploy_endpoint:0.0.44"
resources:
instance_count: ${{parent.inputs.serverless_instance_count}}
instance_type: ${{parent.inputs.serverless_instance_type}}

Просмотреть файл

@ -4,7 +4,7 @@ tags:
Preview: ""
name: llm_dbcopilot_create_promptflow
display_name: LLM - Create DBCopilot Prompt Flow
version: 0.0.67
version: 0.0.68
inputs:
index_name:
type: string

Просмотреть файл

@ -3,7 +3,7 @@ type: command
tags: {}
name: llm_dbcopilot_deploy_endpoint
display_name: LLM - DBCopilot Deploy Endpoint Component
version: 0.0.43
version: 0.0.44
inputs:
deployment_name:
type: string

Просмотреть файл

@ -3,7 +3,7 @@ type: command
tags:
Preview: ""
name: llm_dbcopilot_grounding
version: 0.0.67
version: 0.0.68
inputs:
asset_uri:
type: string

Просмотреть файл

@ -3,7 +3,7 @@ type: command
tags: {}
name: llm_dbcopilot_grounding_ground_samples
display_name: LLM - DBCopilot Grounding Ground Samples Component
version: 0.0.42
version: 0.0.43
inputs:
grounding_context:
type: uri_folder

Просмотреть файл

@ -10,6 +10,7 @@ from dataclasses import asdict, dataclass
from enum import Enum
from typing import Dict, List, Optional, Union
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from azureml.contrib.services.aml_request import AMLRequest, rawhttp
from azureml.contrib.services.aml_response import AMLResponse
from db_copilot.contract.memory_core import MemoryItem
@ -51,18 +52,43 @@ def init():
# setting up db copilot
with open(os.path.join(current_dir, "secrets.json")) as f:
secret_manager: dict = json.load(f)
embedding_aoai_connection = AzureOpenAIConnection(
api_key=secret_manager.get("embedding-aoai-api-key"),
api_base=secret_manager.get("embedding-aoai-api-base"),
api_type="azure",
api_version="2023-03-15-preview",
)
chat_aoai_connection = AzureOpenAIConnection(
api_key=secret_manager.get("chat-aoai-api-key"),
api_base=secret_manager.get("chat-aoai-api-base"),
api_type="azure",
api_version="2023-03-15-preview",
)
api_key = secret_manager.get("embedding-aoai-api-key", None)
if api_key:
logging.info("Using api_key access Azure OpenAI")
embedding_aoai_connection = AzureOpenAIConnection(
api_key=secret_manager.get("embedding-aoai-api-key"),
api_base=secret_manager.get("embedding-aoai-api-base"),
api_type="azure",
api_version="2023-03-15-preview",
)
chat_aoai_connection = AzureOpenAIConnection(
api_key=secret_manager.get("chat-aoai-api-key"),
api_base=secret_manager.get("chat-aoai-api-base"),
api_type="azure",
api_version="2023-03-15-preview",
)
else:
logging.info("using managed identity access Azure OpenAI")
try:
token_provider = get_bearer_token_provider(
DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
)
logging.info("Successfully obtained token")
except Exception as e:
logging.error(f"Failed to obtain token: {e}")
raise Exception(f"Failed to obtain token: {e}")
embedding_aoai_connection = AzureOpenAIConnection(
token_provider=token_provider,
api_base=secret_manager.get("embedding-aoai-api-base"),
api_type="azure",
api_version="2023-03-15-preview",
)
chat_aoai_connection = AzureOpenAIConnection(
token_provider=token_provider,
api_base=secret_manager.get("chat-aoai-api-base"),
api_type="azure",
api_version="2023-03-15-preview",
)
embedding_deploy_name = secret_manager.get("embedding-deploy-name")
chat_deploy_name = secret_manager.get("chat-deploy-name")
shared_config_file = os.path.join(current_dir, "shared_config.json")

Просмотреть файл

@ -62,6 +62,13 @@ class EndpointDeploymentBase(OBOComponentBase):
}
)
logging.info("Using workspace connection key for OpenAI")
else:
logging.info("Using managed identity for OpenAI")
secrets.update(
{
f"{connection_type}-aoai-api-base": connection.target,
}
)
else:
raise ValueError(
"Please specify the connection id (AZUREML_WORKSPACE_CONNECTION_ID_AOAI_EMBEDDING & AZUREML_WORKSPACE_CONNECTION_ID_AOAI_CHAT) for embedding and chat" # noqa: E501
@ -151,7 +158,7 @@ class EndpointDeploymentBase(OBOComponentBase):
except Exception as e:
logging.error(f"Deployment failed: {e}")
logs = ml_client.online_deployments.get_logs(
name=deployment_name, endpoint_name=endpoint_name, lines=100
name=deployment_name, endpoint_name=endpoint_name, lines=300
)
logging.error(f"Endpoint deployment logs: {logs}")
raise e

Просмотреть файл

@ -17,7 +17,12 @@ RUN apt-get update && apt-get install -y \
libpython3.8-minimal \
libpython3.8-stdlib \
python3.8-minimal \
python3.8 &&\
python3.8 \
libssl1.1=1.1.1f-1ubuntu2.23 \
openssl=1.1.1f-1ubuntu2.23 \
curl=7.68.0-1ubuntu2.23 \
libcurl3-gnutls=7.68.0-1ubuntu2.23 \
libcurl4=7.68.0-1ubuntu2.23 &&\
rm -rf /var/lib/apt/lists/*
# Install MS SQL ODBC Driver
@ -43,7 +48,7 @@ RUN /bin/bash -c "source /opt/miniconda/etc/profile.d/conda.sh && \
conda activate $AZUREML_CONDA_ENVIRONMENT_PATH && \
pip install --upgrade pip && \
pip install -r requirements.txt && \
pip install promptflow-vectordb==0.2.13 && \
pip install promptflow-vectordb==0.2.13 promptflow-tools==1.4.0 && \
pip install db-copilot-tool==0.0.8 &&\
pip install cryptography==42.0.5 langchain==0.1.11 idna==3.7 sqlparse==0.5.0 gunicorn==22.0.0 Werkzeug==3.0.3 requests==2.32.0 azure-identity==1.16.1"

Просмотреть файл

@ -9,6 +9,12 @@ RUN apt-get update \
&& curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list | tee /etc/apt/sources.list.d/mssql-release.list \
&& apt-get update \
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18=18.3.3.1-1 \
python3.10-minimal \
python3.10 \
libpython3.10-minimal \
libpython3.10-stdlib \
curl=7.81.0-1ubuntu1.17 \
libcurl4=7.81.0-1ubuntu1.17 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
@ -25,15 +31,15 @@ RUN python -m pip install --upgrade pip && \
pip install -r requirements.txt && \
# Install promptflow environment
pip install promptflow-image-bundle[azure]==0.3.0 && \
pip install promptflow-vectordb==0.2.10 && \
pip install promptflow-vectordb==0.2.10 promptflow-tools==1.4.0 && \
## Install dbcopilot
pip install db-copilot-tool==0.0.8 &&\
pip install setuptools==70.0.0
pip install setuptools==70.0.0 grpcio==1.56.2
# Fix vulnerabilities
RUN /bin/bash -c "source /opt/miniconda/etc/profile.d/conda.sh && \
conda activate /opt/miniconda/envs/amlenv && \
pip install sqlparse==0.5.0 gunicorn==22.0.0 Werkzeug==3.0.3 azure-identity==1.16.1 certifi==2024.07.04&& \
pip install sqlparse==0.5.0 gunicorn==22.0.0 Werkzeug==3.0.3 azure-identity==1.16.1 certifi==2024.07.04 setuptools==70.0.0&& \
conda deactivate"
# For GUNICORN_CMD_ARGS, we need to set the timeout to be 0 so that the server will not timeout