Debugging to solve FeathrClient authorization error

This commit is contained in:
Rizo-R 2023-03-22 14:31:16 -07:00
Родитель 4fc9499a93
Коммит aa47471960
6 изменённых файлов: 89 добавлений и 40 удалений

Просмотреть файл

@ -46,12 +46,15 @@ def parse_args():
# parser.add_argument("--azure_client_id", type=str)
# parser.add_argument("--azure_tenant_id", type=str)
parser.add_argument("--key_vault_name", type=str)
parser.add_argument("--synapse_workspace_url", type=str)
parser.add_argument("--synapse_workspace_name", type=str)
parser.add_argument("--adls_account", type=str)
parser.add_argument("--adls_fs_name", type=str)
parser.add_argument("--webapp_name", type=str)
parser.add_argument("--raw_data", type=str)
parser.add_argument("--train_data", type=str)
parser.add_argument("--sp_client_id", type=str)
parser.add_argument("--sp_client_secret", type=str)
parser.add_argument("--tenant_id", type=str)
args = parser.parse_args()
return args
@ -65,8 +68,9 @@ def set_environment_variables():
# os.environ['AZURE_TENANT_ID'] = utils.fs_config.get("tenant_id")
# # TODO: add client secret, adls key to environment variables
# os.environ['AZURE_CLIENT_ID'] = args.azure_client_id
# os.environ['AZURE_TENANT_ID'] = args.azure_tenant_id
os.environ['AZURE_CLIENT_ID'] = args.sp_client_id
os.environ['AZURE_CLIENT_SECRET'] = args.sp_client_secret
os.environ['AZURE_TENANT_ID'] = args.tenant_id
os.environ['ADLS_ACCOUNT'] = args.adls_account
def set_spark_session():
@ -98,7 +102,9 @@ def get_data_source_path(feathr_client):
def main(args):
feathr_client = utils.get_feathr_client(key_vault_name=args.key_vault_name, synapse_workspace_url=args.synapse_workspace_url, adls_account=args.adls_account, adls_fs_name=args.adls_fs_name, webapp_name=args.webapp_name)
import feathr
print("Feathr version:", feathr.__version__)
feathr_client = utils.get_feathr_client(key_vault_name=args.key_vault_name, synapse_workspace_name=args.synapse_workspace_name, adls_account=args.adls_account, adls_fs_name=args.adls_fs_name, webapp_name=args.webapp_name)
set_spark_session()
data_source_path = get_data_source_path(feathr_client)

Просмотреть файл

@ -2,7 +2,7 @@ import yaml
import os
import logging
from pathlib import Path
from azure.identity import DefaultAzureCredential
from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
from azure.keyvault.secrets import SecretClient
from feathr import FeathrClient
@ -29,16 +29,19 @@ logging.basicConfig(
def get_credential():
    """Build the Azure credential that this module hands to its Azure clients.

    Returns:
        A ``DefaultAzureCredential`` created with
        ``exclude_interactive_browser_credential=True``.
    """
    # Disabled debugging alternative: authenticate with one explicit managed
    # identity instead of the default credential chain.
    # client_id = "c3830013-88b5-402a-b256-d5031e4f3b19"
    # credential = ManagedIdentityCredential(client_id=client_id)
    return DefaultAzureCredential(exclude_interactive_browser_credential=True)
def set_required_feathr_config(
key_vault_name: str,
synapse_workspace_url: str,
synapse_workspace_name: str,
adls_account: str,
adls_fs_name: str,
webapp_name: str,
credential: DefaultAzureCredential
# credential: DefaultAzureCredential
credential
):
# # Get all the required credentials from Azure Key Vault
@ -52,9 +55,19 @@ def set_required_feathr_config(
# # synapse_workspace_url = resource_prefix + "syws"
# # adls_account = resource_prefix + "dls"
# # adls_fs_name = resource_prefix + "fs"
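# Debug output: print the credential type and dump the process environment.
# NOTE(review): the loop below prints every environment variable, including any
# secrets stored there, to the job log; remove it once debugging is done.
# The actual token check further down is currently commented out.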
print(type(credential))
print("Environment variables: {")
for k, v in os.environ.items():
print(f"{k}: {v}")
print("}")
# print("Attempting to get access token...")
# print("Access token:", credential.get_token("https://management.azure.com/.default"))
# print("Got access token!")
print()
key_vault_uri = f"https://{key_vault_name}.vault.azure.net"
client = SecretClient(vault_url=key_vault_uri, credential=credential)
key_vault_url = f"https://{key_vault_name}.vault.azure.net"
client = SecretClient(vault_url=key_vault_url, credential=credential)
secretName = "FEATHR-ONLINE-STORE-CONN"
retrieved_secret = str(client.get_secret(secretName).value)
@ -65,7 +78,7 @@ def set_required_feathr_config(
redis_ssl = retrieved_secret.split(',')[2].split("ssl=", 1)[1]
# Set appropriate environment variables for overriding feathr config
os.environ['spark_config__azure_synapse__dev_url'] = f'https://{synapse_workspace_url}.dev.azuresynapse.net'
os.environ['spark_config__azure_synapse__dev_url'] = f'https://{synapse_workspace_name}.dev.azuresynapse.net'
os.environ['spark_config__azure_synapse__pool_name'] = 'spdev'
os.environ['spark_config__azure_synapse__workspace_dir'] = f'abfss://{adls_fs_name}@{adls_account}.dfs.core.windows.net/feathr_project'
os.environ['online_store__redis__host'] = redis_host
@ -77,13 +90,13 @@ def set_required_feathr_config(
def get_feathr_client(
key_vault_name: str,
synapse_workspace_url: str,
synapse_workspace_name: str,
adls_account: str,
adls_fs_name: str,
webapp_name: str,
):
credential = get_credential()
set_required_feathr_config(key_vault_name=key_vault_name, synapse_workspace_url=synapse_workspace_url, adls_account=adls_account, adls_fs_name=adls_fs_name, webapp_name=webapp_name, credential=credential)
set_required_feathr_config(key_vault_name=key_vault_name, synapse_workspace_name=synapse_workspace_name, adls_account=adls_account, adls_fs_name=adls_fs_name, webapp_name=webapp_name, credential=credential)
config_file_path = os.path.join(
Path(__file__).parent, "feathr_config.yaml")
logging.info("config path: {}".format(config_file_path))

Просмотреть файл

@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline
experiment_name:
experiment_name: ''
# description: Training Pipeline to train a model that predicts taxi fare price, using Feathr as feature store
# <inputs_and_outputs>
@ -10,12 +10,15 @@ inputs:
path: azureml:feathr-data@latest
enable_monitoring: $enable_monitoring
key_vault_name: ''
synapse_workspace_url: ''
synapse_workspace_name: ''
adls_account: ''
adls_fs_name: ''
redis_name: ''
webapp_name: ''
table_name: 'taximonitoring'
# sp_client_id: ''
# sp_client_secret: ''
# tenant_id: ''
outputs:
train_data:
trained_model:
@ -34,17 +37,23 @@ jobs:
display_name: create-feathr-config
code: ../../../data-science/src
command: >-
echo TEST:$(key_vault_name) $(adls_account) $key &&
python create_feathr_config.py
--key_vault_name $(key_vault_name)
--synapse_workspace_url $(synapse_workspace_url)
--adls_account $(adls_account)
--adls_fs_name $(adls_fs_name)
--redis_name $(redis_name)
--webapp_name $(webapp_name)
--key_vault_name ${{inputs.key_vault_name}}
--synapse_workspace_name ${{inputs.synapse_workspace_name}}
--adls_account ${{inputs.adls_account}}
--adls_fs_name ${{inputs.adls_fs_name}}
--redis_name ${{inputs.redis_name}}
--webapp_name ${{inputs.webapp_name}}
--project_name 'feathr_demo'
--spark_cluster 'azure_synapse'
--feathr_config_path 'feathr_config.yaml'
inputs:
key_vault_name: ${{parent.inputs.key_vault_name}}
synapse_workspace_name: ${{parent.inputs.synapse_workspace_name}}
adls_account: ${{parent.inputs.adls_account}}
adls_fs_name: ${{parent.inputs.adls_fs_name}}
redis_name: ${{parent.inputs.redis_name}}
webapp_name: ${{parent.inputs.webapp_name}}
environment: azureml:feathr-demo-env@latest
prep_data:
@ -54,16 +63,27 @@ jobs:
command: >-
pip install -r requirements/feathr-requirements.txt --use-deprecated=legacy-resolver &&
python prep.py
--key_vault_name $(key_vault_name)
--synapse_workspace_url $(synapse_workspace_url)
--adls_account $(adls_account)
--adls_fs_name $(adls_fs_name)
--webapp_name $(webapp_name)
--key_vault_name ${{inputs.key_vault_name}}
--synapse_workspace_name ${{inputs.synapse_workspace_name}}
--adls_account ${{inputs.adls_account}}
--adls_fs_name ${{inputs.adls_fs_name}}
--webapp_name ${{inputs.webapp_name}}
--raw_data ${{inputs.raw_data}}
--train_data ${{outputs.train_data}}]
--train_data ${{outputs.train_data}}
# --sp_client_id ${{inputs.sp_client_id}}
# --sp_client_secret ${{inputs.sp_client_secret}}
# --tenant_id ${{inputs.tenant_id}}
environment: azureml:feathr-demo-env@latest
inputs:
key_vault_name: ${{parent.inputs.key_vault_name}}
synapse_workspace_name: ${{parent.inputs.synapse_workspace_name}}
adls_account: ${{parent.inputs.adls_account}}
adls_fs_name: ${{parent.inputs.adls_fs_name}}
webapp_name: ${{parent.inputs.webapp_name}}
raw_data: ${{parent.inputs.input}}
# sp_client_id: ${{parent.inputs.sp_client_id}}
# sp_client_secret: ${{parent.inputs.sp_client_secret}}
# tenant_id: ${{parent.inputs.tenant_id}}
outputs:
train_data: ${{parent.outputs.train_data}}

Просмотреть файл

@ -26,6 +26,8 @@ resources:
type: github
ref: feature/feathr
stages:
- stage: DeployTrainingPipeline
displayName: Deploy Training Pipeline
@ -56,6 +58,9 @@ stages:
data_type: uri_file
data_name: feathr-data
data_file: mlops/azureml/train/data.yml
# - template: templates/${{ variables.version }}/create-service-principal.yml@mlops-templates
# parameters:
# sp_name: $(sp_name)
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
parameters:
pipeline_file: mlops/azureml/train/pipeline.yml
@ -63,8 +68,11 @@ stages:
display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
enable_monitoring: $(enable_monitoring)
key_vault_name: $(key_vault_name)
synapse_workspace_url: $(synapse_workspace_url)
synapse_workspace_name: $(synapse_workspace_name)
adls_account: $(adls_account)
adls_fs_name: $(adls_fs_name)
redis_name: $(redis_name)
webapp_name: $(webapp_name)
# sp_client_id: $(service_principal_id)
# sp_client_secret: $(service_principal_secret)
# tenant_id: $(tenant_id)

Просмотреть файл

@ -6,9 +6,9 @@ variables:
# Global
ap_vm_image: ubuntu-20.04
namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
postfix: 001
location: eastus
namespace: rizo13 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
postfix: 002
location: canadacentral
environment: dev
enable_aml_computecluster: true
enable_monitoring: false
@ -20,9 +20,9 @@ variables:
feathr_app_image: feathrfeaturestore/feathr-registry
feathr_app_image_tag: releases-v0.9.0
react_enable_rbac: true
aad_client_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # refer to prefeaturestore readme for details
aad_client_id: 2327c20b-fa88-46eb-8a3a-57283a716619 # refer to prefeaturestore readme for details
spark_version: 3.1
priviledged_object_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
priviledged_object_id: 7cbdebc0-e724-4683-9094-4c0cb8a4266f # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
# added flag for this: https://registry.terraform.io/providers/hashicorp/azurerm/2.99.0/docs/guides/3.0-app-service-beta
# remove the environment variable when this is no longer required
ARM_THREEPOINTZERO_BETA_RESOURCES: true
@ -59,8 +59,9 @@ variables:
# For feathr reference
resource_prefix: $(namespace)fs
key_vault_name: kv-$(resource_prefix)-$(postfix)$(environment)
synapse_workspace_url: sy$(resource_prefix)-$(postfix)$(environment)
synapse_workspace_name: sy$(resource_prefix)-$(postfix)$(environment)
adls_account: st$(resource_prefix)$(postfix)$(environment)
adls_fs_name: dl$(resource_prefix)$(postfix)$(environment)
redis_name: rd$(resource_prefix)$(postfix)$(environment)
webapp_name: app$(resource_prefix)$(postfix)$(environment)
# sp_name: sp$(resource_prefix)$(postfix)$(environment)

Просмотреть файл

@ -6,9 +6,9 @@ variables:
# Global
ap_vm_image: ubuntu-20.04
namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
postfix: 001
location: eastus
namespace: rizo13 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
postfix: 002
location: canadacentral
environment: prod
enable_aml_computecluster: true
enable_monitoring: false
@ -20,9 +20,9 @@ variables:
feathr_app_image: feathrfeaturestore/feathr-registry
feathr_app_image_tag: releases-v0.9.0
react_enable_rbac: true
aad_client_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # refer to prefeaturestore readme for details
aad_client_id: 2327c20b-fa88-46eb-8a3a-57283a716619 # refer to prefeaturestore readme for details
spark_version: 3.1
priviledged_object_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
priviledged_object_id: 7cbdebc0-e724-4683-9094-4c0cb8a4266f # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
# added flag for this: https://registry.terraform.io/providers/hashicorp/azurerm/2.99.0/docs/guides/3.0-app-service-beta
# remove the environment variable when this is no longer required
ARM_THREEPOINTZERO_BETA_RESOURCES: true
@ -59,8 +59,9 @@ variables:
# For feathr reference
resource_prefix: $(namespace)fs
key_vault_name: kv-$(resource_prefix)-$(postfix)$(environment)
synapse_workspace_url: sy$(resource_prefix)-$(postfix)$(environment)
synapse_workspace_name: sy$(resource_prefix)-$(postfix)$(environment)
adls_account: st$(resource_prefix)$(postfix)$(environment)
adls_fs_name: dl$(resource_prefix)$(postfix)$(environment)
redis_name: rd$(resource_prefix)$(postfix)$(environment)
webapp_name: app$(resource_prefix)$(postfix)$(environment)
# sp_name: sp$(resource_prefix)$(postfix)$(environment)