Debugging to solve FeathrClient authorization error

This commit is contained in:
Rizo-R 2023-03-22 14:31:16 -07:00
Родитель 4fc9499a93
Коммит aa47471960
6 изменённых файлов: 89 добавлений и 40 удалений

Просмотреть файл

@ -46,12 +46,15 @@ def parse_args():
# parser.add_argument("--azure_client_id", type=str) # parser.add_argument("--azure_client_id", type=str)
# parser.add_argument("--azure_tenant_id", type=str) # parser.add_argument("--azure_tenant_id", type=str)
parser.add_argument("--key_vault_name", type=str) parser.add_argument("--key_vault_name", type=str)
parser.add_argument("--synapse_workspace_url", type=str) parser.add_argument("--synapse_workspace_name", type=str)
parser.add_argument("--adls_account", type=str) parser.add_argument("--adls_account", type=str)
parser.add_argument("--adls_fs_name", type=str) parser.add_argument("--adls_fs_name", type=str)
parser.add_argument("--webapp_name", type=str) parser.add_argument("--webapp_name", type=str)
parser.add_argument("--raw_data", type=str) parser.add_argument("--raw_data", type=str)
parser.add_argument("--train_data", type=str) parser.add_argument("--train_data", type=str)
parser.add_argument("--sp_client_id", type=str)
parser.add_argument("--sp_client_secret", type=str)
parser.add_argument("--tenant_id", type=str)
args = parser.parse_args() args = parser.parse_args()
return args return args
@ -65,8 +68,9 @@ def set_environment_variables():
# os.environ['AZURE_TENANT_ID'] = utils.fs_config.get("tenant_id") # os.environ['AZURE_TENANT_ID'] = utils.fs_config.get("tenant_id")
# # TODO: add client secret, adls key to environment variables # # TODO: add client secret, adls key to environment variables
# os.environ['AZURE_CLIENT_ID'] = args.azure_client_id os.environ['AZURE_CLIENT_ID'] = args.sp_client_id
# os.environ['AZURE_TENANT_ID'] = args.azure_tenant_id os.environ['AZURE_CLIENT_SECRET'] = args.sp_client_secret
os.environ['AZURE_TENANT_ID'] = args.tenant_id
os.environ['ADLS_ACCOUNT'] = args.adls_account os.environ['ADLS_ACCOUNT'] = args.adls_account
def set_spark_session(): def set_spark_session():
@ -98,7 +102,9 @@ def get_data_source_path(feathr_client):
def main(args): def main(args):
feathr_client = utils.get_feathr_client(key_vault_name=args.key_vault_name, synapse_workspace_url=args.synapse_workspace_url, adls_account=args.adls_account, adls_fs_name=args.adls_fs_name, webapp_name=args.webapp_name) import feathr
print("Feathr version:", feathr.__version__)
feathr_client = utils.get_feathr_client(key_vault_name=args.key_vault_name, synapse_workspace_name=args.synapse_workspace_name, adls_account=args.adls_account, adls_fs_name=args.adls_fs_name, webapp_name=args.webapp_name)
set_spark_session() set_spark_session()
data_source_path = get_data_source_path(feathr_client) data_source_path = get_data_source_path(feathr_client)

Просмотреть файл

@ -2,7 +2,7 @@ import yaml
import os import os
import logging import logging
from pathlib import Path from pathlib import Path
from azure.identity import DefaultAzureCredential from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
from azure.keyvault.secrets import SecretClient from azure.keyvault.secrets import SecretClient
from feathr import FeathrClient from feathr import FeathrClient
@ -29,16 +29,19 @@ logging.basicConfig(
def get_credential(): def get_credential():
credential = DefaultAzureCredential( credential = DefaultAzureCredential(
exclude_interactive_browser_credential=True) exclude_interactive_browser_credential=True)
# client_id = "c3830013-88b5-402a-b256-d5031e4f3b19"
# credential = ManagedIdentityCredential(client_id=client_id)
return credential return credential
def set_required_feathr_config( def set_required_feathr_config(
key_vault_name: str, key_vault_name: str,
synapse_workspace_url: str, synapse_workspace_name: str,
adls_account: str, adls_account: str,
adls_fs_name: str, adls_fs_name: str,
webapp_name: str, webapp_name: str,
credential: DefaultAzureCredential # credential: DefaultAzureCredential
credential
): ):
# # Get all the required credentials from Azure Key Vault # # Get all the required credentials from Azure Key Vault
@ -52,9 +55,19 @@ def set_required_feathr_config(
# # synapse_workspace_url = resource_prefix + "syws" # # synapse_workspace_url = resource_prefix + "syws"
# # adls_account = resource_prefix + "dls" # # adls_account = resource_prefix + "dls"
# # adls_fs_name = resource_prefix + "fs" # # adls_fs_name = resource_prefix + "fs"
# Check if given credential can get token successfully.
print(type(credential))
print("Environment variables: {")
for k, v in os.environ.items():
print(f"{k}: {v}")
print("}")
# print("Attempting to get access token...")
# print("Access token:", credential.get_token("https://management.azure.com/.default"))
# print("Got access token!")
print()
key_vault_uri = f"https://{key_vault_name}.vault.azure.net" key_vault_url = f"https://{key_vault_name}.vault.azure.net"
client = SecretClient(vault_url=key_vault_uri, credential=credential) client = SecretClient(vault_url=key_vault_url, credential=credential)
secretName = "FEATHR-ONLINE-STORE-CONN" secretName = "FEATHR-ONLINE-STORE-CONN"
retrieved_secret = str(client.get_secret(secretName).value) retrieved_secret = str(client.get_secret(secretName).value)
@ -65,7 +78,7 @@ def set_required_feathr_config(
redis_ssl = retrieved_secret.split(',')[2].split("ssl=", 1)[1] redis_ssl = retrieved_secret.split(',')[2].split("ssl=", 1)[1]
# Set appropriate environment variables for overriding feathr config # Set appropriate environment variables for overriding feathr config
os.environ['spark_config__azure_synapse__dev_url'] = f'https://{synapse_workspace_url}.dev.azuresynapse.net' os.environ['spark_config__azure_synapse__dev_url'] = f'https://{synapse_workspace_name}.dev.azuresynapse.net'
os.environ['spark_config__azure_synapse__pool_name'] = 'spdev' os.environ['spark_config__azure_synapse__pool_name'] = 'spdev'
os.environ['spark_config__azure_synapse__workspace_dir'] = f'abfss://{adls_fs_name}@{adls_account}.dfs.core.windows.net/feathr_project' os.environ['spark_config__azure_synapse__workspace_dir'] = f'abfss://{adls_fs_name}@{adls_account}.dfs.core.windows.net/feathr_project'
os.environ['online_store__redis__host'] = redis_host os.environ['online_store__redis__host'] = redis_host
@ -77,13 +90,13 @@ def set_required_feathr_config(
def get_feathr_client( def get_feathr_client(
key_vault_name: str, key_vault_name: str,
synapse_workspace_url: str, synapse_workspace_name: str,
adls_account: str, adls_account: str,
adls_fs_name: str, adls_fs_name: str,
webapp_name: str, webapp_name: str,
): ):
credential = get_credential() credential = get_credential()
set_required_feathr_config(key_vault_name=key_vault_name, synapse_workspace_url=synapse_workspace_url, adls_account=adls_account, adls_fs_name=adls_fs_name, webapp_name=webapp_name, credential=credential) set_required_feathr_config(key_vault_name=key_vault_name, synapse_workspace_name=synapse_workspace_name, adls_account=adls_account, adls_fs_name=adls_fs_name, webapp_name=webapp_name, credential=credential)
config_file_path = os.path.join( config_file_path = os.path.join(
Path(__file__).parent, "feathr_config.yaml") Path(__file__).parent, "feathr_config.yaml")
logging.info("config path: {}".format(config_file_path)) logging.info("config path: {}".format(config_file_path))

Просмотреть файл

@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json $schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
type: pipeline type: pipeline
experiment_name: experiment_name: ''
# description: Training Pipeline to train a model that predicts taxi fare price, using Feathr as feature store # description: Training Pipeline to train a model that predicts taxi fare price, using Feathr as feature store
# <inputs_and_outputs> # <inputs_and_outputs>
@ -10,12 +10,15 @@ inputs:
path: azureml:feathr-data@latest path: azureml:feathr-data@latest
enable_monitoring: $enable_monitoring enable_monitoring: $enable_monitoring
key_vault_name: '' key_vault_name: ''
synapse_workspace_url: '' synapse_workspace_name: ''
adls_account: '' adls_account: ''
adls_fs_name: '' adls_fs_name: ''
redis_name: '' redis_name: ''
webapp_name: '' webapp_name: ''
table_name: 'taximonitoring' table_name: 'taximonitoring'
# sp_client_id: ''
# sp_client_secret: ''
# tenant_id: ''
outputs: outputs:
train_data: train_data:
trained_model: trained_model:
@ -34,17 +37,23 @@ jobs:
display_name: create-feathr-config display_name: create-feathr-config
code: ../../../data-science/src code: ../../../data-science/src
command: >- command: >-
echo TEST:$(key_vault_name) $(adls_account) $key &&
python create_feathr_config.py python create_feathr_config.py
--key_vault_name $(key_vault_name) --key_vault_name ${{inputs.key_vault_name}}
--synapse_workspace_url $(synapse_workspace_url) --synapse_workspace_name ${{inputs.synapse_workspace_name}}
--adls_account $(adls_account) --adls_account ${{inputs.adls_account}}
--adls_fs_name $(adls_fs_name) --adls_fs_name ${{inputs.adls_fs_name}}
--redis_name $(redis_name) --redis_name ${{inputs.redis_name}}
--webapp_name $(webapp_name) --webapp_name ${{inputs.webapp_name}}
--project_name 'feathr_demo' --project_name 'feathr_demo'
--spark_cluster 'azure_synapse' --spark_cluster 'azure_synapse'
--feathr_config_path 'feathr_config.yaml' --feathr_config_path 'feathr_config.yaml'
inputs:
key_vault_name: ${{parent.inputs.key_vault_name}}
synapse_workspace_name: ${{parent.inputs.synapse_workspace_name}}
adls_account: ${{parent.inputs.adls_account}}
adls_fs_name: ${{parent.inputs.adls_fs_name}}
redis_name: ${{parent.inputs.redis_name}}
webapp_name: ${{parent.inputs.webapp_name}}
environment: azureml:feathr-demo-env@latest environment: azureml:feathr-demo-env@latest
prep_data: prep_data:
@ -54,16 +63,27 @@ jobs:
command: >- command: >-
pip install -r requirements/feathr-requirements.txt --use-deprecated=legacy-resolver && pip install -r requirements/feathr-requirements.txt --use-deprecated=legacy-resolver &&
python prep.py python prep.py
--key_vault_name $(key_vault_name) --key_vault_name ${{inputs.key_vault_name}}
--synapse_workspace_url $(synapse_workspace_url) --synapse_workspace_name ${{inputs.synapse_workspace_name}}
--adls_account $(adls_account) --adls_account ${{inputs.adls_account}}
--adls_fs_name $(adls_fs_name) --adls_fs_name ${{inputs.adls_fs_name}}
--webapp_name $(webapp_name) --webapp_name ${{inputs.webapp_name}}
--raw_data ${{inputs.raw_data}} --raw_data ${{inputs.raw_data}}
--train_data ${{outputs.train_data}}] --train_data ${{outputs.train_data}}
# --sp_client_id ${{inputs.sp_client_id}}
# --sp_client_secret ${{inputs.sp_client_secret}}
# --tenant_id ${{inputs.tenant_id}}
environment: azureml:feathr-demo-env@latest environment: azureml:feathr-demo-env@latest
inputs: inputs:
key_vault_name: ${{parent.inputs.key_vault_name}}
synapse_workspace_name: ${{parent.inputs.synapse_workspace_name}}
adls_account: ${{parent.inputs.adls_account}}
adls_fs_name: ${{parent.inputs.adls_fs_name}}
webapp_name: ${{parent.inputs.webapp_name}}
raw_data: ${{parent.inputs.input}} raw_data: ${{parent.inputs.input}}
# sp_client_id: ${{parent.inputs.sp_client_id}}
# sp_client_secret: ${{parent.inputs.sp_client_secret}}
# tenant_id: ${{parent.inputs.tenant_id}}
outputs: outputs:
train_data: ${{parent.outputs.train_data}} train_data: ${{parent.outputs.train_data}}

Просмотреть файл

@ -26,6 +26,8 @@ resources:
type: github type: github
ref: feature/feathr ref: feature/feathr
stages: stages:
- stage: DeployTrainingPipeline - stage: DeployTrainingPipeline
displayName: Deploy Training Pipeline displayName: Deploy Training Pipeline
@ -56,6 +58,9 @@ stages:
data_type: uri_file data_type: uri_file
data_name: feathr-data data_name: feathr-data
data_file: mlops/azureml/train/data.yml data_file: mlops/azureml/train/data.yml
# - template: templates/${{ variables.version }}/create-service-principal.yml@mlops-templates
# parameters:
# sp_name: $(sp_name)
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates - template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
parameters: parameters:
pipeline_file: mlops/azureml/train/pipeline.yml pipeline_file: mlops/azureml/train/pipeline.yml
@ -63,8 +68,11 @@ stages:
display_name: $(environment)_taxi_fare_run_$(Build.BuildID) display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
enable_monitoring: $(enable_monitoring) enable_monitoring: $(enable_monitoring)
key_vault_name: $(key_vault_name) key_vault_name: $(key_vault_name)
synapse_workspace_url: $(synapse_workspace_url) synapse_workspace_name: $(synapse_workspace_name)
adls_account: $(adls_account) adls_account: $(adls_account)
adls_fs_name: $(adls_fs_name) adls_fs_name: $(adls_fs_name)
redis_name: $(redis_name) redis_name: $(redis_name)
webapp_name: $(webapp_name) webapp_name: $(webapp_name)
# sp_client_id: $(service_principal_id)
# sp_client_secret: $(service_principal_secret)
# tenant_id: $(tenant_id)

Просмотреть файл

@ -6,9 +6,9 @@ variables:
# Global # Global
ap_vm_image: ubuntu-20.04 ap_vm_image: ubuntu-20.04
namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters. namespace: rizo13 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
postfix: 001 postfix: 002
location: eastus location: canadacentral
environment: dev environment: dev
enable_aml_computecluster: true enable_aml_computecluster: true
enable_monitoring: false enable_monitoring: false
@ -20,9 +20,9 @@ variables:
feathr_app_image: feathrfeaturestore/feathr-registry feathr_app_image: feathrfeaturestore/feathr-registry
feathr_app_image_tag: releases-v0.9.0 feathr_app_image_tag: releases-v0.9.0
react_enable_rbac: true react_enable_rbac: true
aad_client_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # refer to prefeaturestore readme for details aad_client_id: 2327c20b-fa88-46eb-8a3a-57283a716619 # refer to prefeaturestore readme for details
spark_version: 3.1 spark_version: 3.1
priviledged_object_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components priviledged_object_id: 7cbdebc0-e724-4683-9094-4c0cb8a4266f # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
# added flag for this: https://registry.terraform.io/providers/hashicorp/azurerm/2.99.0/docs/guides/3.0-app-service-beta # added flag for this: https://registry.terraform.io/providers/hashicorp/azurerm/2.99.0/docs/guides/3.0-app-service-beta
# remove the environment variable when this is no longer required # remove the environment variable when this is no longer required
ARM_THREEPOINTZERO_BETA_RESOURCES: true ARM_THREEPOINTZERO_BETA_RESOURCES: true
@ -59,8 +59,9 @@ variables:
# For feathr reference # For feathr reference
resource_prefix: $(namespace)fs resource_prefix: $(namespace)fs
key_vault_name: kv-$(resource_prefix)-$(postfix)$(environment) key_vault_name: kv-$(resource_prefix)-$(postfix)$(environment)
synapse_workspace_url: sy$(resource_prefix)-$(postfix)$(environment) synapse_workspace_name: sy$(resource_prefix)-$(postfix)$(environment)
adls_account: st$(resource_prefix)$(postfix)$(environment) adls_account: st$(resource_prefix)$(postfix)$(environment)
adls_fs_name: dl$(resource_prefix)$(postfix)$(environment) adls_fs_name: dl$(resource_prefix)$(postfix)$(environment)
redis_name: rd$(resource_prefix)$(postfix)$(environment) redis_name: rd$(resource_prefix)$(postfix)$(environment)
webapp_name: app$(resource_prefix)$(postfix)$(environment) webapp_name: app$(resource_prefix)$(postfix)$(environment)
# sp_name: sp$(resource_prefix)$(postfix)$(environment)

Просмотреть файл

@ -6,9 +6,9 @@ variables:
# Global # Global
ap_vm_image: ubuntu-20.04 ap_vm_image: ubuntu-20.04
namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters. namespace: rizo13 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
postfix: 001 postfix: 002
location: eastus location: canadacentral
environment: prod environment: prod
enable_aml_computecluster: true enable_aml_computecluster: true
enable_monitoring: false enable_monitoring: false
@ -20,9 +20,9 @@ variables:
feathr_app_image: feathrfeaturestore/feathr-registry feathr_app_image: feathrfeaturestore/feathr-registry
feathr_app_image_tag: releases-v0.9.0 feathr_app_image_tag: releases-v0.9.0
react_enable_rbac: true react_enable_rbac: true
aad_client_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # refer to prefeaturestore readme for details aad_client_id: 2327c20b-fa88-46eb-8a3a-57283a716619 # refer to prefeaturestore readme for details
spark_version: 3.1 spark_version: 3.1
priviledged_object_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components priviledged_object_id: 7cbdebc0-e724-4683-9094-4c0cb8a4266f # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
# added flag for this: https://registry.terraform.io/providers/hashicorp/azurerm/2.99.0/docs/guides/3.0-app-service-beta # added flag for this: https://registry.terraform.io/providers/hashicorp/azurerm/2.99.0/docs/guides/3.0-app-service-beta
# remove the environment variable when this is no longer required # remove the environment variable when this is no longer required
ARM_THREEPOINTZERO_BETA_RESOURCES: true ARM_THREEPOINTZERO_BETA_RESOURCES: true
@ -59,8 +59,9 @@ variables:
# For feathr reference # For feathr reference
resource_prefix: $(namespace)fs resource_prefix: $(namespace)fs
key_vault_name: kv-$(resource_prefix)-$(postfix)$(environment) key_vault_name: kv-$(resource_prefix)-$(postfix)$(environment)
synapse_workspace_url: sy$(resource_prefix)-$(postfix)$(environment) synapse_workspace_name: sy$(resource_prefix)-$(postfix)$(environment)
adls_account: st$(resource_prefix)$(postfix)$(environment) adls_account: st$(resource_prefix)$(postfix)$(environment)
adls_fs_name: dl$(resource_prefix)$(postfix)$(environment) adls_fs_name: dl$(resource_prefix)$(postfix)$(environment)
redis_name: rd$(resource_prefix)$(postfix)$(environment) redis_name: rd$(resource_prefix)$(postfix)$(environment)
webapp_name: app$(resource_prefix)$(postfix)$(environment) webapp_name: app$(resource_prefix)$(postfix)$(environment)
# sp_name: sp$(resource_prefix)$(postfix)$(environment)