Debugging to solve FeathrClient authorization error
This commit is contained in:
Родитель
4fc9499a93
Коммит
aa47471960
|
@ -46,12 +46,15 @@ def parse_args():
|
|||
# parser.add_argument("--azure_client_id", type=str)
|
||||
# parser.add_argument("--azure_tenant_id", type=str)
|
||||
parser.add_argument("--key_vault_name", type=str)
|
||||
parser.add_argument("--synapse_workspace_url", type=str)
|
||||
parser.add_argument("--synapse_workspace_name", type=str)
|
||||
parser.add_argument("--adls_account", type=str)
|
||||
parser.add_argument("--adls_fs_name", type=str)
|
||||
parser.add_argument("--webapp_name", type=str)
|
||||
parser.add_argument("--raw_data", type=str)
|
||||
parser.add_argument("--train_data", type=str)
|
||||
parser.add_argument("--sp_client_id", type=str)
|
||||
parser.add_argument("--sp_client_secret", type=str)
|
||||
parser.add_argument("--tenant_id", type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
return args
|
||||
|
@ -65,8 +68,9 @@ def set_environment_variables():
|
|||
# os.environ['AZURE_TENANT_ID'] = utils.fs_config.get("tenant_id")
|
||||
|
||||
# # TODO: add client secret, adls key to environment variables
|
||||
# os.environ['AZURE_CLIENT_ID'] = args.azure_client_id
|
||||
# os.environ['AZURE_TENANT_ID'] = args.azure_tenant_id
|
||||
os.environ['AZURE_CLIENT_ID'] = args.sp_client_id
|
||||
os.environ['AZURE_CLIENT_SECRET'] = args.sp_client_secret
|
||||
os.environ['AZURE_TENANT_ID'] = args.tenant_id
|
||||
os.environ['ADLS_ACCOUNT'] = args.adls_account
|
||||
|
||||
def set_spark_session():
|
||||
|
@ -98,7 +102,9 @@ def get_data_source_path(feathr_client):
|
|||
|
||||
|
||||
def main(args):
|
||||
feathr_client = utils.get_feathr_client(key_vault_name=args.key_vault_name, synapse_workspace_url=args.synapse_workspace_url, adls_account=args.adls_account, adls_fs_name=args.adls_fs_name, webapp_name=args.webapp_name)
|
||||
import feathr
|
||||
print("Feathr version:", feathr.__version__)
|
||||
feathr_client = utils.get_feathr_client(key_vault_name=args.key_vault_name, synapse_workspace_name=args.synapse_workspace_name, adls_account=args.adls_account, adls_fs_name=args.adls_fs_name, webapp_name=args.webapp_name)
|
||||
|
||||
set_spark_session()
|
||||
data_source_path = get_data_source_path(feathr_client)
|
||||
|
|
|
@ -2,7 +2,7 @@ import yaml
|
|||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from azure.identity import DefaultAzureCredential
|
||||
from azure.identity import DefaultAzureCredential, ManagedIdentityCredential
|
||||
from azure.keyvault.secrets import SecretClient
|
||||
from feathr import FeathrClient
|
||||
|
||||
|
@ -29,16 +29,19 @@ logging.basicConfig(
|
|||
def get_credential():
|
||||
credential = DefaultAzureCredential(
|
||||
exclude_interactive_browser_credential=True)
|
||||
# client_id = "c3830013-88b5-402a-b256-d5031e4f3b19"
|
||||
# credential = ManagedIdentityCredential(client_id=client_id)
|
||||
return credential
|
||||
|
||||
|
||||
def set_required_feathr_config(
|
||||
key_vault_name: str,
|
||||
synapse_workspace_url: str,
|
||||
synapse_workspace_name: str,
|
||||
adls_account: str,
|
||||
adls_fs_name: str,
|
||||
webapp_name: str,
|
||||
credential: DefaultAzureCredential
|
||||
# credential: DefaultAzureCredential
|
||||
credential
|
||||
):
|
||||
|
||||
# # Get all the required credentials from Azure Key Vault
|
||||
|
@ -52,9 +55,19 @@ def set_required_feathr_config(
|
|||
# # synapse_workspace_url = resource_prefix + "syws"
|
||||
# # adls_account = resource_prefix + "dls"
|
||||
# # adls_fs_name = resource_prefix + "fs"
|
||||
# Check if given credential can get token successfully.
|
||||
print(type(credential))
|
||||
print("Environment variables: {")
|
||||
for k, v in os.environ.items():
|
||||
print(f"{k}: {v}")
|
||||
print("}")
|
||||
# print("Attempting to get access token...")
|
||||
# print("Access token:", credential.get_token("https://management.azure.com/.default"))
|
||||
# print("Got access token!")
|
||||
print()
|
||||
|
||||
key_vault_uri = f"https://{key_vault_name}.vault.azure.net"
|
||||
client = SecretClient(vault_url=key_vault_uri, credential=credential)
|
||||
key_vault_url = f"https://{key_vault_name}.vault.azure.net"
|
||||
client = SecretClient(vault_url=key_vault_url, credential=credential)
|
||||
secretName = "FEATHR-ONLINE-STORE-CONN"
|
||||
retrieved_secret = str(client.get_secret(secretName).value)
|
||||
|
||||
|
@ -65,7 +78,7 @@ def set_required_feathr_config(
|
|||
redis_ssl = retrieved_secret.split(',')[2].split("ssl=", 1)[1]
|
||||
|
||||
# Set appropriate environment variables for overriding feathr config
|
||||
os.environ['spark_config__azure_synapse__dev_url'] = f'https://{synapse_workspace_url}.dev.azuresynapse.net'
|
||||
os.environ['spark_config__azure_synapse__dev_url'] = f'https://{synapse_workspace_name}.dev.azuresynapse.net'
|
||||
os.environ['spark_config__azure_synapse__pool_name'] = 'spdev'
|
||||
os.environ['spark_config__azure_synapse__workspace_dir'] = f'abfss://{adls_fs_name}@{adls_account}.dfs.core.windows.net/feathr_project'
|
||||
os.environ['online_store__redis__host'] = redis_host
|
||||
|
@ -77,13 +90,13 @@ def set_required_feathr_config(
|
|||
|
||||
def get_feathr_client(
|
||||
key_vault_name: str,
|
||||
synapse_workspace_url: str,
|
||||
synapse_workspace_name: str,
|
||||
adls_account: str,
|
||||
adls_fs_name: str,
|
||||
webapp_name: str,
|
||||
):
|
||||
credential = get_credential()
|
||||
set_required_feathr_config(key_vault_name=key_vault_name, synapse_workspace_url=synapse_workspace_url, adls_account=adls_account, adls_fs_name=adls_fs_name, webapp_name=webapp_name, credential=credential)
|
||||
set_required_feathr_config(key_vault_name=key_vault_name, synapse_workspace_name=synapse_workspace_name, adls_account=adls_account, adls_fs_name=adls_fs_name, webapp_name=webapp_name, credential=credential)
|
||||
config_file_path = os.path.join(
|
||||
Path(__file__).parent, "feathr_config.yaml")
|
||||
logging.info("config path: {}".format(config_file_path))
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
|
||||
type: pipeline
|
||||
experiment_name:
|
||||
experiment_name: ''
|
||||
# description: Training Pipeline to train a model that predicts taxi fare price, using Feathr as feature store
|
||||
|
||||
# <inputs_and_outputs>
|
||||
|
@ -10,12 +10,15 @@ inputs:
|
|||
path: azureml:feathr-data@latest
|
||||
enable_monitoring: $enable_monitoring
|
||||
key_vault_name: ''
|
||||
synapse_workspace_url: ''
|
||||
synapse_workspace_name: ''
|
||||
adls_account: ''
|
||||
adls_fs_name: ''
|
||||
redis_name: ''
|
||||
webapp_name: ''
|
||||
table_name: 'taximonitoring'
|
||||
# sp_client_id: ''
|
||||
# sp_client_secret: ''
|
||||
# tenant_id: ''
|
||||
outputs:
|
||||
train_data:
|
||||
trained_model:
|
||||
|
@ -34,17 +37,23 @@ jobs:
|
|||
display_name: create-feathr-config
|
||||
code: ../../../data-science/src
|
||||
command: >-
|
||||
echo TEST:$(key_vault_name) $(adls_account) $key &&
|
||||
python create_feathr_config.py
|
||||
--key_vault_name $(key_vault_name)
|
||||
--synapse_workspace_url $(synapse_workspace_url)
|
||||
--adls_account $(adls_account)
|
||||
--adls_fs_name $(adls_fs_name)
|
||||
--redis_name $(redis_name)
|
||||
--webapp_name $(webapp_name)
|
||||
--key_vault_name ${{inputs.key_vault_name}}
|
||||
--synapse_workspace_name ${{inputs.synapse_workspace_name}}
|
||||
--adls_account ${{inputs.adls_account}}
|
||||
--adls_fs_name ${{inputs.adls_fs_name}}
|
||||
--redis_name ${{inputs.redis_name}}
|
||||
--webapp_name ${{inputs.webapp_name}}
|
||||
--project_name 'feathr_demo'
|
||||
--spark_cluster 'azure_synapse'
|
||||
--feathr_config_path 'feathr_config.yaml'
|
||||
inputs:
|
||||
key_vault_name: ${{parent.inputs.key_vault_name}}
|
||||
synapse_workspace_name: ${{parent.inputs.synapse_workspace_name}}
|
||||
adls_account: ${{parent.inputs.adls_account}}
|
||||
adls_fs_name: ${{parent.inputs.adls_fs_name}}
|
||||
redis_name: ${{parent.inputs.redis_name}}
|
||||
webapp_name: ${{parent.inputs.webapp_name}}
|
||||
environment: azureml:feathr-demo-env@latest
|
||||
|
||||
prep_data:
|
||||
|
@ -54,16 +63,27 @@ jobs:
|
|||
command: >-
|
||||
pip install -r requirements/feathr-requirements.txt --use-deprecated=legacy-resolver &&
|
||||
python prep.py
|
||||
--key_vault_name $(key_vault_name)
|
||||
--synapse_workspace_url $(synapse_workspace_url)
|
||||
--adls_account $(adls_account)
|
||||
--adls_fs_name $(adls_fs_name)
|
||||
--webapp_name $(webapp_name)
|
||||
--key_vault_name ${{inputs.key_vault_name}}
|
||||
--synapse_workspace_name ${{inputs.synapse_workspace_name}}
|
||||
--adls_account ${{inputs.adls_account}}
|
||||
--adls_fs_name ${{inputs.adls_fs_name}}
|
||||
--webapp_name ${{inputs.webapp_name}}
|
||||
--raw_data ${{inputs.raw_data}}
|
||||
--train_data ${{outputs.train_data}}]
|
||||
--train_data ${{outputs.train_data}}
|
||||
# --sp_client_id ${{inputs.sp_client_id}}
|
||||
# --sp_client_secret ${{inputs.sp_client_secret}}
|
||||
# --tenant_id ${{inputs.tenant_id}}
|
||||
environment: azureml:feathr-demo-env@latest
|
||||
inputs:
|
||||
key_vault_name: ${{parent.inputs.key_vault_name}}
|
||||
synapse_workspace_name: ${{parent.inputs.synapse_workspace_name}}
|
||||
adls_account: ${{parent.inputs.adls_account}}
|
||||
adls_fs_name: ${{parent.inputs.adls_fs_name}}
|
||||
webapp_name: ${{parent.inputs.webapp_name}}
|
||||
raw_data: ${{parent.inputs.input}}
|
||||
# sp_client_id: ${{parent.inputs.sp_client_id}}
|
||||
# sp_client_secret: ${{parent.inputs.sp_client_secret}}
|
||||
# tenant_id: ${{parent.inputs.tenant_id}}
|
||||
outputs:
|
||||
train_data: ${{parent.outputs.train_data}}
|
||||
|
||||
|
|
|
@ -26,6 +26,8 @@ resources:
|
|||
type: github
|
||||
ref: feature/feathr
|
||||
|
||||
|
||||
|
||||
stages:
|
||||
- stage: DeployTrainingPipeline
|
||||
displayName: Deploy Training Pipeline
|
||||
|
@ -56,6 +58,9 @@ stages:
|
|||
data_type: uri_file
|
||||
data_name: feathr-data
|
||||
data_file: mlops/azureml/train/data.yml
|
||||
# - template: templates/${{ variables.version }}/create-service-principal.yml@mlops-templates
|
||||
# parameters:
|
||||
# sp_name: $(sp_name)
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
pipeline_file: mlops/azureml/train/pipeline.yml
|
||||
|
@ -63,8 +68,11 @@ stages:
|
|||
display_name: $(environment)_taxi_fare_run_$(Build.BuildID)
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
key_vault_name: $(key_vault_name)
|
||||
synapse_workspace_url: $(synapse_workspace_url)
|
||||
synapse_workspace_name: $(synapse_workspace_name)
|
||||
adls_account: $(adls_account)
|
||||
adls_fs_name: $(adls_fs_name)
|
||||
redis_name: $(redis_name)
|
||||
webapp_name: $(webapp_name)
|
||||
# sp_client_id: $(service_principal_id)
|
||||
# sp_client_secret: $(service_principal_secret)
|
||||
# tenant_id: $(tenant_id)
|
|
@ -6,9 +6,9 @@ variables:
|
|||
# Global
|
||||
ap_vm_image: ubuntu-20.04
|
||||
|
||||
namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
|
||||
postfix: 001
|
||||
location: eastus
|
||||
namespace: rizo13 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
|
||||
postfix: 002
|
||||
location: canadacentral
|
||||
environment: dev
|
||||
enable_aml_computecluster: true
|
||||
enable_monitoring: false
|
||||
|
@ -20,9 +20,9 @@ variables:
|
|||
feathr_app_image: feathrfeaturestore/feathr-registry
|
||||
feathr_app_image_tag: releases-v0.9.0
|
||||
react_enable_rbac: true
|
||||
aad_client_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # refer to prefeaturestore readme for details
|
||||
aad_client_id: 2327c20b-fa88-46eb-8a3a-57283a716619 # refer to prefeaturestore readme for details
|
||||
spark_version: 3.1
|
||||
priviledged_object_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
|
||||
priviledged_object_id: 7cbdebc0-e724-4683-9094-4c0cb8a4266f # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
|
||||
# added flag for this: https://registry.terraform.io/providers/hashicorp/azurerm/2.99.0/docs/guides/3.0-app-service-beta
|
||||
# remove the environment variable when this is no longer required
|
||||
ARM_THREEPOINTZERO_BETA_RESOURCES: true
|
||||
|
@ -59,8 +59,9 @@ variables:
|
|||
# For feathr reference
|
||||
resource_prefix: $(namespace)fs
|
||||
key_vault_name: kv-$(resource_prefix)-$(postfix)$(environment)
|
||||
synapse_workspace_url: sy$(resource_prefix)-$(postfix)$(environment)
|
||||
synapse_workspace_name: sy$(resource_prefix)-$(postfix)$(environment)
|
||||
adls_account: st$(resource_prefix)$(postfix)$(environment)
|
||||
adls_fs_name: dl$(resource_prefix)$(postfix)$(environment)
|
||||
redis_name: rd$(resource_prefix)$(postfix)$(environment)
|
||||
webapp_name: app$(resource_prefix)$(postfix)$(environment)
|
||||
# sp_name: sp$(resource_prefix)$(postfix)$(environment)
|
||||
|
|
|
@ -6,9 +6,9 @@ variables:
|
|||
# Global
|
||||
ap_vm_image: ubuntu-20.04
|
||||
|
||||
namespace: mlopsv2 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
|
||||
postfix: 001
|
||||
location: eastus
|
||||
namespace: rizo13 #Note: A namespace with many characters will cause storage account creation to fail due to storage account names having a limit of 24 characters.
|
||||
postfix: 002
|
||||
location: canadacentral
|
||||
environment: prod
|
||||
enable_aml_computecluster: true
|
||||
enable_monitoring: false
|
||||
|
@ -20,9 +20,9 @@ variables:
|
|||
feathr_app_image: feathrfeaturestore/feathr-registry
|
||||
feathr_app_image_tag: releases-v0.9.0
|
||||
react_enable_rbac: true
|
||||
aad_client_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # refer to prefeaturestore readme for details
|
||||
aad_client_id: 2327c20b-fa88-46eb-8a3a-57283a716619 # refer to prefeaturestore readme for details
|
||||
spark_version: 3.1
|
||||
priviledged_object_id: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
|
||||
priviledged_object_id: 7cbdebc0-e724-4683-9094-4c0cb8a4266f # AAD Object ID for user, used to set privileges for user in KV and Synapse for feature store components
|
||||
# added flag for this: https://registry.terraform.io/providers/hashicorp/azurerm/2.99.0/docs/guides/3.0-app-service-beta
|
||||
# remove the environment variable when this is no longer required
|
||||
ARM_THREEPOINTZERO_BETA_RESOURCES: true
|
||||
|
@ -59,8 +59,9 @@ variables:
|
|||
# For feathr reference
|
||||
resource_prefix: $(namespace)fs
|
||||
key_vault_name: kv-$(resource_prefix)-$(postfix)$(environment)
|
||||
synapse_workspace_url: sy$(resource_prefix)-$(postfix)$(environment)
|
||||
synapse_workspace_name: sy$(resource_prefix)-$(postfix)$(environment)
|
||||
adls_account: st$(resource_prefix)$(postfix)$(environment)
|
||||
adls_fs_name: dl$(resource_prefix)$(postfix)$(environment)
|
||||
redis_name: rd$(resource_prefix)$(postfix)$(environment)
|
||||
webapp_name: app$(resource_prefix)$(postfix)$(environment)
|
||||
# sp_name: sp$(resource_prefix)$(postfix)$(environment)
|
||||
|
|
Загрузка…
Ссылка в новой задаче