Improved Security in pipeline (#77)

Sushant Divate 2020-07-23 14:33:23 -07:00 committed by GitHub
Parent 1561974db5
Commit 127321dfe1
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
5 changed files: 110 additions and 30 deletions

View file

@@ -133,11 +133,20 @@ stages:
           fi
           python code/pipelineazdo.py
+    - task: Bash@3
+      name: "kfpauth"
+      displayName: "Authenticate KFP"
+      inputs:
+        targetType: 'inline'
+        script: |
+          pip3 install adal --upgrade
+          # Avoid returning token from python script directly to avoid it getting exposed over stdout
+          echo "##vso[task.setvariable variable=KFP_API_TOKEN;issecret=true;isOutput=true]$(python code/utils/get_kfp_auth_token.py --tenant $(KF_TENANT_ID) --service_principal $(KF_CLIENT_ID) --sp_secret $(KF_CLIENT_SECRET) 2>&1 >/dev/null)"
     - task: KubeflowUploadPipeline@0
       displayName: "Upload Pipeline to KubeFlow"
       inputs:
         kubeflowEndpoint: '$(KF_ENDPOINT)'
-        bearerToken: '$(KF_API_TOKEN)'
+        bearerToken: '$(kfpauth.KFP_API_TOKEN)'
         kubeflowPipelineTask: '$(KF_UPLOAD_CHOICE)'
         pipelineFilePath: '$(KF_PIPELINE_FILE)'
         newPipelineName: '$(KF_NEW_PIPELINE_NAME)'
@@ -148,7 +157,7 @@ stages:
       displayName: "Create Experiment with New Pipeline"
       inputs:
         kubeflowEndpoint: '$(KF_ENDPOINT)'
-        bearerToken: '$(KF_API_TOKEN)'
+        bearerToken: '$(kfpauth.KFP_API_TOKEN)'
         pipeline: '$(KF_NEW_PIPELINE_NAME)'
         useDefaultVersion: '$(KF_USE_DEFAULT_VERSION)'
         pipelineVersion: '$(KF_NEW_PIPELINE_NAME)'
@@ -166,7 +175,7 @@ stages:
       displayName: "Create Experiment with New Pipeline Version"
       inputs:
         kubeflowEndpoint: '$(KF_ENDPOINT)'
-        bearerToken: '$(KF_API_TOKEN)'
+        bearerToken: '$(kfpauth.KFP_API_TOKEN)'
         pipeline: '$(KF_EXISTING_PIPELINE_NAME)'
         useDefaultVersion: $(KF_USE_DEFAULT_VERSION)
         pipelineVersion: '$(KF_NEW_VERSION_NAME)'
@@ -199,15 +208,18 @@ stages:
     pool: server
     variables:
       PIPELINE_VERSION_ID: $[ dependencies.Upload_Pipeline.outputs['setpipelinevars.KFPIPELINEVERSIONID'] ]
       EXPERIMENT_ID: $[ dependencies.Upload_Pipeline.outputs['setpipelinevars.KPEXPID'] ]
+      KFP_API_TOKEN: $[ dependencies.Upload_Pipeline.outputs['kfpauth.KFP_API_TOKEN'] ]
     steps:
     - task: private-kfexperimentrun-async@0
       displayName: "Invoke Pipeline"
       inputs:
         kubeflowEndpoint: '$(KF_ENDPOINT)'
-        bearerToken: '$(KF_API_TOKEN)'
+        bearerToken: '$(KFP_API_TOKEN)'
         pipelineVersionID: '$(PIPELINE_VERSION_ID)'
         experimentID: '$(EXPERIMENT_ID)'
         runName: '$(KF_RUN_NAME)'
         pipelineParams: '$(KF_PIPELINE_PARAMS2)'
         runDescription: "{'build_number':'$(Build.BuildNumber)','repo_name':'$(Build.Repository.Name)','branch_name':'$(Build.SourceBranchName)','queued_by':'$(Build.QueuedBy)'}"
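Taken together, the pipeline change works like this: the new kfpauth step obtains an Azure AD access token via code/utils/get_kfp_auth_token.py, publishes it as the secret output variable KFP_API_TOKEN, and every downstream Kubeflow task now reads $(kfpauth.KFP_API_TOKEN) (or the job-level KFP_API_TOKEN mapped from dependencies.Upload_Pipeline.outputs) instead of a long-lived KF_API_TOKEN pipeline variable. As a rough, non-authoritative sketch of what the upload task does with that bearer token, the equivalent call with the kfp SDK is shown below; the host, file name and pipeline name are placeholders, and the real implementation is the Kubeflow Azure DevOps extension task, not this snippet.

# Sketch only: passing a bearer token to the Kubeflow Pipelines API with the kfp SDK.
# Endpoint, package path and pipeline name are placeholders, not values from the commit.
import kfp

client = kfp.Client(
    host='http://<kf-endpoint>/pipeline',        # $(KF_ENDPOINT)
    existing_token='<access-token>'              # the value captured by the kfpauth step
)
client.upload_pipeline(
    pipeline_package_path='pipeline.py.tar.gz',  # $(KF_PIPELINE_FILE)
    pipeline_name='<new-pipeline-name>'          # $(KF_NEW_PIPELINE_NAME)
)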

View file

@@ -7,7 +7,7 @@ import kfp.compiler as compiler
 import kfp.components as components
 from kfp.azure import use_azure_secret
 from kubernetes.client.models import V1EnvVar
-from utils.kfp_helper import use_databricks_secret, use_image
+from utils.kfp_helper import use_databricks_secret, use_image, use_kfp_host_secret

 persistent_volume_path = '/mnt/azure'
@@ -20,7 +20,6 @@ training_dataset = 'train.txt'
 model_folder = 'model'
 image_repo_name = "kubeflowyoacr.azurecr.io/mexicanfood"
 mlflow_url = 'http://mlflow:5000'
-kfp_host_url = 'http://52.149.63.253/pipeline'

 component_root = os.path.join(os.path.dirname(
     os.path.abspath(__file__)), ".")
@@ -59,12 +58,12 @@ def tacosandburritos_train(
     azdocallbackinfo=None
 ):
-    exit_handler_op = exit_op(kfp_host_url=kfp_host_url,
+    exit_handler_op = exit_op(kfp_host_url="$(KFP_HOST)",
                               azdocallbackinfo=azdocallbackinfo,
                               run_id=dsl.RUN_ID_PLACEHOLDER,
                               tenant_id="$(AZ_TENANT_ID)",
                               service_principal_id="$(AZ_CLIENT_ID)",
-                              service_principal_password="$(AZ_CLIENT_SECRET)").apply(use_azure_secret()).apply(use_image(exit_image_name))  # noqa: E501
+                              service_principal_password="$(AZ_CLIENT_SECRET)").apply(use_azure_secret()).apply(use_kfp_host_secret()).apply(use_image(exit_image_name))  # noqa: E501

     with dsl.ExitHandler(exit_op=exit_handler_op):
@@ -88,29 +87,30 @@ def tacosandburritos_train(
             model_folder=model_folder,
             images=training_dataset,
             dataset=operations['preprocess'].outputs['dataset']). \
             set_memory_request('16G'). \
             add_env_variable(V1EnvVar(name="RUN_ID", value=dsl.RUN_ID_PLACEHOLDER)). \
             add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)). \
             add_env_variable(V1EnvVar(name="GIT_PYTHON_REFRESH", value='quiet')). \
             apply(use_image(train_image_name))

         operations['training'].after(operations['preprocess'])

-        operations['evaluate'] = evaluate_op(model=operations['training'].outputs['model'])
+        operations['evaluate'] = evaluate_op(
+            model=operations['training'].outputs['model'])
         operations['evaluate'].after(operations['training'])

         operations['register to AML'] = register_op(base_path=persistent_volume_path,
             model_file='latest.h5',
             model_name=model_name,
             tenant_id='$(AZ_TENANT_ID)',
             service_principal_id='$(AZ_CLIENT_ID)',
             service_principal_password='$(AZ_CLIENT_SECRET)',
             subscription_id='$(AZ_SUBSCRIPTION_ID)',
             resource_group=resource_group,
             workspace=workspace,
             run_id=dsl.RUN_ID_PLACEHOLDER). \
             apply(use_azure_secret()). \
             apply(use_image(register_images_name))

         operations['register to AML'].after(operations['evaluate'])
@@ -118,9 +118,9 @@ def tacosandburritos_train(
             model_name=model_name,
             experiment_name='mexicanfood',
             run_id=dsl.RUN_ID_PLACEHOLDER). \
             apply(use_azure_secret()). \
             add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)). \
             apply(use_image(register_mlflow_image_name))

         operations['register to mlflow'].after(operations['register to AML'])
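In the pipeline definition, the hardcoded kfp_host_url = 'http://52.149.63.253/pipeline' is removed. The exit handler now receives the literal string "$(KFP_HOST)", and the extra .apply(use_kfp_host_secret()) injects a KFP_HOST environment variable from the kfp-host-secret Kubernetes secret, so Kubernetes can expand $(KFP_HOST) in the container arguments at run time. The exit-handler component itself is not part of this diff; as a hypothetical sketch (argument names assumed, not from the repository), it would consume the value roughly like this:

# Hypothetical exit-handler side, for illustration only; argument names are assumed.
import argparse
import kfp

parser = argparse.ArgumentParser()
parser.add_argument('--kfp_host_url')  # arrives as the expanded value of $(KFP_HOST)
parser.add_argument('--run_id')        # arrives as the runtime value of dsl.RUN_ID_PLACEHOLDER
args = parser.parse_args()

# Look up the finishing run so its status can be reported back to Azure DevOps.
client = kfp.Client(host=args.kfp_host_url)  # auth omitted; cluster-dependent
print(client.get_run(args.run_id).run.status)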

View file

@@ -0,0 +1,42 @@
import argparse
import adal
import sys


def main():
    parser = argparse.ArgumentParser("KFP host Auth")
    parser.add_argument(
        "--tenant",
        type=str,
        required=False,
        help="Tenant"
    )
    parser.add_argument(
        "--service_principal",
        type=str,
        required=False,
        help="Service Principal"
    )
    parser.add_argument(
        "--sp_secret",
        type=str,
        required=False,
        help="Service Principal Secret"
    )
    args = parser.parse_args()

    authorityHostUrl = "https://login.microsoftonline.com"
    GRAPH_RESOURCE = '00000002-0000-0000-c000-000000000000'

    authority_url = authorityHostUrl + '/' + str(args.tenant)
    context = adal.AuthenticationContext(authority_url)
    token = context.acquire_token_with_client_credentials(GRAPH_RESOURCE, args.service_principal, args.sp_secret)  # noqa: E501
    return token['accessToken']


if __name__ == '__main__':
    sys.exit(main())
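Note how the token leaves this script: main() returns the access token and sys.exit(main()) prints that string to stderr (with a non-zero exit status) rather than to stdout, and the pipeline's 2>&1 >/dev/null redirection captures stderr while discarding stdout, so the raw token never appears in regular step output. A small sketch of that capture behaviour (placeholder arguments, not part of the commit):

# Demonstration only: sys.exit(<str>) writes the string to stderr, so the caller
# reads the token from stderr and ignores stdout. Argument values are placeholders.
import subprocess
import sys

proc = subprocess.run(
    [sys.executable, 'code/utils/get_kfp_auth_token.py',
     '--tenant', '<tenant-id>',
     '--service_principal', '<client-id>',
     '--sp_secret', '<client-secret>'],
    capture_output=True, text=True
)
token = proc.stderr.strip()  # the token arrives on stderr, not stdout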

View file

@@ -41,6 +41,27 @@ def use_databricks_secret(secret_name='databricks-secret'):
     return _use_databricks_secret


+def use_kfp_host_secret(secret_name='kfp-host-secret'):
+    def _use_kfp_host_secret(task):
+        from kubernetes import client as k8s_client
+        (
+            task.container
+            .add_env_variable(
+                k8s_client.V1EnvVar(
+                    name='KFP_HOST',
+                    value_from=k8s_client.V1EnvVarSource(
+                        secret_key_ref=k8s_client.V1SecretKeySelector(
+                            name=secret_name,
+                            key='KFP_HOST'
+                        )
+                    )
+                )
+            )
+        )
+        return task
+    return _use_kfp_host_secret
+
+
 def use_image(image_name):
     def _use_image(task):
         task.image = image_name
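use_kfp_host_secret follows the same modifier pattern as use_databricks_secret: it returns a function that KFP's .apply() runs over a task, attaching a KFP_HOST environment variable sourced from the kfp-host-secret Kubernetes secret. A minimal usage sketch (the pipeline name and image are placeholders, not from the diff):

# Usage sketch: applying the helper to a ContainerOp injects KFP_HOST from the secret.
import kfp.dsl as dsl
from utils.kfp_helper import use_kfp_host_secret


@dsl.pipeline(name='kfp-host-demo', description='Illustrates KFP_HOST injection')
def demo_pipeline():
    op = dsl.ContainerOp(
        name='check-kfp-host',
        image='python:3.8-slim',
        command=['python', '-c', 'import os; print("KFP_HOST" in os.environ)'],
    )
    op.apply(use_kfp_host_secret())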

View file

@@ -0,0 +1,5 @@
# Initialize variables:
# KFP_HOST=
# KUBEFLOW_NAMESPACE=kubeflow
kubectl create secret generic kfp-host-secret --from-literal=KFP_HOST=$KFP_HOST -n $KUBEFLOW_NAMESPACE
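This last file is a one-off setup script: it stores the cluster's KFP endpoint in the kfp-host-secret secret in the Kubeflow namespace, which is the value use_kfp_host_secret() projects into pipeline containers. For reference, a sketch of the same operation with the Kubernetes Python client (kubeconfig access and the endpoint value are assumptions):

# Equivalent of the kubectl command above, using the Kubernetes Python client.
import base64
from kubernetes import client, config

config.load_kube_config()  # assumes a local kubeconfig with cluster access
secret = client.V1Secret(
    metadata=client.V1ObjectMeta(name='kfp-host-secret'),
    data={'KFP_HOST': base64.b64encode(b'http://<kfp-endpoint>/pipeline').decode()},
)
client.CoreV1Api().create_namespaced_secret(namespace='kubeflow', body=secret)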