Register the training data as a data asset and use it as the input for pipeline runs
This commit is contained in:
Родитель
ce2efa6db5
Коммит
c8f6568ce6
|
@ -71,11 +71,8 @@ def main(args):
|
|||
# ------------ Reading Data ------------ #
|
||||
# -------------------------------------- #
|
||||
|
||||
print("mounted_path files: ")
|
||||
arr = os.listdir(args.raw_data)
|
||||
print(arr)
|
||||
|
||||
data = pd.read_csv((Path(args.raw_data) / 'taxi-data.csv'))
|
||||
data = pd.read_csv((Path(args.raw_data)))
|
||||
print(data)
|
||||
data = data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS + [TARGET_COL]]
|
||||
|
||||
# ------------- Split Data ------------- #
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
# Create train job compute cluster
|
||||
az ml compute create --file train/compute.yml
|
||||
# Register data asset
|
||||
az ml data create --file train/data.yml
|
||||
# Register train environment
|
||||
az ml environment create --file train/environment.yml
|
||||
# Create pipeline job
|
||||
|
|
|
@ -5,9 +5,9 @@ description: Training Pipeline to train a model that predicts taxi fare price
|
|||
|
||||
# <inputs_and_outputs>
|
||||
inputs:
|
||||
input: #using local data, will create an anonymous data asset
|
||||
type: uri_folder
|
||||
path: ../../../data/
|
||||
input:
|
||||
type: uri_file
|
||||
path: azureml:taxi-data@latest
|
||||
enable_monitoring: "false"
|
||||
table_name: 'taximonitoring'
|
||||
|
||||
|
|
|
@ -46,6 +46,11 @@ stages:
|
|||
environment_name: taxi-train-env
|
||||
environment_file: mlops/azureml/train/environment.yml
|
||||
enable_monitoring: $(enable_monitoring)
|
||||
- template: templates/${{ variables.version }}/register-data.yml@mlops-templates
|
||||
parameters:
|
||||
data_type: uri_file
|
||||
data_name: taxi-data
|
||||
data_file: mlops/azureml/train/data.yml
|
||||
- template: templates/${{ variables.version }}/run-pipeline.yml@mlops-templates
|
||||
parameters:
|
||||
pipeline_file: mlops/azureml/train/pipeline.yml
|
||||
|
|
Загрузка…
Ссылка в новой задаче