Added comments and refactored step outputs into hashmaps
This commit is contained in:
Parent
c82362f1d8
Commit
d6e18ed9ea
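What the change amounts to: every pipeline step used to return its PipelineData outputs as a positional list, so callers had to index by position; each step now also builds a dictionary keyed by output name and returns that instead, and pipeline.py is updated to look outputs up by key. A minimal, self-contained sketch of the pattern (plain strings stand in for the PipelineData objects used by the real steps):

def make_step_old():
    # Positional outputs: callers must remember which index is which.
    outputs = ['train_data', 'valid_data', 'test_data']
    return 'step', outputs

def make_step_new():
    # Named outputs: callers look results up by key, so reordering cannot break them.
    outputs_map = {
        'train_dir': 'train_data',
        'valid_dir': 'valid_data',
        'test_dir': 'test_data',
    }
    return 'step', outputs_map

_, old_outputs = make_step_old()
print(old_outputs[0])              # position-based access

_, new_outputs = make_step_new()
print(new_outputs['train_dir'])    # name-based access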
Binary data
modules/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary data
modules/__pycache__/data_ingestion_step.cpython-36.pyc
Binary file not shown.
Binary data
modules/__pycache__/data_preprocess_step.cpython-36.pyc
Binary file not shown.
Binary data
modules/deploy/__pycache__/deploy_step.cpython-36.pyc
Binary file not shown.
Binary data
modules/deploy/__pycache__/deploy_step.cpython-37.pyc
Binary file not shown.
@@ -4,6 +4,22 @@ from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import PipelineParameter

def deploy_step(model_dir, accuracy_file, test_dir, compute_target):
    '''
    This step registers and deploys a new model on its first run. In subsequent runs it will only register
    and deploy a new model if the training dataset has changed, or if the dataset is unchanged but the accuracy improved.

    :param model_dir: The reference to the directory containing the trained model
    :type model_dir: DataReference
    :param accuracy_file: The reference to the file containing the evaluation accuracy
    :type accuracy_file: DataReference
    :param test_dir: The reference to the directory containing the testing data
    :type test_dir: DataReference
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget

    :return: The deploy step, step outputs dictionary (keys: scoring_url)
    :rtype: PythonScriptStep, dict
    '''

    scoring_url = PipelineData(
        name='scoring_url',
@@ -13,6 +29,7 @@ def deploy_step(model_dir, accuracy_file, test_dir, compute_target):
        is_directory=False)

    outputs = [scoring_url]
    outputs_map = { 'scoring_url': scoring_url }

    step = PythonScriptStep(
        script_name='deploy.py',
@@ -29,5 +46,5 @@ def deploy_step(model_dir, accuracy_file, test_dir, compute_target):
        allow_reuse=False
    )

    return step, outputs
    return step, outputs_map
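The docstring above describes the deployment gate, but deploy.py itself is not part of this hunk, so the decision logic can only be sketched; the names below are illustrative, not taken from the repo:

def should_deploy(first_run, dataset_changed, new_accuracy, best_accuracy):
    # Deploy on the first run or whenever the training data changed;
    # otherwise deploy only if accuracy improved on the same data.
    if first_run or dataset_changed:
        return True
    return new_accuracy > best_accuracy

print(should_deploy(first_run=False, dataset_changed=False,
                    new_accuracy=0.91, best_accuracy=0.88))  # True: same data, better accuracy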
@@ -1,76 +0,0 @@
from __future__ import print_function, division
import argparse
import time
import torch
import torch.nn as nn
from torchvision import datasets, models, transforms

def load_data(test_dir):

    test_transform = transforms.Compose([
        transforms.Resize(200),
        transforms.CenterCrop(200),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.405],
                             std=[0.229, 0.224, 0.225])
    ])

    test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transform)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=True, num_workers=4)

    dataset_size = len(test_loader.dataset)
    class_names = test_dataset.classes

    return test_loader, dataset_size, class_names

def evaluate_model(model, criterion, dataloader, dataset_size, class_names, device):

    model.eval()
    running_loss = 0.0
    running_corrects = 0

    for batch_idx, (inputs, labels) in enumerate(dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        running_loss += loss.item() * inputs.size(0)
        corrects = torch.sum(preds == labels.data)
        running_corrects += corrects

    print('{}/{} predictions correct.'.format(running_corrects, dataset_size))
    loss = running_loss / dataset_size
    acc = running_corrects.double() / dataset_size
    print('Loss: {:.4f} Acc: {:.4f}'.format(loss, acc))

    return acc

# Define arguments
parser = argparse.ArgumentParser(description='Evaluate arg parser')
parser.add_argument('--test_dir', type=str, help='Directory where testing data is stored')
parser.add_argument('--model_dir', type=str, help='Directory where model is stored')
parser.add_argument('--accuracy_file', type=str, help='File to output the accuracy to')
args = parser.parse_args()

# Get arguments from parser
test_dir = args.test_dir
model_dir = args.model_dir
accuracy_file = args.accuracy_file

# Load testing data, model, and device
test_loader, dataset_size, class_names = load_data(test_dir)
model = torch.load(os.path.join(model_dir, 'model.pt'))
device = torch.device('cuda:0')

# Define criterion
criterion = nn.CrossEntropyLoss()

# Evaluate model
acc = evaluate_model(model, criterion, test_loader, dataset_size, class_names, device)

# Output accuracy to file
with open(accuracy_file, 'w+') as f:
    f.write(str(acc.item()))
@@ -1,40 +0,0 @@
import os
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import PipelineParameter
from azureml.pipeline.steps import EstimatorStep
from azureml.train.dnn import PyTorch

def evaluate_step(model_dir, test_dir, compute_target):

    accuracy_file = PipelineData(
        name='accuracy_file',
        pipeline_output_name='accuracy_file',
        datastore=test_dir.datastore,
        output_mode='mount',
        is_directory=False)

    outputs = [accuracy_file]

    estimator = PyTorch(
        source_directory=os.path.dirname(os.path.abspath(__file__)),
        entry_script='evaluate.py',
        framework_version='1.3',
        compute_target=compute_target,
        use_gpu=True)

    step = EstimatorStep(
        estimator=estimator,
        estimator_entry_script_arguments=[
            '--test_dir', test_dir,
            '--model_dir', model_dir,
            '--accuracy_file', accuracy_file
        ],
        inputs=[model_dir, test_dir],
        outputs=outputs,
        compute_target=compute_target,
        allow_reuse=False)

    return step, outputs
Binary data
modules/evaluate/__pycache__/evaluate_step.cpython-36.pyc
Binary file not shown.
Binary data
modules/evaluate/__pycache__/evaluate_step.cpython-37.pyc
Binary file not shown.
@@ -6,7 +6,9 @@ import torch.nn as nn
from torchvision import datasets, models, transforms

def load_data(test_dir):

    '''
    Loads the testing data
    '''
    test_transform = transforms.Compose([
        transforms.Resize(200),
        transforms.CenterCrop(200),
@@ -24,7 +26,9 @@ def load_data(test_dir):
    return test_loader, dataset_size, class_names

def evaluate_model(model, criterion, dataloader, dataset_size, class_names, device):

    '''
    Evaluates the model
    '''
    model.eval()
    running_loss = 0.0
    running_corrects = 0
@@ -8,6 +8,19 @@ from azureml.pipeline.steps import EstimatorStep
from azureml.train.dnn import PyTorch

def evaluate_step(model_dir, test_dir, compute_target):
    '''
    This step evaluates the trained model on the testing data and outputs the accuracy.

    :param model_dir: The reference to the directory containing the trained model
    :type model_dir: DataReference
    :param test_dir: The reference to the directory containing the testing data
    :type test_dir: DataReference
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget

    :return: The evaluate step, step outputs dictionary (keys: accuracy_file)
    :rtype: EstimatorStep, dict
    '''

    accuracy_file = PipelineData(
        name='accuracy_file',
@@ -17,6 +30,7 @@ def evaluate_step(model_dir, test_dir, compute_target):
        is_directory=False)

    outputs = [accuracy_file]
    outputs_map = { 'accuracy_file': accuracy_file }

    estimator = PyTorch(
        source_directory=os.path.dirname(os.path.abspath(__file__)),
@@ -37,4 +51,4 @@ def evaluate_step(model_dir, test_dir, compute_target):
        compute_target=compute_target,
        allow_reuse=False)

    return step, outputs
    return step, outputs_map
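evaluate.py (shown above) writes the accuracy to the accuracy_file output as a plain string via f.write(str(acc.item())). A downstream step such as deploy.py would presumably read it back along these lines; this is a sketch only, since the deploy script is not in this diff:

def read_accuracy(accuracy_file_path):
    # evaluate.py writes str(acc.item()), e.g. '0.9125'
    with open(accuracy_file_path) as f:
        return float(f.read().strip())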
@@ -1,35 +0,0 @@
import os
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import PipelineParameter

def data_ingestion_step(datastore_reference, compute_target):

    run_config = RunConfiguration()
    run_config.environment.environment_variables = {'COGNITIVE_SERVICES_API_KEY': os.environ['COGNITIVE_SERVICES_API_KEY']}
    run_config.environment.docker.enabled = True

    num_images = PipelineParameter(name='num_images', default_value=25)

    raw_data_dir = PipelineData(
        name='raw_data_dir',
        pipeline_output_name='raw_data_dir',
        datastore=datastore_reference.datastore,
        output_mode='mount',
        is_directory=True)

    outputs = [raw_data_dir]

    step = PythonScriptStep(
        script_name='data_ingestion.py',
        arguments=['--output_dir', raw_data_dir, '--num_images', num_images],
        inputs=[datastore_reference],
        outputs=outputs,
        compute_target=compute_target,
        source_directory=os.path.dirname(os.path.abspath(__file__)),
        runconfig=run_config,
        allow_reuse=False
    )

    return step, outputs
Binary data
modules/ingestion/__pycache__/data_ingestion_step.cpython-36.pyc
Binary file not shown.
Binary data
modules/ingestion/__pycache__/data_ingestion_step.cpython-37.pyc
Binary file not shown.
@@ -5,6 +5,20 @@ from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import PipelineParameter

def data_ingestion_step(datastore_reference, compute_target):
    '''
    This step will leverage Azure Cognitive Services to search the web for images
    to create a dataset. This replicates the real-world scenario of data being
    ingested from a constantly changing source. The same 10 classes in the CIFAR-10 dataset
    will be used (airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck).

    :param datastore_reference: The reference to the datastore that will be used
    :type datastore_reference: DataReference
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget

    :return: The ingestion step, step outputs dictionary (keys: raw_data_dir)
    :rtype: PythonScriptStep, dict
    '''

    run_config = RunConfiguration()
    run_config.environment.environment_variables = {'COGNITIVE_SERVICES_API_KEY': os.environ['COGNITIVE_SERVICES_API_KEY']}
@@ -20,6 +34,7 @@ def data_ingestion_step(datastore_reference, compute_target):
        is_directory=True)

    outputs = [raw_data_dir]
    outputs_map = { 'raw_data_dir': raw_data_dir }

    step = PythonScriptStep(
        script_name='data_ingestion.py',
@@ -32,4 +47,4 @@ def data_ingestion_step(datastore_reference, compute_target):
        allow_reuse=False
    )

    return step, outputs
    return step, outputs_map
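data_ingestion.py is not shown in this diff; given the COGNITIVE_SERVICES_API_KEY environment variable wired into the run config above, it presumably queries an image search service such as Bing Image Search for each of the ten classes. A rough, illustrative sketch only; the endpoint, parameters, and helper name are assumptions, not code from the repo:

import os
import requests

SEARCH_URL = 'https://api.cognitive.microsoft.com/bing/v7.0/images/search'  # assumed endpoint

def search_image_urls(query, count=25):
    # Ask the image search service for image URLs matching one class name.
    headers = {'Ocp-Apim-Subscription-Key': os.environ['COGNITIVE_SERVICES_API_KEY']}
    params = {'q': query, 'count': count}
    response = requests.get(SEARCH_URL, headers=headers, params=params)
    response.raise_for_status()
    return [item['contentUrl'] for item in response.json()['value']]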
@@ -1,72 +0,0 @@
import os
import argparse
import random
import cv2
from imutils import paths

def preprocess_images(files, image_dim, output_dir, label):
    '''
    Load files, crop to consistent size, and save to respective folder
    '''
    # Make class directory
    class_directory = '{}/{}'.format(output_dir, label)
    if not os.path.exists(class_directory):
        os.makedirs(class_directory)

    # Iterate through files
    for f in files:
        temp = f.split('/')
        output_file = '{}/{}/{}'.format(output_dir, label, temp[-1])
        try:
            image = cv2.imread(f)
            image = cv2.resize(image, (image_dim, image_dim))
            cv2.imwrite(output_file, image)
            print('Cropping image: {}'.format(output_file))
        except:
            print('Removing corrupted file: {}'.format(output_file))

# Define arguments
parser = argparse.ArgumentParser(description='Web scraping arg parser')
parser.add_argument('--raw_data_dir', type=str, help='Directory where raw data is stored')
parser.add_argument('--image_dim', type=int, help='Image dimension to be cropped to')
parser.add_argument('--train_dir', type=str, help='Directory to output the processed training data')
parser.add_argument('--valid_dir', type=str, help='Directory to output the processed valid data')
parser.add_argument('--test_dir', type=str, help='Directory to output the processed test data')
args = parser.parse_args()

# Get arguments from parser
raw_data_dir = args.raw_data_dir
image_dim = args.image_dim
train_dir = args.train_dir
valid_dir = args.valid_dir
test_dir = args.test_dir

# Make train, valid, test directories
if not os.path.exists(train_dir):
    os.makedirs(train_dir)

if not os.path.exists(valid_dir):
    os.makedirs(valid_dir)

if not os.path.exists(test_dir):
    os.makedirs(test_dir)

# Get all the classes that have been sorted into directories from previous step
classes = os.listdir(raw_data_dir)

for label in classes:

    # Get and shuffle files
    image_files = list(paths.list_images('{}/{}'.format(raw_data_dir, label)))
    random.shuffle(image_files)

    # Split into train, valid, test sets
    num_images = len(image_files)
    train_files = image_files[0:int(num_images*0.7)]
    valid_files = image_files[int(num_images*0.7):int(num_images*0.9)]
    test_files = image_files[int(num_images*0.9):num_images]

    # Load files, crop to consistent size, and save to respective folder
    preprocess_images(train_files, image_dim, train_dir, label)
    preprocess_images(valid_files, image_dim, valid_dir, label)
    preprocess_images(test_files, image_dim, test_dir, label)
@@ -1,56 +0,0 @@
import os
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import PipelineParameter

def data_preprocess_step(raw_data_dir, compute_target):

    run_config = RunConfiguration()
    run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=['opencv-python==4.1.1.26', 'imutils==0.5.3'])
    run_config.environment.docker.enabled = True

    image_dim = PipelineParameter(name='image_dim', default_value=200)

    train_dir = PipelineData(
        name='train_dir',
        pipeline_output_name='train_dir',
        datastore=raw_data_dir.datastore,
        output_mode='mount',
        is_directory=True)

    valid_dir = PipelineData(
        name='valid_dir',
        pipeline_output_name='valid_dir',
        datastore=raw_data_dir.datastore,
        output_mode='mount',
        is_directory=True)

    test_dir = PipelineData(
        name='test_dir',
        pipeline_output_name='test_dir',
        datastore=raw_data_dir.datastore,
        output_mode='mount',
        is_directory=True)

    outputs = [train_dir, valid_dir, test_dir]

    step = PythonScriptStep(
        script_name='data_preprocess.py',
        arguments=[
            '--raw_data_dir', raw_data_dir,
            '--train_dir', train_dir,
            '--valid_dir', valid_dir,
            '--test_dir', test_dir,
            '--image_dim', image_dim
        ],
        inputs=[raw_data_dir],
        outputs=outputs,
        compute_target=compute_target,
        runconfig=run_config,
        source_directory=os.path.dirname(os.path.abspath(__file__)),
        allow_reuse=False
    )

    return step, outputs
Binary file not shown.
Binary file not shown.
@@ -6,6 +6,18 @@ from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import PipelineParameter

def data_preprocess_step(raw_data_dir, compute_target):
    '''
    This step will take the raw data downloaded from the previous step and preprocess it by cropping
    it to a consistent size, shuffling the data, and splitting it into train, valid, and test directories.

    :param raw_data_dir: The reference to the directory containing the raw data
    :type raw_data_dir: DataReference
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget

    :return: The preprocess step, step outputs dictionary (keys: train_dir, valid_dir, test_dir)
    :rtype: PythonScriptStep, dict
    '''

    run_config = RunConfiguration()
    run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=['opencv-python==4.1.1.26', 'imutils==0.5.3'])
@@ -35,6 +47,11 @@ def data_preprocess_step(raw_data_dir, compute_target):
        is_directory=True)

    outputs = [train_dir, valid_dir, test_dir]
    outputs_map = {
        'train_dir': train_dir,
        'valid_dir': valid_dir,
        'test_dir': test_dir,
    }

    step = PythonScriptStep(
        script_name='data_preprocess.py',
@@ -53,4 +70,4 @@ def data_preprocess_step(raw_data_dir, compute_target):
        allow_reuse=False
    )

    return step, outputs
    return step, outputs_map
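For reference, the 70/20/10 split that data_preprocess.py (shown in full earlier in this diff) performs behind these outputs is plain slicing of a shuffled file list:

import random

image_files = ['img{}.jpg'.format(i) for i in range(10)]   # stand-in for paths.list_images(...)
random.shuffle(image_files)

num_images = len(image_files)
train_files = image_files[0:int(num_images * 0.7)]                      # first 70%
valid_files = image_files[int(num_images * 0.7):int(num_images * 0.9)]  # next 20%
test_files = image_files[int(num_images * 0.9):num_images]              # final 10%

print(len(train_files), len(valid_files), len(test_files))  # 7 2 1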
@@ -1,11 +0,0 @@
# Define arguments
parser = argparse.ArgumentParser(description='Training arg parser')
parser.add_argument('--train_dir', type=str, help='Directory where training data is stored')
parser.add_argument('--valid_dir', type=str, help='Directory where validation data is stored')
parser.add_argument('--output_dir', type=str, help='Directory to output the model to')
args = parser.parse_args()

# Get arguments from parser
train_dir = args.train_dir
valid_dir = args.valid_dir
output_dir = args.output_dir
@@ -1,49 +0,0 @@
import os
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core import PipelineParameter
from azureml.pipeline.steps import EstimatorStep
from azureml.train.dnn import PyTorch

def train_step(train_dir, valid_dir, compute_target):

    num_epochs = PipelineParameter(name='num_epochs', default_value=25)
    batch_size = PipelineParameter(name='batch_size', default_value=16)
    learning_rate = PipelineParameter(name='learning_rate', default_value=0.001)
    momentum = PipelineParameter(name='momentum', default_value=0.9)

    model_dir = PipelineData(
        name='model_dir',
        pipeline_output_name='model_dir',
        datastore=train_dir.datastore,
        output_mode='mount',
        is_directory=True)

    outputs = [model_dir]

    estimator = PyTorch(
        source_directory=os.path.dirname(os.path.abspath(__file__)),
        entry_script='train.py',
        framework_version='1.3',
        compute_target=compute_target,
        use_gpu=True)

    step = EstimatorStep(
        estimator=estimator,
        estimator_entry_script_arguments=[
            '--train_dir', train_dir,
            '--valid_dir', valid_dir,
            '--output_dir', model_dir,
            '--num_epochs', num_epochs,
            '--batch_size', batch_size,
            '--learning_rate', learning_rate,
            '--momentum', momentum
        ],
        inputs=[train_dir, valid_dir],
        compute_target=compute_target,
        outputs=outputs,
        allow_reuse=False)

    return step, outputs
Binary data
modules/train/__pycache__/train_step.cpython-36.pyc
Binary file not shown.
Binary data
modules/train/__pycache__/train_step.cpython-37.pyc
Binary file not shown.
@@ -8,6 +8,20 @@ from azureml.pipeline.steps import EstimatorStep
from azureml.train.dnn import PyTorch

def train_step(train_dir, valid_dir, compute_target):
    '''
    This step will fine-tune a ResNet-18 model on our dataset using PyTorch.
    It will use the corresponding input image directories as training and validation data.

    :param train_dir: The reference to the directory containing the training data
    :type train_dir: DataReference
    :param valid_dir: The reference to the directory containing the validation data
    :type valid_dir: DataReference
    :param compute_target: The compute target to run the step on
    :type compute_target: ComputeTarget

    :return: The train step, step outputs dictionary (keys: model_dir)
    :rtype: EstimatorStep, dict
    '''

    num_epochs = PipelineParameter(name='num_epochs', default_value=25)
    batch_size = PipelineParameter(name='batch_size', default_value=16)
@@ -22,6 +36,7 @@ def train_step(train_dir, valid_dir, compute_target):
        is_directory=True)

    outputs = [model_dir]
    outputs_map = { 'model_dir': model_dir }

    estimator = PyTorch(
        source_directory=os.path.dirname(os.path.abspath(__file__)),
@@ -46,4 +61,4 @@ def train_step(train_dir, valid_dir, compute_target):
        outputs=outputs,
        allow_reuse=False)

    return step, outputs
    return step, outputs_map
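train.py itself is not included in this diff; a typical way to fine-tune ResNet-18 with the hyperparameters exposed above (learning_rate, momentum, and so on) would look roughly like the following, offered only as an illustrative sketch:

import torch.nn as nn
import torch.optim as optim
from torchvision import models

def build_model_and_optimizer(num_classes=10, learning_rate=0.001, momentum=0.9):
    # Start from an ImageNet-pretrained ResNet-18 and swap the final layer
    # so it predicts the scraped classes instead of the 1000 ImageNet classes.
    model = models.resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    return model, optimizer

model, optimizer = build_model_and_optimizer()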
@@ -41,16 +41,16 @@ datastore = DataReference(datastore, mode='mount')
data_ingestion_step, data_ingestion_outputs = data_ingestion_step(datastore, cpu_compute_target)

# Step 2: Data preprocessing
data_preprocess_step, data_preprocess_outputs = data_preprocess_step(data_ingestion_outputs[0], cpu_compute_target)
data_preprocess_step, data_preprocess_outputs = data_preprocess_step(data_ingestion_outputs['raw_data_dir'], cpu_compute_target)

# Step 3: Train Model
train_step, train_outputs = train_step(data_preprocess_outputs[0], data_preprocess_outputs[1], gpu_compute_target)
train_step, train_outputs = train_step(data_preprocess_outputs['train_dir'], data_preprocess_outputs['valid_dir'], gpu_compute_target)

# Step 4: Evaluate Model
evaluate_step, evaluate_outputs = evaluate_step(train_outputs[0], data_preprocess_outputs[2], gpu_compute_target)
evaluate_step, evaluate_outputs = evaluate_step(train_outputs['model_dir'], data_preprocess_outputs['test_dir'], gpu_compute_target)

# Step 5: Deploy Model
deploy_step, deploy_outputs = deploy_step(train_outputs[0], evaluate_outputs[0], data_preprocess_outputs[2], cpu_compute_target)
deploy_step, deploy_outputs = deploy_step(train_outputs['model_dir'], evaluate_outputs['accuracy_file'], data_preprocess_outputs['test_dir'], cpu_compute_target)

# Submit pipeline
print('Submitting pipeline ...')
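The hunk ends just before the submission code; in the Azure ML SDK the assembled steps are typically combined and submitted roughly as follows (a sketch; 'workspace' and the experiment name are assumptions, and these lines are not part of the diff):

from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

pipeline = Pipeline(workspace=workspace, steps=[data_ingestion_step, data_preprocess_step,
                                                train_step, evaluate_step, deploy_step])
pipeline_run = Experiment(workspace, 'image-classification-pipeline').submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)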