From d6e18ed9ea03a66128bf1cb51cca2de01e45b321 Mon Sep 17 00:00:00 2001
From: John Wu
Date: Mon, 9 Dec 2019 13:54:20 -0800
Subject: [PATCH] Added comments and refactored step outputs into hash maps

---
 modules/__pycache__/__init__.cpython-36.pyc     | Bin 158 -> 0 bytes
 .../data_ingestion_step.cpython-36.pyc          | Bin 970 -> 0 bytes
 .../data_preprocess_step.cpython-36.pyc         | Bin 639 -> 0 bytes
 .../__pycache__/deploy_step.cpython-36.pyc      | Bin 914 -> 0 bytes
 .../__pycache__/deploy_step.cpython-37.pyc      | Bin 935 -> 0 bytes
 modules/deploy/deploy_step.py                   |  19 ++++-
 .../.ipynb_checkpoints/evaluate-checkpoint.py   |  76 ------------------
 .../evaluate_step-checkpoint.py                 |  40 ---------
 .../__pycache__/evaluate_step.cpython-36.pyc    | Bin 1219 -> 0 bytes
 .../__pycache__/evaluate_step.cpython-37.pyc    | Bin 1240 -> 0 bytes
 modules/evaluate/evaluate.py                    |   8 +-
 modules/evaluate/evaluate_step.py               |  16 +++-
 .../data_ingestion_step-checkpoint.py           |  35 --------
 .../data_ingestion_step.cpython-36.pyc          | Bin 1169 -> 0 bytes
 .../data_ingestion_step.cpython-37.pyc          | Bin 1190 -> 0 bytes
 modules/ingestion/data_ingestion_step.py        |  17 +++-
 .../data_preprocess-checkpoint.py               |  72 -----------------
 .../data_preprocess_step-checkpoint.py          |  56 -------------
 .../data_preprocess_step.cpython-36.pyc         | Bin 1435 -> 0 bytes
 .../data_preprocess_step.cpython-37.pyc         | Bin 1456 -> 0 bytes
 modules/preprocess/data_preprocess_step.py      |  19 ++++-
 .../.ipynb_checkpoints/train-checkpoint.py      |  11 ---
 .../train_step-checkpoint.py                    |  49 ----------
 .../__pycache__/train_step.cpython-36.pyc       | Bin 1463 -> 0 bytes
 .../__pycache__/train_step.cpython-37.pyc       | Bin 1484 -> 0 bytes
 modules/train/train_step.py                     |  17 +++-
 object-recognition-pipeline.py                  |   8 +-
 27 files changed, 93 insertions(+), 350 deletions(-)
 delete mode 100644 modules/__pycache__/__init__.cpython-36.pyc
 delete mode 100644 modules/__pycache__/data_ingestion_step.cpython-36.pyc
 delete mode 100644 modules/__pycache__/data_preprocess_step.cpython-36.pyc
 delete mode 100644 modules/deploy/__pycache__/deploy_step.cpython-36.pyc
 delete mode 100644 modules/deploy/__pycache__/deploy_step.cpython-37.pyc
 delete mode 100644 modules/evaluate/.ipynb_checkpoints/evaluate-checkpoint.py
 delete mode 100644 modules/evaluate/.ipynb_checkpoints/evaluate_step-checkpoint.py
 delete mode 100644 modules/evaluate/__pycache__/evaluate_step.cpython-36.pyc
 delete mode 100644 modules/evaluate/__pycache__/evaluate_step.cpython-37.pyc
 delete mode 100644 modules/ingestion/.ipynb_checkpoints/data_ingestion_step-checkpoint.py
 delete mode 100644 modules/ingestion/__pycache__/data_ingestion_step.cpython-36.pyc
 delete mode 100644 modules/ingestion/__pycache__/data_ingestion_step.cpython-37.pyc
 delete mode 100644 modules/preprocess/.ipynb_checkpoints/data_preprocess-checkpoint.py
 delete mode 100644 modules/preprocess/.ipynb_checkpoints/data_preprocess_step-checkpoint.py
 delete mode 100644 modules/preprocess/__pycache__/data_preprocess_step.cpython-36.pyc
 delete mode 100644 modules/preprocess/__pycache__/data_preprocess_step.cpython-37.pyc
 delete mode 100644 modules/train/.ipynb_checkpoints/train-checkpoint.py
 delete mode 100644 modules/train/.ipynb_checkpoints/train_step-checkpoint.py
 delete mode 100644 modules/train/__pycache__/train_step.cpython-36.pyc
 delete mode 100644 modules/train/__pycache__/train_step.cpython-37.pyc
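Note: the core refactor in this patch replaces each step's positional output list with a small hash map keyed by output name, so the pipeline wiring no longer depends on index order. A minimal before/after sketch of the calling pattern (illustrative only; the function and key names come from the diffs that follow):

```python
# Before: steps returned a positional list, so callers had to remember
# which index held which PipelineData output.
step, outputs = data_preprocess_step(raw_data_dir, cpu_compute_target)
train_dir, valid_dir, test_dir = outputs[0], outputs[1], outputs[2]

# After: steps return a dictionary keyed by output name, so the wiring
# reads unambiguously and survives any reordering of the outputs.
step, outputs = data_preprocess_step(raw_data_dir, cpu_compute_target)
train_dir = outputs['train_dir']
valid_dir = outputs['valid_dir']
test_dir = outputs['test_dir']
```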
diff --git a/modules/__pycache__/__init__.cpython-36.pyc b/modules/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 30a667add6e019475d2b080cba38562247ff27f8..0000000000000000000000000000000000000000
Binary files a/modules/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/modules/__pycache__/data_ingestion_step.cpython-36.pyc b/modules/__pycache__/data_ingestion_step.cpython-36.pyc
deleted file mode 100644
index 3a65bed396bae16c07ffdc3b1a7630ad1fb578d8..0000000000000000000000000000000000000000
Binary files a/modules/__pycache__/data_ingestion_step.cpython-36.pyc and /dev/null differ
diff --git a/modules/__pycache__/data_preprocess_step.cpython-36.pyc b/modules/__pycache__/data_preprocess_step.cpython-36.pyc
deleted file mode 100644
index 5fa44a6a514372f1501a874206dff194635bcbc7..0000000000000000000000000000000000000000
Binary files a/modules/__pycache__/data_preprocess_step.cpython-36.pyc and /dev/null differ
diff --git a/modules/deploy/__pycache__/deploy_step.cpython-36.pyc b/modules/deploy/__pycache__/deploy_step.cpython-36.pyc
deleted file mode 100644
index bd3fedef9c3d378dfb7fb97a75122f59fb176085..0000000000000000000000000000000000000000
Binary files a/modules/deploy/__pycache__/deploy_step.cpython-36.pyc and /dev/null differ
diff --git a/modules/deploy/__pycache__/deploy_step.cpython-37.pyc b/modules/deploy/__pycache__/deploy_step.cpython-37.pyc
deleted file mode 100644
index 8e6f97704173a553ac8239cb050da0d019ba70be..0000000000000000000000000000000000000000
Binary files a/modules/deploy/__pycache__/deploy_step.cpython-37.pyc and /dev/null differ
diff --git a/modules/deploy/deploy_step.py b/modules/deploy/deploy_step.py
index 1692902..891458f 100644
--- a/modules/deploy/deploy_step.py
+++ b/modules/deploy/deploy_step.py
@@ -4,6 +4,22 @@ from azureml.pipeline.core import PipelineData
 from azureml.pipeline.core import PipelineParameter
 
 def deploy_step(model_dir, accuracy_file, test_dir, compute_target):
+    '''
+    This step registers and deploys a new model on its first run. On subsequent runs, it only registers
+    and deploys a new model if the training dataset has changed, or if the dataset is unchanged but the accuracy has improved.
+
+    :param model_dir: The reference to the directory containing the trained model
+    :type model_dir: DataReference
+    :param accuracy_file: The reference to the file containing the evaluation accuracy
+    :type accuracy_file: DataReference
+    :param test_dir: The reference to the directory containing the testing data
+    :type test_dir: DataReference
+    :param compute_target: The compute target to run the step on
+    :type compute_target: ComputeTarget
+
+    :return: The deploy step, step outputs dictionary (keys: scoring_url)
+    :rtype: PythonScriptStep, dict
+    '''
 
     scoring_url = PipelineData(
         name='scoring_url',
@@ -13,6 +29,7 @@ def deploy_step(model_dir, accuracy_file, test_dir, compute_target):
         is_directory=False)
 
     outputs = [scoring_url]
+    outputs_map = { 'scoring_url': scoring_url }
 
     step = PythonScriptStep(
         script_name='deploy.py',
@@ -29,5 +46,5 @@ def deploy_step(model_dir, accuracy_file, test_dir, compute_target):
         allow_reuse=False
     )
 
-    return step, outputs
+    return step, outputs_map
\ No newline at end of file
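Because scoring_url is published with pipeline_output_name='scoring_url', the deployed endpoint URL can be pulled back from a finished run. A minimal sketch, assuming the SDK v1 PipelineRun API and an already completed run object named pipeline_run (none of this is part of the patch):

```python
# Assumed SDK v1 usage: fetch the named pipeline output produced by the
# deploy step and download the single file that contains the scoring URL.
port_data = pipeline_run.get_pipeline_output('scoring_url')
port_data.download(local_path='.')
```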
diff --git a/modules/evaluate/.ipynb_checkpoints/evaluate-checkpoint.py b/modules/evaluate/.ipynb_checkpoints/evaluate-checkpoint.py
deleted file mode 100644
index ecd5737..0000000
--- a/modules/evaluate/.ipynb_checkpoints/evaluate-checkpoint.py
+++ /dev/null
@@ -1,76 +0,0 @@
-from __future__ import print_function, division
-import argparse
-import time
-import torch
-import torch.nn as nn
-from torchvision import datasets, models, transforms
-
-def load_data(test_dir):
-
-    test_transform = transforms.Compose([
-        transforms.Resize(200),
-        transforms.CenterCrop(200),
-        transforms.ToTensor(),
-        transforms.Normalize(mean=[0.485, 0.456, 0.405],
-                             std=[0.229, 0.224, 0.225])
-    ])
-
-    test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transform)
-    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=True, num_workers=4)
-
-    dataset_size = len(test_loader.dataset)
-    class_names = test_dataset.classes
-
-    return test_loader, dataset_size, class_names
-
-def evaluate_model(model, criterion, dataloader, dataset_size, class_names, device):
-
-    model.eval()
-    running_loss = 0.0
-    running_corrects = 0
-
-    for batch_idx, (inputs, labels) in enumerate(dataloader):
-        inputs = inputs.to(device)
-        labels = labels.to(device)
-
-        outputs = model(inputs)
-        _, preds = torch.max(outputs, 1)
-        loss = criterion(outputs, labels)
-
-        running_loss += loss.item() * inputs.size(0)
-        corrects = torch.sum(preds == labels.data)
-        running_corrects += corrects
-
-    print('{}/{} predictions correct.'.format(running_corrects, dataset_size))
-    loss = running_loss / dataset_size
-    acc = running_corrects.double() / dataset_size
-    print('Loss: {:.4f} Acc: {:.4f}'.format(loss, acc))
-
-    return acc
-
- # Define arguments
-parser = argparse.ArgumentParser(description='Evaluate arg parser')
-parser.add_argument('--test_dir', type=str, help='Directory where testing data is stored')
-parser.add_argument('--model_dir', type=str, help='Directory where model is stored')
-parser.add_argument('--accuracy_file', type=str, help='File to output the accuracy to')
-args = parser.parse_args()
-
-# Get arguments from parser
-test_dir = args.test_dir
-model_dir = args.model_dir
-accuracy_file = args.accuracy_file
-
-# Load testing data, model, and device
-test_loader, dataset_size, class_names = load_data(test_dir)
-model = torch.load(os.path.join(model_dir,'model.pt'))
-device = torch.device('cuda:0')
-
-# Define criterion
-criterion = nn.CrossEntropyLoss()
-
-# Evaluate model
-acc = evaluate_model(model, criterion, test_loader, dataset_size, class_names, device)
-
-# Output accuracy to file
-with open(accuracy_file, 'w+') as f:
-    f.write(str(acc.item()))
diff --git a/modules/evaluate/.ipynb_checkpoints/evaluate_step-checkpoint.py b/modules/evaluate/.ipynb_checkpoints/evaluate_step-checkpoint.py
deleted file mode 100644
index a3e613d..0000000
--- a/modules/evaluate/.ipynb_checkpoints/evaluate_step-checkpoint.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import os
-from azureml.pipeline.steps import PythonScriptStep
-from azureml.core.runconfig import RunConfiguration
-from azureml.core.conda_dependencies import CondaDependencies
-from azureml.pipeline.core import PipelineData
-from azureml.pipeline.core import PipelineParameter
-from azureml.pipeline.steps import EstimatorStep
-from azureml.train.dnn import PyTorch
-
-def evaluate_step(model_dir, test_dir, compute_target):
-
-    accuracy_file = PipelineData(
-        name='accuracy_file',
-        pipeline_output_name='accuracy_file',
-        datastore=test_dir.datastore,
-        output_mode='mount',
-        is_directory=False)
-
-    outputs = [accuracy_file]
-
-    estimator = PyTorch(
-        source_directory=os.path.dirname(os.path.abspath(__file__)),
-        entry_script='evaluate.py',
-        framework_version='1.3',
-        compute_target=compute_target,
-        use_gpu=True)
-
-    step = EstimatorStep(
-        estimator=estimator,
-        estimator_entry_script_arguments=[
-            '--test_dir', test_dir,
-            '--model_dir', model_dir,
-            '--accuracy_file', accuracy_file
-        ],
-        inputs=[model_dir, test_dir],
-        outputs=outputs,
-        compute_target=compute_target,
-        allow_reuse=False)
-
-    return step, outputs
diff --git a/modules/evaluate/__pycache__/evaluate_step.cpython-36.pyc b/modules/evaluate/__pycache__/evaluate_step.cpython-36.pyc
deleted file mode 100644
index d54268437b0c6c409e905627cc8bb1735f77f2b9..0000000000000000000000000000000000000000
Binary files a/modules/evaluate/__pycache__/evaluate_step.cpython-36.pyc and /dev/null differ
diff --git a/modules/evaluate/__pycache__/evaluate_step.cpython-37.pyc b/modules/evaluate/__pycache__/evaluate_step.cpython-37.pyc
deleted file mode 100644
index ed77e06a0cd89cffaccab45d7b05dcc0071df588..0000000000000000000000000000000000000000
Binary files a/modules/evaluate/__pycache__/evaluate_step.cpython-37.pyc and /dev/null differ
diff --git a/modules/ingestion/__pycache__/data_ingestion_step.cpython-36.pyc b/modules/ingestion/__pycache__/data_ingestion_step.cpython-36.pyc
deleted file mode 100644
Binary files a/modules/ingestion/__pycache__/data_ingestion_step.cpython-36.pyc and /dev/null differ
diff --git a/modules/ingestion/__pycache__/data_ingestion_step.cpython-37.pyc b/modules/ingestion/__pycache__/data_ingestion_step.cpython-37.pyc
deleted file mode 100644
Binary files a/modules/ingestion/__pycache__/data_ingestion_step.cpython-37.pyc and /dev/null differ
diff --git a/modules/ingestion/data_ingestion_step.py b/modules/ingestion/data_ingestion_step.py
index ac5199e..90346da 100644
--- a/modules/ingestion/data_ingestion_step.py
+++ b/modules/ingestion/data_ingestion_step.py
@@ -5,6 +5,20 @@ from azureml.pipeline.core import PipelineData
 from azureml.pipeline.core import PipelineParameter
 
 def data_ingestion_step(datastore_reference, compute_target):
+    '''
+    This step will leverage Azure Cognitive Services to search the web for images
+    to create a dataset. This replicates the real-world scenario of data being
+    ingested from a constantly changing source. The same 10 classes in the CIFAR-10 dataset
+    will be used (airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck).
+
+    :param datastore_reference: The reference to the datastore that will be used
+    :type datastore_reference: DataReference
+    :param compute_target: The compute target to run the step on
+    :type compute_target: ComputeTarget
+
+    :return: The ingestion step, step outputs dictionary (keys: raw_data_dir)
+    :rtype: PythonScriptStep, dict
+    '''
 
     run_config = RunConfiguration()
     run_config.environment.environment_variables = {'COGNITIVE_SERVICES_API_KEY': os.environ['COGNITIVE_SERVICES_API_KEY']}
@@ -20,6 +34,7 @@ def data_ingestion_step(datastore_reference, compute_target):
         is_directory=True)
 
     outputs = [raw_data_dir]
+    outputs_map = { 'raw_data_dir': raw_data_dir }
 
     step = PythonScriptStep(
         script_name='data_ingestion.py',
@@ -32,4 +47,4 @@ def data_ingestion_step(datastore_reference, compute_target):
         allow_reuse=False
     )
 
-    return step, outputs
+    return step, outputs_map
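The ingestion step above copies COGNITIVE_SERVICES_API_KEY out of the submitting process's environment and into the step's RunConfiguration, so the key must be present before the pipeline is constructed. A small sketch of the assumption this makes about the driver environment (the placeholder value is hypothetical):

```python
import os

# data_ingestion_step() reads this variable at pipeline-construction time
# and raises a KeyError if it is missing, so export it (or set it here)
# before building the steps.
os.environ.setdefault('COGNITIVE_SERVICES_API_KEY', '<your-cognitive-services-key>')
```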
diff --git a/modules/preprocess/.ipynb_checkpoints/data_preprocess-checkpoint.py b/modules/preprocess/.ipynb_checkpoints/data_preprocess-checkpoint.py
deleted file mode 100644
index 928df7c..0000000
--- a/modules/preprocess/.ipynb_checkpoints/data_preprocess-checkpoint.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import os
-import argparse
-import random
-import cv2
-from imutils import paths
-
-def preprocess_images(files, image_dim, output_dir, label):
-    '''
-    Load files, crop to consistent size, and save to respective folder
-    '''
-    # Make class directory
-    class_directory = '{}/{}'.format(output_dir, label)
-    if not os.path.exists(class_directory):
-        os.makedirs(class_directory)
-
-    # Iterate through files
-    for f in files:
-        temp = f.split('/')
-        output_file = '{}/{}/{}'.format(output_dir, label, temp[-1])
-        try:
-            image = cv2.imread(f)
-            image = cv2.resize(image, (image_dim, image_dim))
-            cv2.imwrite(output_file, image)
-            print('Cropping image: {}'.format(output_file))
-        except:
-            print('Removing corrupted file: {}'.format(output_file))
-
-# Define arguments
-parser = argparse.ArgumentParser(description='Web scraping arg parser')
-parser.add_argument('--raw_data_dir', type=str, help='Directory where raw data is stored')
-parser.add_argument('--image_dim', type=int, help='Image dimension to be cropped to')
-parser.add_argument('--train_dir', type=str, help='Directory to output the processed training data')
-parser.add_argument('--valid_dir', type=str, help='Directory to output the processed valid data')
-parser.add_argument('--test_dir', type=str, help='Directory to output the processed test data')
-args = parser.parse_args()
-
-# Get arguments from parser
-raw_data_dir = args.raw_data_dir
-image_dim = args.image_dim
-train_dir = args.train_dir
-valid_dir = args.valid_dir
-test_dir = args.test_dir
-
-# Make train, valid, test directories
-if not os.path.exists(train_dir):
-    os.makedirs(train_dir)
-
-if not os.path.exists(valid_dir):
-    os.makedirs(valid_dir)
-
-if not os.path.exists(test_dir):
-    os.makedirs(test_dir)
-
-# Get all the classes that have been sorted into directories from previous step
-classes = os.listdir(raw_data_dir)
-
-for label in classes:
-
-    # Get and shuffle files
-    image_files = list(paths.list_images('{}/{}'.format(raw_data_dir, label)))
-    random.shuffle(image_files)
-
-    # Split into train, valid, test sets
-    num_images = len(image_files)
-    train_files = image_files[0:int(num_images*0.7)]
-    valid_files = image_files[int(num_images*0.7):int(num_images*0.9)]
-    test_files = image_files[int(num_images*0.9):num_images]
-
-    # Load files, crop to consistent size, and save to respective folder
-    preprocess_images(train_files, image_dim, train_dir, label)
-    preprocess_images(valid_files, image_dim, valid_dir, label)
-    preprocess_images(test_files, image_dim, test_dir, label)
diff --git a/modules/preprocess/.ipynb_checkpoints/data_preprocess_step-checkpoint.py b/modules/preprocess/.ipynb_checkpoints/data_preprocess_step-checkpoint.py
deleted file mode 100644
index 0c09fee..0000000
--- a/modules/preprocess/.ipynb_checkpoints/data_preprocess_step-checkpoint.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import os
-from azureml.pipeline.steps import PythonScriptStep
-from azureml.core.runconfig import RunConfiguration
-from azureml.core.conda_dependencies import CondaDependencies
-from azureml.pipeline.core import PipelineData
-from azureml.pipeline.core import PipelineParameter
-
-def data_preprocess_step(raw_data_dir, compute_target):
-
-    run_config = RunConfiguration()
-    run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=['opencv-python==4.1.1.26', 'imutils==0.5.3'])
-    run_config.environment.docker.enabled = True
-
-    image_dim = PipelineParameter(name='image_dim', default_value=200)
-
-    train_dir = PipelineData(
-        name='train_dir',
-        pipeline_output_name='train_dir',
-        datastore=raw_data_dir.datastore,
-        output_mode='mount',
-        is_directory=True)
-
-    valid_dir = PipelineData(
-        name='valid_dir',
-        pipeline_output_name='valid_dir',
-        datastore=raw_data_dir.datastore,
-        output_mode='mount',
-        is_directory=True)
-
-    test_dir = PipelineData(
-        name='test_dir',
-        pipeline_output_name='test_dir',
-        datastore=raw_data_dir.datastore,
-        output_mode='mount',
-        is_directory=True)
-
-    outputs = [train_dir, valid_dir, test_dir]
-
-    step = PythonScriptStep(
-        script_name='data_preprocess.py',
-        arguments=[
-            '--raw_data_dir', raw_data_dir,
-            '--train_dir', train_dir,
-            '--valid_dir', valid_dir,
-            '--test_dir', test_dir,
-            '--image_dim', image_dim
-        ],
-        inputs=[raw_data_dir],
-        outputs=outputs,
-        compute_target=compute_target,
-        runconfig=run_config,
-        source_directory=os.path.dirname(os.path.abspath(__file__)),
-        allow_reuse=False
-    )
-
-    return step, outputs
diff --git a/modules/preprocess/__pycache__/data_preprocess_step.cpython-36.pyc b/modules/preprocess/__pycache__/data_preprocess_step.cpython-36.pyc
deleted file mode 100644
index bd3b89036672c604d227c092a3b503f7559cdca7..0000000000000000000000000000000000000000
Binary files a/modules/preprocess/__pycache__/data_preprocess_step.cpython-36.pyc and /dev/null differ
diff --git a/modules/preprocess/__pycache__/data_preprocess_step.cpython-37.pyc b/modules/preprocess/__pycache__/data_preprocess_step.cpython-37.pyc
deleted file mode 100644
Binary files a/modules/preprocess/__pycache__/data_preprocess_step.cpython-37.pyc and /dev/null differ
diff --git a/modules/preprocess/data_preprocess_step.py b/modules/preprocess/data_preprocess_step.py
index 0c09fee..f2de1b9 100644
--- a/modules/preprocess/data_preprocess_step.py
+++ b/modules/preprocess/data_preprocess_step.py
@@ -6,6 +6,18 @@ from azureml.pipeline.core import PipelineData
 from azureml.pipeline.core import PipelineParameter
 
 def data_preprocess_step(raw_data_dir, compute_target):
+    '''
+    This step will take the raw data downloaded from the previous step and preprocess it by cropping
+    it to a consistent size, shuffling the data, and splitting it into train, valid, and test directories.
+
+    :param raw_data_dir: The reference to the directory containing the raw data
+    :type raw_data_dir: DataReference
+    :param compute_target: The compute target to run the step on
+    :type compute_target: ComputeTarget
+
+    :return: The preprocess step, step outputs dictionary (keys: train_dir, valid_dir, test_dir)
+    :rtype: PythonScriptStep, dict
+    '''
 
     run_config = RunConfiguration()
     run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=['opencv-python==4.1.1.26', 'imutils==0.5.3'])
@@ -35,6 +47,11 @@ def data_preprocess_step(raw_data_dir, compute_target):
         is_directory=True)
 
     outputs = [train_dir, valid_dir, test_dir]
+    outputs_map = {
+        'train_dir': train_dir,
+        'valid_dir': valid_dir,
+        'test_dir': test_dir,
+    }
 
     step = PythonScriptStep(
         script_name='data_preprocess.py',
@@ -53,4 +70,4 @@ def data_preprocess_step(raw_data_dir, compute_target):
         allow_reuse=False
     )
 
-    return step, outputs
+    return step, outputs_map
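With the outputs now keyed by name, a quick local sanity check of the step wiring becomes trivial. A hypothetical snippet (assumes the compute target and ingestion outputs from object-recognition-pipeline.py are already in scope):

```python
# Hypothetical check: the preprocess step must publish exactly the outputs
# that the train and evaluate steps consume by name further down.
step, outputs = data_preprocess_step(data_ingestion_outputs['raw_data_dir'], cpu_compute_target)
assert set(outputs) == {'train_dir', 'valid_dir', 'test_dir'}
```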
diff --git a/modules/train/.ipynb_checkpoints/train-checkpoint.py b/modules/train/.ipynb_checkpoints/train-checkpoint.py
deleted file mode 100644
index 2536b21..0000000
--- a/modules/train/.ipynb_checkpoints/train-checkpoint.py
+++ /dev/null
@@ -1,11 +0,0 @@
- # Define arguments
-parser = argparse.ArgumentParser(description='Training arg parser')
-parser.add_argument('--train_dir', type=str, help='Directory where training data is stored')
-parser.add_argument('--valid_dir', type=str, help='Directory where validation data is stored')
-parser.add_argument('--output_dir', type=str, help='Directory to output the model to')
-args = parser.parse_args()
-
-# Get arguments from parser
-train_dir = args.train_dir
-valid_dir = args.valid_dir
-output_dir = args.output_dir
diff --git a/modules/train/.ipynb_checkpoints/train_step-checkpoint.py b/modules/train/.ipynb_checkpoints/train_step-checkpoint.py
deleted file mode 100644
index e92d2f0..0000000
--- a/modules/train/.ipynb_checkpoints/train_step-checkpoint.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import os
-from azureml.pipeline.steps import PythonScriptStep
-from azureml.core.runconfig import RunConfiguration
-from azureml.core.conda_dependencies import CondaDependencies
-from azureml.pipeline.core import PipelineData
-from azureml.pipeline.core import PipelineParameter
-from azureml.pipeline.steps import EstimatorStep
-from azureml.train.dnn import PyTorch
-
-def train_step(train_dir, valid_dir, compute_target):
-
-    num_epochs = PipelineParameter(name='num_epochs', default_value=25)
-    batch_size = PipelineParameter(name='batch_size', default_value=16)
-    learning_rate = PipelineParameter(name='learning_rate', default_value=0.001)
-    momentum = PipelineParameter(name='momentum', default_value=0.9)
-
-    model_dir = PipelineData(
-        name='model_dir',
-        pipeline_output_name='model_dir',
-        datastore=train_dir.datastore,
-        output_mode='mount',
-        is_directory=True)
-
-    outputs = [model_dir]
-
-    estimator = PyTorch(
-        source_directory=os.path.dirname(os.path.abspath(__file__)),
-        entry_script='train.py',
-        framework_version='1.3',
-        compute_target=compute_target,
-        use_gpu=True)
-
-    step = EstimatorStep(
-        estimator=estimator,
-        estimator_entry_script_arguments=[
-            '--train_dir', train_dir,
-            '--valid_dir', valid_dir,
-            '--output_dir', model_dir,
-            '--num_epochs', num_epochs,
-            '--batch_size', batch_size,
-            '--learning_rate', learning_rate,
-            '--momentum', momentum
-        ],
-        inputs=[train_dir, valid_dir],
-        compute_target=compute_target,
-        outputs=outputs,
-        allow_reuse=False)
-
-    return step, outputs
diff --git a/modules/train/__pycache__/train_step.cpython-36.pyc b/modules/train/__pycache__/train_step.cpython-36.pyc
deleted file mode 100644
index af535325addce24c617b094b77bf34bc540e7917..0000000000000000000000000000000000000000
Binary files a/modules/train/__pycache__/train_step.cpython-36.pyc and /dev/null differ
diff --git a/modules/train/__pycache__/train_step.cpython-37.pyc b/modules/train/__pycache__/train_step.cpython-37.pyc
deleted file mode 100644
index 1bc1368fdf278747c158a52c3c0bc1ad9b807ee3..0000000000000000000000000000000000000000
Binary files a/modules/train/__pycache__/train_step.cpython-37.pyc and /dev/null differ
diff --git a/modules/train/train_step.py b/modules/train/train_step.py
index e92d2f0..9f50c18 100644
--- a/modules/train/train_step.py
+++ b/modules/train/train_step.py
@@ -8,6 +8,20 @@ from azureml.pipeline.steps import EstimatorStep
 from azureml.train.dnn import PyTorch
 
 def train_step(train_dir, valid_dir, compute_target):
+    '''
+    This step will fine-tune a ResNet-18 model on our dataset using PyTorch.
+    It will use the corresponding input image directories as training and validation data.
+
+    :param train_dir: The reference to the directory containing the training data
+    :type train_dir: DataReference
+    :param valid_dir: The reference to the directory containing the validation data
+    :type valid_dir: DataReference
+    :param compute_target: The compute target to run the step on
+    :type compute_target: ComputeTarget
+
+    :return: The train step, step outputs dictionary (keys: model_dir)
+    :rtype: EstimatorStep, dict
+    '''
 
     num_epochs = PipelineParameter(name='num_epochs', default_value=25)
     batch_size = PipelineParameter(name='batch_size', default_value=16)
@@ -22,6 +36,7 @@ def train_step(train_dir, valid_dir, compute_target):
         is_directory=True)
 
     outputs = [model_dir]
+    outputs_map = { 'model_dir': model_dir }
 
     estimator = PyTorch(
         source_directory=os.path.dirname(os.path.abspath(__file__)),
@@ -46,4 +61,4 @@ def train_step(train_dir, valid_dir, compute_target):
         outputs=outputs,
         allow_reuse=False)
 
-    return step, outputs
+    return step, outputs_map
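All of the tunables above (num_epochs, batch_size, learning_rate, momentum, plus image_dim in the preprocess step) are declared as PipelineParameter objects, so they can be overridden per submission without touching the code. A sketch of how that would look, assuming an existing Experiment object named experiment and the assembled pipeline object; the override values are arbitrary examples:

```python
# Assumed SDK v1 pattern: override declared PipelineParameters at submit
# time; anything omitted falls back to its default_value.
run = experiment.submit(
    pipeline,
    pipeline_parameters={
        'image_dim': 224,        # preprocess step
        'num_epochs': 40,        # train step
        'batch_size': 32,
        'learning_rate': 0.0005,
        'momentum': 0.9,
    })
```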
diff --git a/object-recognition-pipeline.py b/object-recognition-pipeline.py
index 847671d..73c9855 100644
--- a/object-recognition-pipeline.py
+++ b/object-recognition-pipeline.py
@@ -41,16 +41,16 @@ datastore = DataReference(datastore, mode='mount')
 data_ingestion_step, data_ingestion_outputs = data_ingestion_step(datastore, cpu_compute_target)
 
 # Step 2: Data preprocessing
-data_preprocess_step, data_preprocess_outputs = data_preprocess_step(data_ingestion_outputs[0], cpu_compute_target)
+data_preprocess_step, data_preprocess_outputs = data_preprocess_step(data_ingestion_outputs['raw_data_dir'], cpu_compute_target)
 
 # Step 3: Train Model
-train_step, train_outputs = train_step(data_preprocess_outputs[0], data_preprocess_outputs[1], gpu_compute_target)
+train_step, train_outputs = train_step(data_preprocess_outputs['train_dir'], data_preprocess_outputs['valid_dir'], gpu_compute_target)
 
 # Step 4: Evaluate Model
-evaluate_step, evaluate_outputs = evaluate_step(train_outputs[0], data_preprocess_outputs[2], gpu_compute_target)
+evaluate_step, evaluate_outputs = evaluate_step(train_outputs['model_dir'], data_preprocess_outputs['test_dir'], gpu_compute_target)
 
 # Step 5: Deploy Model
-deploy_step, deploy_outputs = deploy_step(train_outputs[0], evaluate_outputs[0], data_preprocess_outputs[2], cpu_compute_target)
+deploy_step, deploy_outputs = deploy_step(train_outputs['model_dir'], evaluate_outputs['accuracy_file'], data_preprocess_outputs['test_dir'], cpu_compute_target)
 
 # Submit pipeline
 print('Submitting pipeline ...')
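The pipeline script is shown here only up to the submission message; for orientation, the wiring typically finishes along these lines. A sketch only, assuming a Workspace object ws and an experiment name chosen for illustration (not part of this diff):

```python
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

# Assemble the five step objects built above and submit them as one pipeline.
pipeline = Pipeline(workspace=ws, steps=[data_ingestion_step, data_preprocess_step,
                                         train_step, evaluate_step, deploy_step])
run = Experiment(ws, 'object-recognition-pipeline').submit(pipeline)
run.wait_for_completion(show_output=True)
```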