Add more configurations to gather better perf metrics (#59)

* Add a lot more perf metrics and granularity

* Add the Tensor Input Data Type

* Make the existing tests pass

* Add dynamic DLL loading at runtime

* Fix warnings

* Fix x86 warning

* Update README.md and UseGPU() function

* Add more tests for garbage input with the new flags

* Add tests for garbage data

* Revert temporary code

* Fixed typo

* Change old code

* Change tabs for spaces

* Fix bad IF statement

* Fix bad IF statement

* Revert non-backward-compatible change

* Revert non-backward-compatible changes

* Fix typo in README.md

* Fix "totalTime" showing "nan" in the CSV

* Retarget the test Windows SDK

If both projects target the same Windows SDK, we don't need to install
an older SDK just to run the tests.

* Fix the random garbage generator

* Fix the last failing test
This commit is contained in:
Patrice Vignola 2018-10-19 16:47:52 -07:00 коммит произвёл Ryan Lai
Родитель 19d38c3892
Коммит d7492d3da8
16 изменённых файлов: 1370 добавлений и 737 удалений

Двоичные данные
SharedContent/models/keras_Add_ImageNet_small.onnx Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -4,6 +4,9 @@
#include <processthreadsapi.h>
#include <winnt.h>
#include <Winbase.h>
#include <fstream>
#include <algorithm>
#include <vector>
using namespace Microsoft::VisualStudio::CppUnitTestFramework;
static int RunProc(LPWSTR commandLine)
@ -25,57 +28,285 @@ static int RunProc(LPWSTR commandLine)
namespace WinMLRunnerTest
{
static const std::wstring CURRENT_PATH = FileHelper::GetModulePath();
static const std::wstring EXE_PATH = CURRENT_PATH + L"WinMLRunner.exe";
static const std::wstring INPUT_FOLDER_PATH = CURRENT_PATH + L"test_folder_input";
static const std::wstring OUTPUT_PATH = CURRENT_PATH + L"test_output.csv";
// Joins the given arguments into a single command line, separating them
// with spaces. Note that the result always carries a trailing space.
static std::wstring BuildCommand(std::initializer_list<std::wstring>&& arguments)
{
    std::wstring result;
    for (auto it = arguments.begin(); it != arguments.end(); ++it)
    {
        result.append(*it);
        result.push_back(L' ');
    }
    return result;
}
// Returns the number of newline characters in the output CSV produced by
// WinMLRunner (i.e. the number of completed lines, header included).
static size_t GetOutputCSVLineCount()
{
    std::ifstream csvFile(OUTPUT_PATH);
    size_t lineCount = 0;
    char ch;
    while (csvFile.get(ch))
    {
        if (ch == '\n')
        {
            ++lineCount;
        }
    }
    return lineCount;
}
// Ensures the test input folder exists and copies the given model files
// into it via shell commands ("mkdir" / "Copy").
// NOTE(review): despite its name, this helper *adds* models to the folder;
// it does not remove anything. Renaming would break callers, so only the
// documentation flags it here.
static void RemoveModelsFromFolder(std::initializer_list<std::string>&& modelList)
{
    // Narrow copy of the folder path, computed once instead of per model.
    const std::string inputFolder(INPUT_FOLDER_PATH.begin(), INPUT_FOLDER_PATH.end());
    // Make the test_models folder (no-op if it already exists).
    const std::string mkFolderCommand = "mkdir " + inputFolder;
    system(mkFolderCommand.c_str());
    // Copy models from the list to test_folder_input. Take each entry by
    // const reference to avoid copying the string every iteration.
    for (const auto& model : modelList)
    {
        std::string copyCommand = "Copy ";
        copyCommand += model;
        copyCommand += ' ' + inputFolder;
        system(copyCommand.c_str());
    }
}
// End-to-end tests that launch WinMLRunner.exe with garbage (randomly
// generated) input and verify both the process exit code and the number of
// lines written to the perf output CSV. Each CSV assertion expects one extra
// line for the header row.
// NOTE(review): the `(wchar_t *)command.c_str()` casts below strip const from
// the command-line buffer; CreateProcessW may modify that buffer, so a
// writable copy would be safer — left as-is to match the existing call style.
TEST_CLASS(GarbageInputTest)
{
public:
    TEST_CLASS_INITIALIZE(SetupClass)
    {
        // Make test_folder_input folder before starting the tests
        std::string mkFolderCommand = "mkdir " + std::string(INPUT_FOLDER_PATH.begin(), INPUT_FOLDER_PATH.end());
        system(mkFolderCommand.c_str());
        std::vector<std::string> models = { "SqueezeNet.onnx", "keras_Add_ImageNet_small.onnx" };
        // Copy models from list to test_folder_input (const ref: no per-iteration copy)
        for (const auto& model : models)
        {
            std::string copyCommand = "Copy ";
            copyCommand += model;
            copyCommand += ' ' + std::string(INPUT_FOLDER_PATH.begin(), INPUT_FOLDER_PATH.end());
            system(copyCommand.c_str());
        }
    }
    TEST_CLASS_CLEANUP(CleanupClass)
    {
        // Delete test_folder_input folder after all tests have been run
        std::string copyCommand = "rd /s /q ";
        copyCommand += std::string(INPUT_FOLDER_PATH.begin(), INPUT_FOLDER_PATH.end());
        system(copyCommand.c_str());
    }
    TEST_METHOD_CLEANUP(CleanupMethod)
    {
        // Remove output.csv after each test
        std::remove(std::string(OUTPUT_PATH.begin(), OUTPUT_PATH.end()).c_str());
    }
    // Runs on both CPU and GPU: two result rows + header.
    // (Pre-refactor residue that redeclared `command` has been removed.)
    TEST_METHOD(GarbageInputCpuAndGpu)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(3), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputOnlyCpu)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-CPU" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputOnlyGpu)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-GPU" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputCpuDeviceCpuBoundRGBImage)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-CPU", L"-CPUBoundInput", L"-RGB" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputCpuDeviceCpuBoundBGRImage)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-CPU", L"-CPUBoundInput", L"-BGR" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputCpuDeviceCpuBoundTensor)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-CPU", L"-CPUBoundInput", L"-tensor" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputCpuDeviceGpuBoundRGBImage)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-CPU", L"-GPUBoundInput", L"-RGB" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputCpuDeviceGpuBoundBGRImage)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-CPU", L"-GPUBoundInput", L"-BGR" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputCpuDeviceGpuBoundTensor)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-CPU", L"-GPUBoundInput", L"-tensor" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputGpuDeviceCpuBoundRGBImage)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-RGB" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputGpuDeviceCpuBoundBGRImage)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-BGR" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputGpuDeviceCpuBoundTensor)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-GPU", L"-CPUBoundInput", L"-tensor" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputGpuDeviceGpuBoundRGBImage)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-RGB" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputGpuDeviceGpuBoundBGRImage)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-BGR" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    TEST_METHOD(GarbageInputGpuDeviceGpuBoundTensor)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-output", OUTPUT_PATH, L"-perf", L"-GPU", L"-GPUBoundInput", L"-tensor" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
    }
    // 2 devices x 2 binding locations x 3 input formats = 12 rows + header.
    TEST_METHOD(GarbageInputAllPermutations)
    {
        const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
        const std::wstring command = BuildCommand({
            EXE_PATH,
            L"-model",
            modelPath,
            L"-output",
            OUTPUT_PATH,
            L"-perf",
            L"-CPU",
            L"-GPU",
            L"-CPUBoundInput",
            L"-GPUBoundInput",
            L"-RGB",
            L"-BGR",
            L"-tensor"
        });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(13), GetOutputCSVLineCount());
    }
    // Runs every model copied into the input folder by SetupClass.
    // (Pre-refactor residue that redeclared `command` has been removed.)
    TEST_METHOD(RunAllModelsInFolderGarbageInput)
    {
        const std::wstring command = BuildCommand({ EXE_PATH, L"-folder", INPUT_FOLDER_PATH, L"-output", OUTPUT_PATH, L"-perf" });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(5), GetOutputCSVLineCount());
    }
    TEST_METHOD(RunAllModelsInFolderGarbageInputWithAllPermutations)
    {
        const std::wstring command = BuildCommand({
            EXE_PATH,
            L"-folder",
            INPUT_FOLDER_PATH,
            L"-output",
            OUTPUT_PATH,
            L"-perf",
            L"-CPU",
            L"-GPU",
            L"-CPUBoundInput",
            L"-GPUBoundInput",
            L"-RGB",
            L"-BGR",
            L"-tensor"
        });
        Assert::AreEqual(0, RunProc((wchar_t *)command.c_str()));
        // We need to expect one more line because of the header
        Assert::AreEqual(static_cast<size_t>(25), GetOutputCSVLineCount());
    }
};

Просмотреть файл

@ -204,6 +204,17 @@
<DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="..\..\SharedContent\models\keras_Add_ImageNet_small.onnx">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</ExcludedFromBuild>
<DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</DeploymentContent>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</ExcludedFromBuild>
<DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</DeploymentContent>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
<DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
<DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<Content Include="..\..\SharedContent\media\fish.png">

Просмотреть файл

@ -1,139 +1,60 @@
#pragma once
#include <random>
#include <time.h>
#include "Common.h"
#include "ModelBinding.h"
using namespace winrt::Windows::Graphics::Imaging;
#include "CommandLineArgs.h"
using namespace Windows::Media;
using namespace winrt::Windows::Foundation::Collections;
using namespace Windows::Storage;
using namespace winrt::Windows::AI::MachineLearning;
using namespace winrt::Windows::Foundation;
using namespace winrt::Windows::Foundation::Collections;
using namespace winrt::Windows::Graphics::DirectX;
using namespace winrt::Windows::Graphics::Imaging;
namespace BindingUtilities
{
void BindTensorsFromGarbageData(LearningModelBinding context, LearningModel model)
SoftwareBitmap GenerateGarbageImage(const TensorFeatureDescriptor& imageDescriptor, InputDataType inputDataType)
{
for (auto&& description : model.InputFeatures())
assert(inputDataType != InputDataType::Tensor);
// We assume NCHW and NCDHW
uint64_t width = imageDescriptor.Shape().GetAt(imageDescriptor.Shape().Size() - 1);
uint64_t height = imageDescriptor.Shape().GetAt(imageDescriptor.Shape().Size() - 2);
uint64_t channelCount = imageDescriptor.Shape().GetAt(1);
uint64_t batchCount = imageDescriptor.Shape().GetAt(0);
// If the batchCount is infinite, we can put as many images as we want
if (batchCount >= ULLONG_MAX)
{
if (description == nullptr)
{
std::cout << "BindingUtilities: Learning model has no binding description." << std::endl;
throw hresult_invalid_argument();
}
hstring name = description.Name();
TensorFeatureDescriptor tensorDescriptor = nullptr;
try
{
tensorDescriptor = description.as<TensorFeatureDescriptor>();
}
catch (...)
{
std::cout << "BindingUtilities: Input Descriptor type isn't tensor." << std::endl;
throw;
}
TensorKind tensorKind = tensorDescriptor.TensorKind();
switch (tensorKind)
{
case TensorKind::Undefined:
{
std::cout << "BindingUtilities: TensorKind is undefined." << std::endl;
throw hresult_invalid_argument();
}
case TensorKind::Float:
{
ModelBinding<float> binding(description);
ITensor tensor = TensorFloat::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Float16:
{
ModelBinding<float> binding(description);
ITensor tensor = TensorFloat16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Double:
{
ModelBinding<double> binding(description);
ITensor tensor = TensorDouble::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int8:
{
ModelBinding<uint8_t> binding(description);
ITensor tensor = TensorInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt8:
{
ModelBinding<uint8_t> binding(description);
ITensor tensor = TensorUInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int16:
{
ModelBinding<int16_t> binding(description);
ITensor tensor = TensorInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt16:
{
ModelBinding<uint16_t> binding(description);
ITensor tensor = TensorUInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int32:
{
ModelBinding<int32_t> binding(description);
ITensor tensor = TensorInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt32:
{
ModelBinding<uint32_t> binding(description);
ITensor tensor = TensorUInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int64:
{
ModelBinding<int64_t> binding(description);
ITensor tensor = TensorInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt64:
{
ModelBinding<uint64_t> binding(description);
ITensor tensor = TensorUInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::String:
{
ModelBinding<hstring> binding(description);
ITensor tensor = TensorString::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
default:
{
std::cout << "BindingUtilities: TensorKind binding has not been implemented." << std::endl;
throw hresult_not_implemented();
}
}
batchCount = 3;
}
// We have to create RGBA8 or BGRA8 images, so we need 4 channels
uint32_t totalByteSize = static_cast<uint32_t>(width) * static_cast<uint32_t>(height) * 4;
// Generate random values for the image
std::vector<uint8_t> data(totalByteSize);
static std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned int> randomBitsEngine;
randomBitsEngine.seed(static_cast<unsigned int>(time(nullptr)));
std::generate(data.begin(), data.end(), randomBitsEngine);
// Write the values to a buffer
winrt::array_view<const uint8_t> dataView(data);
InMemoryRandomAccessStream dataStream;
DataWriter dataWriter(dataStream);
dataWriter.WriteBytes(dataView);
IBuffer buffer = dataWriter.DetachBuffer();
// Create the software bitmap
return SoftwareBitmap::CreateCopyFromBuffer(buffer, TypeHelper::GetBitmapPixelFormat(inputDataType), static_cast<int32_t>(width), static_cast<int32_t>(height));
}
VideoFrame LoadImageFile(hstring filePath)
SoftwareBitmap LoadImageFile(const hstring& filePath, InputDataType inputDataType)
{
assert(inputDataType != InputDataType::Tensor);
try
{
// open the file
@ -143,11 +64,9 @@ namespace BindingUtilities
// Create the decoder from the stream
BitmapDecoder decoder = BitmapDecoder::CreateAsync(stream).get();
// get the bitmap
SoftwareBitmap softwareBitmap = decoder.GetSoftwareBitmapAsync().get();
// load a videoframe from it
VideoFrame inputImage = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap);
// all done
return inputImage;
SoftwareBitmap softwareBitmap = decoder.GetSoftwareBitmapAsync(TypeHelper::GetBitmapPixelFormat(inputDataType), BitmapAlphaMode::Ignore).get();
return softwareBitmap;
}
catch (...)
{
@ -156,6 +75,21 @@ namespace BindingUtilities
}
}
// Wraps the software bitmap in a VideoFrame. For GPU-bound input the frame
// is copied into a Direct3D11-surface-backed VideoFrame; otherwise the
// CPU-backed frame is returned directly.
VideoFrame CreateVideoFrame(const SoftwareBitmap& softwareBitmap, InputBindingType inputBindingType, InputDataType inputDataType)
{
    VideoFrame cpuFrame = VideoFrame::CreateWithSoftwareBitmap(softwareBitmap);
    if (inputBindingType != InputBindingType::GPU)
    {
        return cpuFrame;
    }
    VideoFrame gpuFrame = VideoFrame::CreateAsDirect3D11SurfaceBacked(TypeHelper::GetDirectXPixelFormat(inputDataType), softwareBitmap.PixelWidth(), softwareBitmap.PixelHeight());
    cpuFrame.CopyToAsync(gpuFrame).get();
    return gpuFrame;
}
std::vector<std::string> ReadCsvLine(std::ifstream& fileStream)
{
std::vector<std::string> elementStrings;
@ -184,7 +118,7 @@ namespace BindingUtilities
throw hresult_invalid_argument(L"CSV Input is size/shape is different from what model expects");
}
T* data = binding.GetData();
for (auto &elementString : elementStrings)
for (const auto &elementString : elementStrings)
{
T value;
std::stringstream(elementString) >> value;
@ -193,8 +127,7 @@ namespace BindingUtilities
}
}
// Binds tensor floats, ints, doubles from CSV data.
void BindCSVDataToContext(LearningModelBinding context, LearningModel model, std::wstring csvFilePath)
std::vector<std::string> ParseCSVElementStrings(const std::wstring& csvFilePath)
{
std::ifstream fileStream;
fileStream.open(csvFilePath);
@ -202,156 +135,148 @@ namespace BindingUtilities
{
ThrowFailure(L"BindingUtilities: could not open data file.");
}
for (auto&& description : model.InputFeatures())
{
if (description == nullptr)
{
std::cout << "BindingUtilities: Learning model has no binding description." << std::endl;
std::vector<std::string> elementStrings = ReadCsvLine(fileStream);
return elementStrings;
}
// Binds tensor floats, ints, doubles from CSV data.
ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const std::wstring& csvFilePath)
{
auto name = description.Name();
auto tensorDescriptor = description.try_as<TensorFeatureDescriptor>();
if (!tensorDescriptor)
{
std::cout << "BindingUtilities: Input Descriptor type isn't tensor." << std::endl;
throw;
}
switch (tensorDescriptor.TensorKind())
{
case TensorKind::Undefined:
{
std::cout << "BindingUtilities: TensorKind is undefined." << std::endl;
throw hresult_invalid_argument();
}
hstring name = description.Name();
TensorFeatureDescriptor tensorDescriptor = description.as<TensorFeatureDescriptor>();
TensorKind tensorKind = tensorDescriptor.TensorKind();
std::vector<std::string> elementStrings = ReadCsvLine(fileStream);
switch (tensorKind)
case TensorKind::Float:
{
case TensorKind::Undefined:
{
std::cout << "BindingUtilities: TensorKind is undefined." << std::endl;
throw hresult_invalid_argument();
}
case TensorKind::Float:
{
ModelBinding<float> binding(description);
WriteDataToBinding<float>(elementStrings, binding);
ITensor tensor = TensorFloat::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Float16:
{
ModelBinding<float> binding(description);
WriteDataToBinding<float>(elementStrings, binding);
ITensor tensor = TensorFloat16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Double:
{
ModelBinding<double> binding(description);
WriteDataToBinding<double>(elementStrings, binding);
ITensor tensor = TensorDouble::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int8:
{
ModelBinding<uint8_t> binding(description);
WriteDataToBinding<uint8_t>(elementStrings, binding);
ITensor tensor = TensorInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt8:
{
ModelBinding<uint8_t> binding(description);
WriteDataToBinding<uint8_t>(elementStrings, binding);
ITensor tensor = TensorUInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int16:
{
ModelBinding<int16_t> binding(description);
WriteDataToBinding<int16_t>(elementStrings, binding);
ITensor tensor = TensorInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt16:
{
ModelBinding<uint16_t> binding(description);
WriteDataToBinding<uint16_t>(elementStrings, binding);
ITensor tensor = TensorUInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int32:
{
ModelBinding<int32_t> binding(description);
WriteDataToBinding<int32_t>(elementStrings, binding);
ITensor tensor = TensorInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt32:
{
ModelBinding<uint32_t> binding(description);
WriteDataToBinding<uint32_t>(elementStrings, binding);
ITensor tensor = TensorUInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::Int64:
{
ModelBinding<int64_t> binding(description);
WriteDataToBinding<int64_t>(elementStrings, binding);
ITensor tensor = TensorInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
case TensorKind::UInt64:
{
ModelBinding<uint64_t> binding(description);
WriteDataToBinding<uint64_t>(elementStrings, binding);
ITensor tensor = TensorUInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
context.Bind(name, tensor);
}
break;
default:
{
std::cout << "BindingUtilities: TensorKind has not been implemented." << std::endl;
throw hresult_not_implemented();
}
ModelBinding<float> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<float>(elementStrings, binding);
return TensorFloat::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::Float16:
{
ModelBinding<float> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<float>(elementStrings, binding);
return TensorFloat16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::Double:
{
ModelBinding<double> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<double>(elementStrings, binding);
return TensorDouble::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::Int8:
{
ModelBinding<uint8_t> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<uint8_t>(elementStrings, binding);
return TensorInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::UInt8:
{
ModelBinding<uint8_t> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<uint8_t>(elementStrings, binding);
return TensorUInt8Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::Int16:
{
ModelBinding<int16_t> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<int16_t>(elementStrings, binding);
return TensorInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::UInt16:
{
ModelBinding<uint16_t> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<uint16_t>(elementStrings, binding);
return TensorUInt16Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::Int32:
{
ModelBinding<int32_t> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<int32_t>(elementStrings, binding);
return TensorInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::UInt32:
{
ModelBinding<uint32_t> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<uint32_t>(elementStrings, binding);
return TensorUInt32Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::Int64:
{
ModelBinding<int64_t> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<int64_t>(elementStrings, binding);
return TensorInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
case TensorKind::UInt64:
{
ModelBinding<uint64_t> binding(description);
auto elementStrings = csvFilePath.empty() ? std::vector<std::string>(binding.GetDataBufferSize()) : ParseCSVElementStrings(csvFilePath);
WriteDataToBinding<uint64_t>(elementStrings, binding);
return TensorUInt64Bit::CreateFromArray(binding.GetShapeBuffer(), binding.GetDataBuffer());
}
break;
}
std::cout << "BindingUtilities: TensorKind has not been implemented." << std::endl;
throw hresult_not_implemented();
}
void BindImageToContext(LearningModelBinding context, LearningModel model, std::wstring imagePath)
ImageFeatureValue CreateBindableImage(const ILearningModelFeatureDescriptor& featureDescriptor, const std::wstring& imagePath, InputBindingType inputBindingType, InputDataType inputDataType)
{
context.Clear();
for (auto&& description : model.InputFeatures())
auto imageDescriptor = featureDescriptor.try_as<TensorFeatureDescriptor>();
if (!imageDescriptor)
{
hstring name = description.Name();
auto Kind = description.Kind();
auto videoFrame = LoadImageFile(imagePath.c_str());
if (videoFrame == nullptr)
{
std::cout << "BindingUtilities: Cannot bind image to LearningModelBinding." << std::endl;
std::cout << std::endl;
throw_hresult(E_FAIL);
}
try
{
auto featureValue = ImageFeatureValue::CreateFromVideoFrame(videoFrame);
context.Bind(name, featureValue);
}
catch (hresult_error hr)
{
std::wcout << hr.message().c_str() << std::endl;
throw;
}
std::cout << "BindingUtilities: Input Descriptor type isn't tensor." << std::endl;
throw;
}
auto softwareBitmap = imagePath.empty()
? GenerateGarbageImage(imageDescriptor, inputDataType)
: LoadImageFile(imagePath.c_str(), inputDataType);
auto videoFrame = CreateVideoFrame(softwareBitmap, inputBindingType, inputDataType);
return ImageFeatureValue::CreateFromVideoFrame(videoFrame);
}
template< typename K, typename V>
template<typename K, typename V>
void OutputSequenceBinding(IMapView<hstring, Windows::Foundation::IInspectable> results, hstring name)
{
auto map = results.Lookup(name).as<IVectorView<IMap<int64_t, float>>>().GetAt(0);
auto map = results.Lookup(name).as<IVectorView<IMap<K, V>>>().GetAt(0);
auto iter = map.First();
K maxKey = -1;
@ -370,9 +295,12 @@ namespace BindingUtilities
std::cout << " " << maxKey << " " << maxVal << std::endl;
}
void PrintEvaluationResults(LearningModel model, CommandLineArgs args, IMapView<hstring, Windows::Foundation::IInspectable> results)
void PrintEvaluationResults(const LearningModel& model, const CommandLineArgs& args, const IMapView<hstring, Windows::Foundation::IInspectable>& results)
{
if (args.Silent()) return;
std::cout << "Outputting results.. " << std::endl;
for (auto&& desc : model.OutputFeatures())
{
if (desc.Kind() == LearningModelFeatureKind::Tensor)
@ -447,18 +375,4 @@ namespace BindingUtilities
std::cout << std::endl;
}
}
void BindGarbageDataToContext(LearningModelBinding context, LearningModel model)
{
context.Clear();
try
{
BindTensorsFromGarbageData(context, model);
}
catch (...)
{
std::cout << "Could not bind from garbage data. Currently only supports binding garbage data for tensor inputs." << std::endl;
throw;
}
}
};

Просмотреть файл

@ -15,9 +15,17 @@ void CommandLineArgs::PrintUsage() {
std::cout << " -GPU : run model on default GPU" << std::endl;
std::cout << " -GPUHighPerformance : run model on GPU with highest performance" << std::endl;
std::cout << " -GPUMinPower : run model on GPU with the least power" << std::endl;
std::cout << " -CPUBoundInput : bind the input to the CPU" << std::endl;
std::cout << " -GPUBoundInput : bind the input to the GPU" << std::endl;
std::cout << " -RGB : load the input as an RGB image" << std::endl;
std::cout << " -BGR : load the input as a BGR image" << std::endl;
std::cout << " -tensor : load the input as a tensor" << std::endl;
std::cout << " -perf : capture timing measurements" << std::endl;
std::cout << " -iterations : # times perf measurements will be run/averaged" << std::endl;
std::cout << " -input <fully qualified path>: binds image or CSV to model" << std::endl;
std::cout << " -output <fully qualified path>: csv file to write the perf results to" << std::endl;
std::cout << " -IgnoreFirstRun : ignore the first run in the perf results when calculating the average" << std::endl;
std::cout << " -silent: only errors are printed to the console" << std::endl;
std::cout << " -debug: print trace logs" << std::endl;
}
@ -35,17 +43,14 @@ CommandLineArgs::CommandLineArgs()
else if ((_wcsicmp(args[i], L"-GPU") == 0))
{
m_useGPU = true;
m_deviceKind = LearningModelDeviceKind::DirectX;
}
else if ((_wcsicmp(args[i], L"-GPUHighPerformance") == 0))
{
m_useGPU = true;
m_deviceKind = LearningModelDeviceKind::DirectXHighPerformance;
m_useGPUHighPerformance = true;
}
else if ((_wcsicmp(args[i], L"-GPUMinPower") == 0))
{
m_useGPU = true;
m_deviceKind = LearningModelDeviceKind::DirectXMinPower;
m_useGPUMinPower = true;
}
if ((_wcsicmp(args[i], L"-iterations") == 0) && (i + 1 < numArgs))
{
@ -63,6 +68,34 @@ CommandLineArgs::CommandLineArgs()
{
m_inputData = args[++i];
}
else if ((_wcsicmp(args[i], L"-output") == 0))
{
m_outputPath = args[++i];
}
else if ((_wcsicmp(args[i], L"-RGB") == 0))
{
m_useRGB = true;
}
else if ((_wcsicmp(args[i], L"-BGR") == 0))
{
m_useBGR = true;
}
else if ((_wcsicmp(args[i], L"-tensor") == 0))
{
m_useTensor = true;
}
else if ((_wcsicmp(args[i], L"-CPUBoundInput") == 0))
{
m_useCPUBoundInput = true;
}
else if ((_wcsicmp(args[i], L"-GPUBoundInput") == 0))
{
m_useGPUBoundInput = true;
}
else if ((_wcsicmp(args[i], L"-IgnoreFirstRun") == 0))
{
m_ignoreFirstRun = true;
}
else if ((_wcsicmp(args[i], L"-perf") == 0))
{
m_perfCapture = true;
@ -71,6 +104,10 @@ CommandLineArgs::CommandLineArgs()
{
m_debug = true;
}
else if ((_wcsicmp(args[i], L"-silent") == 0))
{
m_silent = true;
}
else if ((_wcsicmp(args[i], L"/?") == 0))
{
PrintUsage();
@ -78,8 +115,6 @@ CommandLineArgs::CommandLineArgs()
}
}
m_useCPUandGPU = m_useCPU == m_useGPU;
if (m_modelPath.empty() && m_modelFolderPath.empty())
{
std::cout << std::endl;

Просмотреть файл

@ -7,35 +7,74 @@ public:
CommandLineArgs();
void PrintUsage();
bool UseCPU() const { return m_useCPU; }
bool UseGPU() const { return m_useGPU; }
bool UseCPUandGPU() const { return m_useCPUandGPU; }
Windows::AI::MachineLearning::LearningModelDeviceKind DeviceKind() const { return m_deviceKind; }
bool UseGPUHighPerformance() const { return m_useGPUHighPerformance; }
bool UseGPUMinPower() const { return m_useGPUMinPower; }
bool UseBGR() const { return m_useBGR; }
bool UseGPUBoundInput() const { return m_useGPUBoundInput; }
bool IgnoreFirstRun() const { return m_ignoreFirstRun; }
bool PerfCapture() const { return m_perfCapture; }
bool EnableDebugOutput() const { return m_debug; }
bool EnableDebugOutput() const { return m_debug; }
bool Silent() const { return m_silent; }
const std::wstring& ImagePath() const { return m_imagePath; }
const std::wstring& CsvPath() const { return m_csvData; }
const std::wstring& OutputPath() const { return m_outputPath; }
const std::wstring& FolderPath() const { return m_modelFolderPath; }
const std::wstring& ModelPath() const { return m_modelPath; }
void SetModelPath(std::wstring path) { m_modelPath = path; }
UINT NumIterations() const { return m_numIterations; }
bool UseRGB() const
{
// If an image is specified without flags, we load it as a BGR image by default
return m_useRGB || (!m_imagePath.empty() && !m_useBGR && !m_useTensor);
}
bool UseTensor() const
{
// Tensor input is the default input if no flag is specified
return m_useTensor || (!m_useBGR && !UseRGB());
}
bool UseGPU() const
{
return m_useGPU || (!m_useCPU && !m_useGPUHighPerformance && !m_useGPUMinPower);
}
bool UseCPU() const
{
// CPU is the default device if no flag is specified
return m_useCPU || (!m_useGPU && !m_useGPUHighPerformance && !m_useGPUMinPower);
}
bool UseCPUBoundInput() const
{
// CPU is the default input binding if no flag is specified
return m_useCPUBoundInput || !m_useGPUBoundInput;
}
uint32_t NumIterations() const { return m_numIterations; }
private:
bool m_perfCapture = false;
bool m_useCPU = false;
bool m_useGPU = false;
bool m_useCPUandGPU = false;
bool m_useGPUHighPerformance = false;
bool m_useGPUMinPower = false;
bool m_useRGB = false;
bool m_useBGR = false;
bool m_useTensor = false;
bool m_useCPUBoundInput = false;
bool m_useGPUBoundInput = false;
bool m_ignoreFirstRun = false;
bool m_debug = false;
Windows::AI::MachineLearning::LearningModelDeviceKind m_deviceKind = Windows::AI::MachineLearning::LearningModelDeviceKind::DirectX;
bool m_silent = false;
std::wstring m_modelFolderPath;
std::wstring m_modelPath;
std::wstring m_imagePath;
std::wstring m_csvData;
std::wstring m_inputData;
UINT m_numIterations = 1;
std::wstring m_outputPath;
uint32_t m_numIterations = 1;
};

Просмотреть файл

@ -6,9 +6,8 @@
#include <winrt/Windows.Media.h>
#include <winrt/Windows.Graphics.Imaging.h>
#include <winrt/Windows.Media.h>
#include "winrt/Windows.Storage.h"
#include <winrt/Windows.Storage.h>
#include <winrt/Windows.Storage.Streams.h>
#include "TimerHelper.h"
#include <vector>
#include <string>
#include <iostream>
@ -19,6 +18,8 @@
#include <cassert>
#include <fstream>
#include <dxgi1_6.h>
#include "TypeHelper.h"
#include "TimerHelper.h"
enum WINML_MODEL_TEST_PERF
{

Просмотреть файл

@ -0,0 +1,25 @@
#include "Filehelper.h"
#include <libloaderapi.h>
#include <stdlib.h>
EXTERN_C IMAGE_DOS_HEADER __ImageBase;
namespace FileHelper
{
std::wstring GetModulePath()
{
std::wstring val;
wchar_t modulePath[MAX_PATH] = { 0 };
GetModuleFileNameW((HINSTANCE)&__ImageBase, modulePath, _countof(modulePath));
wchar_t drive[_MAX_DRIVE];
wchar_t dir[_MAX_DIR];
wchar_t filename[_MAX_FNAME];
wchar_t ext[_MAX_EXT];
errno_t err = _wsplitpath_s(modulePath, drive, _MAX_DRIVE, dir, _MAX_DIR, filename, _MAX_FNAME, ext, _MAX_EXT);
val = drive;
val += dir;
return val;
}
}

Просмотреть файл

@ -0,0 +1,7 @@
#pragma once
#include <string>
#include <Windows.h>
namespace FileHelper
{
std::wstring GetModulePath();
}

Просмотреть файл

@ -7,10 +7,192 @@
Profiler<WINML_MODEL_TEST_PERF> g_Profiler;
LearningModel LoadModel(const std::wstring path, bool capturePerf, bool silent, OutputHelper& output)
{
Timer timer;
LearningModel model = nullptr;
output.PrintLoadingInfo(path);
try
{
if (capturePerf)
{
WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::LOAD_MODEL);
timer.Start();
}
model = LearningModel::LoadFromFilePath(path);
if (capturePerf)
{
WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::LOAD_MODEL);
output.m_clockLoadTime = timer.Stop();
}
}
catch (hresult_error hr)
{
std::wcout << "Load Model: " << path << " [FAILED]" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
throw;
}
output.PrintModelInfo(path, model);
return model;
}
std::vector<std::wstring> GetModelsInDirectory(CommandLineArgs& args, OutputHelper* output)
{
std::vector<std::wstring> modelPaths;
std::wstring folderPath = args.FolderPath();
for (auto & it : std::filesystem::directory_iterator(args.FolderPath()))
{
std::string path = it.path().string();
if (it.path().string().find(".onnx") != std::string::npos ||
it.path().string().find(".pb") != std::string::npos)
{
std::wstring fileName;
fileName.assign(path.begin(), path.end());
args.SetModelPath(fileName);
modelPaths.push_back(fileName);
}
}
return modelPaths;
}
std::vector<ILearningModelFeatureValue> GenerateInputFeatures(const LearningModel& model, const CommandLineArgs& args, InputBindingType inputBindingType, InputDataType inputDataType)
{
std::vector<ILearningModelFeatureValue> inputFeatures;
for (uint32_t i = 0; i < model.InputFeatures().Size(); i++)
{
auto&& description = model.InputFeatures().GetAt(i);
if (inputDataType == InputDataType::Tensor || i > 0)
{
// For now, only the first input can be bound with real data
std::wstring csvPath = i == 0 ? args.CsvPath() : std::wstring();
auto tensorFeature = BindingUtilities::CreateBindableTensor(description, csvPath);
inputFeatures.push_back(tensorFeature);
}
else
{
auto imageFeature = BindingUtilities::CreateBindableImage(description, args.ImagePath(), inputBindingType, inputDataType);
inputFeatures.push_back(imageFeature);
}
}
return inputFeatures;
}
HRESULT BindInputFeatures(const LearningModel& model, const LearningModelBinding& context, const std::vector<ILearningModelFeatureValue>& inputFeatures, const CommandLineArgs& args, OutputHelper& output, bool capturePerf)
{
assert(model.InputFeatures().Size() == inputFeatures.size());
try
{
context.Clear();
Timer timer;
if (capturePerf)
{
timer.Start();
WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::BIND_VALUE);
}
for (uint32_t i = 0; i < model.InputFeatures().Size(); i++)
{
auto&& description = model.InputFeatures().GetAt(i);
context.Bind(description.Name(), inputFeatures[i]);
}
if (capturePerf)
{
WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::BIND_VALUE);
output.m_clockBindTimes.push_back(timer.Stop());
}
if (!args.Silent())
{
std::cout << "[SUCCESS]" << std::endl;
}
}
catch (hresult_error hr)
{
std::cout << "[FAILED] Could Not Bind Input To Context" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
return hr.code();
}
return S_OK;
}
HRESULT EvaluateModel(
const LearningModel& model,
const LearningModelBinding& context,
LearningModelSession& session,
bool isGarbageData,
const CommandLineArgs& args,
OutputHelper& output,
bool capturePerf
)
{
LearningModelEvaluationResult result = nullptr;
try
{
// Timer measures wall-clock time between the last two start/stop calls.
Timer timer;
if (capturePerf)
{
timer.Start();
WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::EVAL_MODEL);
}
result = session.Evaluate(context, L"");
if (capturePerf)
{
WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::EVAL_MODEL);
output.m_clockEvalTimes.push_back(timer.Stop());
}
}
catch (winrt::hresult_error hr)
{
std::cout << "[FAILED]" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
return hr.code();
}
if (!args.Silent())
{
std::cout << "[SUCCESS]" << std::endl;
}
if (!isGarbageData && !args.Silent())
{
BindingUtilities::PrintEvaluationResults(model, args, result.Outputs());
}
return S_OK;
}
// Binds and evaluates the user-specified model and outputs success/failure for each step. If the
// perf flag is used, it will output the CPU, GPU, and wall-clock time for each step to the
// command-line and to a CSV file.
HRESULT EvaluateModel(LearningModel model, const CommandLineArgs& args, OutputHelper* output, LearningModelDeviceKind deviceKind)
HRESULT EvaluateModel(
const LearningModel& model,
const CommandLineArgs& args,
OutputHelper& output,
DeviceType deviceType,
InputBindingType inputBindingType,
InputDataType inputDataType
)
{
if (model == nullptr)
{
@ -18,12 +200,9 @@ HRESULT EvaluateModel(LearningModel model, const CommandLineArgs& args, OutputHe
}
LearningModelSession session = nullptr;
// Timer measures wall-clock time between the last two start/stop calls.
Timer timer;
try
{
session = LearningModelSession(model, LearningModelDevice(deviceKind));
session = LearningModelSession(model, TypeHelper::GetWinmlDeviceKind(deviceType));
}
catch (hresult_error hr)
{
@ -38,246 +217,218 @@ HRESULT EvaluateModel(LearningModel model, const CommandLineArgs& args, OutputHe
session.EvaluationProperties().Insert(L"EnableDebugOutput", nullptr);
}
LearningModelBinding binding(session);
LearningModelBinding context(session);
bool useInputData = false;
std::string device = deviceKind == LearningModelDeviceKind::Cpu ? "CPU" : "GPU";
std::cout << "Binding Model on " << device << "...";
if (args.PerfCapture())
{
WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::BIND_VALUE);
timer.Start();
}
if (!args.ImagePath().empty())
{
useInputData = true;
try
{
BindingUtilities::BindImageToContext(binding, model, args.ImagePath());
}
catch (hresult_error hr)
{
std::cout << "[FAILED] Could Not Bind Image To Context" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
return hr.code();
}
}
else if (!args.CsvPath().empty())
{
useInputData = true;
try
{
BindingUtilities::BindCSVDataToContext(binding, model, args.CsvPath());
}
catch (hresult_error hr)
{
std::cout << "[FAILED] Could Not Bind CSV Data To Context" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
return hr.code();
}
}
else
{
try
{
BindingUtilities::BindGarbageDataToContext(binding, model);
}
catch (hresult_error hr)
{
std::cout << "[FAILED] Could Not Garbage Data Context" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
return hr.code();
}
}
if (args.PerfCapture())
{
WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::BIND_VALUE);
output->m_clockBindTime = timer.Stop();
}
std::cout << "[SUCCESS]" << std::endl;
// Add one more iteration if we ignore the first run
uint32_t numIterations = args.NumIterations() + args.IgnoreFirstRun();
std::cout << "Evaluating Model on " << device << "...";
LearningModelEvaluationResult result = nullptr;
if(args.PerfCapture())
bool isGarbageData = !args.CsvPath().empty() || !args.ImagePath().empty();
// Run the binding + evaluate multiple times and average the results
for (uint32_t i = 0; i < numIterations; i++)
{
for (UINT i = 0; i < args.NumIterations(); i++)
bool captureIterationPerf = args.PerfCapture() && (!args.IgnoreFirstRun() || i > 0);
output.PrintBindingInfo(i + 1, deviceType, inputBindingType, inputDataType);
std::vector<ILearningModelFeatureValue> inputFeatures = GenerateInputFeatures(model, args, inputBindingType, inputDataType);
HRESULT bindInputResult = BindInputFeatures(model, context, inputFeatures, args, output, captureIterationPerf);
if (FAILED(bindInputResult))
{
WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::EVAL_MODEL);
timer.Start();
try
{
result = session.Evaluate(binding, L"");
}
catch (hresult_error hr)
{
std::cout << "[FAILED]" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
return hr.code();
}
WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::EVAL_MODEL);
output->m_clockEvalTimes.push_back(timer.Stop());
std::cout << "[SUCCESS]" << std::endl;
return bindInputResult;
}
output->PrintWallClockTimes(args.NumIterations());
if (deviceKind == LearningModelDeviceKind::Cpu)
output.PrintEvaluatingInfo(i + 1, deviceType, inputBindingType, inputDataType);
HRESULT evalResult = EvaluateModel(model, context, session, isGarbageData, args, output, captureIterationPerf);
if (FAILED(evalResult))
{
output->PrintCPUTimes(g_Profiler, args.NumIterations());
return evalResult;
}
else {
output->PrintGPUTimes(g_Profiler, args.NumIterations());
}
g_Profiler.Reset();
}
else
{
try
{
result = session.Evaluate(binding, L"");
}
catch (hresult_error hr)
{
std::cout << "[FAILED]" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
return hr.code();
}
std::cout << "[SUCCESS]" << std::endl;
}
std::cout << std::endl;
session.Close();
if (useInputData)
{
BindingUtilities::PrintEvaluationResults(model, args, result.Outputs());
}
return S_OK;
}
LearningModel LoadModelHelper(const CommandLineArgs& args, OutputHelper * output)
HRESULT EvaluateModels(
std::vector<std::wstring>& modelPaths,
const std::vector<DeviceType>& deviceTypes,
const std::vector<InputBindingType>& inputBindingTypes,
const std::vector<InputDataType>& inputDataTypes,
const CommandLineArgs& args,
OutputHelper& output
)
{
Timer timer;
LearningModel model = nullptr;
output.PrintHardwareInfo();
try
for (std::wstring& path : modelPaths)
{
if (args.PerfCapture())
{
WINML_PROFILING_START(g_Profiler, WINML_MODEL_TEST_PERF::LOAD_MODEL);
timer.Start();
}
model = LearningModel::LoadFromFilePath(args.ModelPath());
}
catch (hresult_error hr)
{
std::wcout << "Load Model: " << args.ModelPath() << " [FAILED]" << std::endl;
std::wcout << hr.message().c_str() << std::endl;
throw;
}
if (args.PerfCapture())
{
WINML_PROFILING_STOP(g_Profiler, WINML_MODEL_TEST_PERF::LOAD_MODEL);
output->m_clockLoadTime = timer.Stop();
}
output->PrintModelInfo(args.ModelPath(), model);
std::cout << "Loading model...[SUCCESS]" << std::endl;
return model;
}
HRESULT EvaluateModelsInDirectory(CommandLineArgs& args, OutputHelper * output)
{
std::wstring folderPath = args.FolderPath();
for (auto & it : std::filesystem::directory_iterator(args.FolderPath()))
{
std::string path = it.path().string();
if (it.path().string().find(".onnx") != std::string::npos ||
it.path().string().find(".pb") != std::string::npos)
{
std::wstring fileName;
fileName.assign(path.begin(), path.end());
args.SetModelPath(fileName);
LearningModel model = nullptr;
try
{
model = LoadModelHelper(args, output);
}
catch (hresult_error hr)
{
std::cout << hr.message().c_str() << std::endl;
return hr.code();
}
if (args.UseCPUandGPU() || args.UseCPU())
{
HRESULT evalHResult = EvaluateModel(model, args, output, LearningModelDeviceKind::Cpu);
if (evalHResult != S_OK)
{
return evalHResult;
}
}
if (args.UseCPUandGPU() || args.UseGPU())
{
HRESULT evalHResult = EvaluateModel(model, args, output, args.DeviceKind());
if (evalHResult != S_OK)
{
return evalHResult;
}
}
output->WritePerformanceDataToCSV(g_Profiler, args, fileName);
output->Reset();
}
}
return S_OK;
}
int main(int argc, char** argv)
{
CommandLineArgs args;
OutputHelper output;
// Initialize COM in a multi-threaded environment.
winrt::init_apartment();
// Profiler is a wrapper class that captures and stores timing and memory usage data on the
// CPU and GPU.
g_Profiler.Enable();
output.SetDefaultCSVFileName();
if (!args.ModelPath().empty())
{
output.PrintHardwareInfo();
LearningModel model = nullptr;
try
{
model = LoadModelHelper(args, &output);
model = LoadModel(path, args.PerfCapture(), args.Silent(), output);
}
catch (hresult_error hr)
{
std::cout << hr.message().c_str() << std::endl;
return hr.code();
}
if (args.UseCPUandGPU() || args.UseCPU())
auto firstFeature = model.InputFeatures().First().Current();
auto tensorDescriptor = firstFeature.try_as<TensorFeatureDescriptor>();
// Map and Sequence bindings are not supported yet
if (!tensorDescriptor)
{
HRESULT evalHResult = EvaluateModel(model, args, &output, LearningModelDeviceKind::Cpu);
if (FAILED(evalHResult))
continue;
}
for (auto deviceType : deviceTypes)
{
for (auto inputBindingType : inputBindingTypes)
{
return evalHResult;
for (auto inputDataType : inputDataTypes)
{
if (args.PerfCapture())
{
output.Reset();
g_Profiler.Reset();
}
if (inputDataType != InputDataType::Tensor)
{
// Currently GPU binding only work with 4D tensors and RGBA/BGRA images
if (tensorDescriptor.Shape().Size() != 4 || tensorDescriptor.Shape().GetAt(1) != 3)
{
continue;
}
}
HRESULT evalHResult = EvaluateModel(model, args, output, deviceType, inputBindingType, inputDataType);
if (FAILED(evalHResult))
{
return evalHResult;
}
if (args.PerfCapture())
{
output.PrintResults(g_Profiler, args.NumIterations(), deviceType, inputBindingType, inputDataType);
output.WritePerformanceDataToCSV(g_Profiler, args.NumIterations(), path, TypeHelper::Stringify(deviceType), TypeHelper::Stringify(inputDataType), TypeHelper::Stringify(inputBindingType), args.IgnoreFirstRun());
}
}
}
}
if (args.UseCPUandGPU() || args.UseGPU())
{
HRESULT evalHResult = EvaluateModel(model, args, &output, args.DeviceKind());
if (FAILED(evalHResult))
{
return evalHResult;
}
}
output.WritePerformanceDataToCSV(g_Profiler, args, args.ModelPath());
output.Reset();
model.Close();
}
else if (!args.FolderPath().empty())
return S_OK;
}
std::vector<InputDataType> FetchInputDataTypes(const CommandLineArgs& args)
{
std::vector<InputDataType> inputDataTypes;
if (args.UseTensor())
{
output.PrintHardwareInfo();
return EvaluateModelsInDirectory(args, &output);
inputDataTypes.push_back(InputDataType::Tensor);
}
if (args.UseRGB())
{
inputDataTypes.push_back(InputDataType::ImageRGB);
}
if (args.UseBGR())
{
inputDataTypes.push_back(InputDataType::ImageBGR);
}
return inputDataTypes;
}
std::vector<DeviceType> FetchDeviceTypes(const CommandLineArgs& args)
{
std::vector<DeviceType> deviceTypes;
if (args.UseCPU())
{
deviceTypes.push_back(DeviceType::CPU);
}
if (args.UseGPU())
{
deviceTypes.push_back(DeviceType::DefaultGPU);
}
if (args.UseGPUHighPerformance())
{
deviceTypes.push_back(DeviceType::HighPerfGPU);
}
if (args.UseGPUMinPower())
{
deviceTypes.push_back(DeviceType::MinPowerGPU);
}
return deviceTypes;
}
std::vector<InputBindingType> FetchInputBindingTypes(const CommandLineArgs& args)
{
std::vector<InputBindingType> inputBindingTypes;
if (args.UseCPUBoundInput())
{
inputBindingTypes.push_back(InputBindingType::CPU);
}
if (args.UseGPUBoundInput())
{
inputBindingTypes.push_back(InputBindingType::GPU);
}
return inputBindingTypes;
}
int main(int argc, char** argv)
{
// Initialize COM in a multi-threaded environment.
winrt::init_apartment();
CommandLineArgs args;
OutputHelper output(args.Silent());
// Profiler is a wrapper class that captures and stores timing and memory usage data on the
// CPU and GPU.
g_Profiler.Enable();
if (!args.OutputPath().empty())
{
output.SetCSVFileName(args.OutputPath());
}
else
{
output.SetDefaultCSVFileName();
}
std::vector<DeviceType> deviceTypes = FetchDeviceTypes(args);
std::vector<InputBindingType> inputBindingTypes = FetchInputBindingTypes(args);
std::vector<InputDataType> inputDataTypes = FetchInputDataTypes(args);
std::vector<std::wstring> modelPaths = args.ModelPath().empty() ? GetModelsInDirectory(args, &output) : std::vector<std::wstring>(1, args.ModelPath());
if (!args.ModelPath().empty() || !args.FolderPath().empty())
{
return EvaluateModels(modelPaths, deviceTypes, inputBindingTypes, inputDataTypes, args, output);
}
return 0;
}

Просмотреть файл

@ -1,6 +1,5 @@
#pragma once
#include "Common.h"
#include "CommandLineArgs.h"
#include <fstream>
#include <ctime>
#include <locale>
@ -9,65 +8,162 @@
#include <iomanip>
using namespace winrt::Windows::AI::MachineLearning;
using namespace Windows::Foundation::Collections;
using namespace Windows::Storage;
using namespace Windows::Storage::Streams;
using namespace Windows::Media;
using namespace Windows::Graphics::Imaging;
// Stores performance information and handles output to the command line and CSV files.
class OutputHelper
{
public:
OutputHelper() {}
OutputHelper(bool silent) : m_silent(silent) {}
void PrintWallClockTimes(UINT iterations)
void PrintLoadingInfo(const std::wstring& modelPath) const
{
if (!m_silent)
{
wprintf(L"Loading model (path = %s)...\n", modelPath.c_str());
}
}
void PrintBindingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType, InputDataType inputDataType) const
{
if (!m_silent)
{
printf(
"Binding (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s)...",
TypeHelper::Stringify(deviceType).c_str(),
iteration,
TypeHelper::Stringify(inputBindingType).c_str(),
TypeHelper::Stringify(inputDataType).c_str()
);
}
}
void PrintEvaluatingInfo(uint32_t iteration, DeviceType deviceType, InputBindingType inputBindingType, InputDataType inputDataType) const
{
if (!m_silent)
{
printf(
"Evaluating (device = %s, iteration = %d, inputBinding = %s, inputDataType = %s)...",
TypeHelper::Stringify(deviceType).c_str(),
iteration,
TypeHelper::Stringify(inputBindingType).c_str(),
TypeHelper::Stringify(inputDataType).c_str()
);
}
}
void PrintModelInfo(std::wstring modelPath, LearningModel model) const
{
if (!m_silent)
{
std::cout << "=================================================================" << std::endl;
std::wcout << "Name: " << model.Name().c_str() << std::endl;
std::wcout << "Author: " << model.Author().c_str() << std::endl;
std::wcout << "Version: " << model.Version() << std::endl;
std::wcout << "Domain: " << model.Domain().c_str() << std::endl;
std::wcout << "Description: " << model.Description().c_str() << std::endl;
std::wcout << "Path: " << modelPath << std::endl;
std::cout << "Support FP16: " << std::boolalpha << doesModelContainFP16(model) << std::endl;
std::cout << std::endl;
//print out information about input of model
std::cout << "Input Feature Info:" << std::endl;
for (auto&& inputFeature : model.InputFeatures())
{
PrintFeatureDescriptorInfo(inputFeature);
}
//print out information about output of model
std::cout << "Output Feature Info:" << std::endl;
for (auto&& outputFeature : model.OutputFeatures())
{
PrintFeatureDescriptorInfo(outputFeature);
}
std::cout << "=================================================================" << std::endl;
std::cout << std::endl;
}
}
void PrintFeatureDescriptorInfo(const ILearningModelFeatureDescriptor &descriptor) const
{
if (!m_silent)
{
//IMPORTANT: This learningModelFeatureKind array needs to match the "enum class
//LearningModelFeatureKind" idl in Windows.AI.MachineLearning.0.h
const std::string learningModelFeatureKind[] =
{
"Tensor",
"Sequence",
"Map",
"Image",
};
std::wstring name(descriptor.Name());
std::wcout << "Name: " << name << std::endl;
std::wcout << "Feature Kind: " << FeatureDescriptorToString(descriptor) << std::endl;
std::cout << std::endl;
}
}
void PrintHardwareInfo() const
{
if (!m_silent)
{
std::cout << "WinML Runner" << std::endl;
com_ptr<IDXGIFactory6> factory;
CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void());
com_ptr<IDXGIAdapter> adapter;
factory->EnumAdapters(0, adapter.put());
DXGI_ADAPTER_DESC description;
if (SUCCEEDED(adapter->GetDesc(&description)))
{
std::wcout << L"GPU: " << description.Description << std::endl;
std::cout << std::endl;
}
}
}
void PrintResults(const Profiler<WINML_MODEL_TEST_PERF> &profiler, uint32_t numIterations, DeviceType deviceType, InputBindingType inputBindingType, InputDataType inputDataType) const
{
double loadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
double bindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
double evalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
double evalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
double gpuEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
double gpuEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
double totalBindTime = std::accumulate(m_clockBindTimes.begin(), m_clockBindTimes.end(), 0.0);
double clockBindTime = totalBindTime / (double)numIterations;
double totalEvalTime = std::accumulate(m_clockEvalTimes.begin(), m_clockEvalTimes.end(), 0.0);
m_clockEvalTime = totalEvalTime / (double)iterations;
double clockEvalTime = totalEvalTime / (double)numIterations;
std::cout << std::endl;
std::cout << "Wall-clock Time Averages (iterations = " << iterations << "):" << std::endl;
std::cout << " Load: " << m_clockLoadTime << " ms" << std::endl;
std::cout << " Bind: " << m_clockBindTime << " ms" << std::endl;
std::cout << " Evaluate: " << m_clockEvalTime << " ms" << std::endl;
std::cout << " Total time: " << m_clockLoadTime + m_clockBindTime + m_clockEvalTime << " ms" << std::endl;
std::cout << std::endl;
}
if (!m_silent)
{
double totalTime = (isnan(loadTime) ? 0 : loadTime) + bindTime + evalTime;
void PrintCPUTimes(Profiler<WINML_MODEL_TEST_PERF> &profiler, UINT iterations)
{
m_CPULoadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
m_CPUBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
m_CPUEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
m_CPUEvalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
std::cout << std::endl;
std::cout << std::endl;
std::cout << "CPU Time Averages (iterations = " << iterations << "):" << std::endl;
std::cout << " Load: " << m_CPULoadTime << " ms" << std::endl;
std::cout << " Bind: " << m_CPUBindTime << " ms" << std::endl;
std::cout << " Evaluate: " << m_CPUEvalTime << " ms" << std::endl;
std::cout << " Total time: " << m_CPULoadTime + m_CPUBindTime + m_CPUEvalTime << " ms" << std::endl;
std::cout << " Working Set Memory usage (evaluate): " << m_CPUEvalMemoryUsage << " MB" << std::endl;
std::cout << std::endl;
}
printf("Results (device = %s, numIterations = %d, inputBinding = %s, inputDataType = %s):\n",
TypeHelper::Stringify(deviceType).c_str(),
numIterations,
TypeHelper::Stringify(inputBindingType).c_str(),
TypeHelper::Stringify(inputDataType).c_str()
);
void PrintGPUTimes(Profiler<WINML_MODEL_TEST_PERF> &profiler, UINT iterations)
{
m_GPUBindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
m_GPUEvalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
m_GPUEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
m_GPUEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
std::cout << " Load: " << (isnan(loadTime) ? "N/A" : std::to_string(loadTime) + "ms") << std::endl;
std::cout << " Bind: " << bindTime << std::endl;
std::cout << " Evaluate: " << evalTime << std::endl;
std::cout << " Total Time: " << totalTime << std::endl;
std::cout << " Wall-Clock Load: " << m_clockLoadTime << std::endl;
std::cout << " Wall-Clock Bind: " << clockBindTime << std::endl;
std::cout << " Wall-Clock Evaluate: " << clockEvalTime << std::endl;
std::cout << " Total Wall-Clock Time: " << (m_clockLoadTime + clockBindTime + clockEvalTime) << std::endl;
std::cout << " Working Set Memory usage (evaluate): " << gpuEvalDedicatedMemoryUsage << " MB" << std::endl;
std::cout << " Dedicated Memory Usage (evaluate): " << gpuEvalDedicatedMemoryUsage << " MB" << std::endl;
std::cout << " Shared Memory Usage (evaluate): " << gpuEvalSharedMemoryUsage << " MB" << std::endl;
std::cout << std::endl;
std::cout << "GPU Time Averages (iterations = " << iterations << "):" << std::endl;
std::cout << " Load: " << "N/A" << std::endl;
std::cout << " Bind: " << m_GPUBindTime << " ms" << std::endl;
std::cout << " Evaluate: " << m_GPUEvalTime << " ms" << std::endl;
std::cout << " Total time: " << m_GPUBindTime + m_GPUEvalTime << " ms" << std::endl;
std::cout << " Dedicated memory usage (evaluate): " << m_GPUEvalDedicatedMemoryUsage << " MB" << std::endl;
std::cout << " Shared memory usage (evaluate): " << m_GPUEvalSharedMemoryUsage << " MB" << std::endl;
std::cout << std::endl;
std::cout << std::endl << std::endl;
}
}
static std::wstring FeatureDescriptorToString(const ILearningModelFeatureDescriptor &descriptor)
@ -125,23 +221,6 @@ public:
}
}
static void PrintFeatureDescriptorInfo(const winrt::Windows::AI::MachineLearning::ILearningModelFeatureDescriptor &descriptor)
{
//IMPORTANT: This learningModelFeatureKind array needs to match the "enum class
//LearningModelFeatureKind" idl in Windows.AI.MachineLearning.0.h
const std::string learningModelFeatureKind[] =
{
"Tensor",
"Sequence",
"Map",
"Image",
};
std::wstring name(descriptor.Name());
std::wcout << "Name: " << name << std::endl;
std::wcout <<"Feature Kind: " << FeatureDescriptorToString(descriptor)<< std::endl;
std::cout << std::endl;
}
static bool doesDescriptorContainFP16(const ILearningModelFeatureDescriptor &descriptor)
{
switch (descriptor.Kind())
@ -184,50 +263,6 @@ public:
return false;
}
void PrintModelInfo(std::wstring modelPath, LearningModel model)
{
std::cout << "=================================================================" << std::endl;
std::wcout << "Name: " << model.Name().c_str() << std::endl;
std::wcout << "Author: " << model.Author().c_str() << std::endl;
std::wcout << "Version: " << model.Version() << std::endl;
std::wcout << "Domain: " << model.Domain().c_str() << std::endl;
std::wcout << "Description: " << model.Description().c_str() << std::endl;
std::wcout << "Path: " << modelPath << std::endl;
std::cout << "Support FP16: " << std::boolalpha << doesModelContainFP16(model) << std::endl;
std::cout << std::endl;
//print out information about input of model
std::cout << "Input Feature Info:" << std::endl;
for (auto&& inputFeature : model.InputFeatures())
{
PrintFeatureDescriptorInfo(inputFeature);
}
//print out information about output of model
std::cout << "Output Feature Info:" << std::endl;
for (auto&& outputFeature : model.OutputFeatures())
{
PrintFeatureDescriptorInfo(outputFeature);
}
std::cout << "=================================================================" << std::endl;
std::cout << std::endl;
}
void PrintHardwareInfo()
{
std::cout << "WinML Runner" << std::endl;
com_ptr<IDXGIFactory6> factory;
(CreateDXGIFactory1(__uuidof(IDXGIFactory6), factory.put_void()));
com_ptr<IDXGIAdapter> adapter;
factory->EnumAdapters(0, adapter.put());
DXGI_ADAPTER_DESC description;
if (SUCCEEDED(adapter->GetDesc(&description)))
{
std::wcout << L"GPU: " << description.Description << std::endl;
std::cout << std::endl;
}
}
void SetDefaultCSVFileName()
{
auto time = std::time(nullptr);
@ -235,15 +270,35 @@ public:
localtime_s(&localTime, &time);
std::ostringstream oss;
oss << std::put_time(&localTime, "%Y-%m-%d");
oss << std::put_time(&localTime, "%Y-%m-%d %H.%M.%S");
std::string fileName = "WinML Runner [" + oss.str() + "].csv";
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
m_csvFileName = converter.from_bytes(fileName);
}
void WritePerformanceDataToCSV(Profiler<WINML_MODEL_TEST_PERF> &g_Profiler, const CommandLineArgs& args, std::wstring model)
void SetCSVFileName(const std::wstring& fileName)
{
if (m_csvFileName.length() > 0)
m_csvFileName = fileName;
}
void WritePerformanceDataToCSV(const Profiler<WINML_MODEL_TEST_PERF> &profiler, int numIterations, std::wstring model, std::string modelBinding, std::string inputBinding, std::string inputType, bool firstRunIgnored) const
{
double loadTime = profiler[LOAD_MODEL].GetAverage(CounterType::TIMER);
double bindTime = profiler[BIND_VALUE].GetAverage(CounterType::TIMER);
double evalTime = profiler[EVAL_MODEL].GetAverage(CounterType::TIMER);
double evalMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::WORKING_SET_USAGE);
double gpuEvalSharedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_SHARED_MEM_USAGE);
double gpuEvalDedicatedMemoryUsage = profiler[EVAL_MODEL].GetAverage(CounterType::GPU_DEDICATED_MEM_USAGE);
double totalBindTime = std::accumulate(m_clockBindTimes.begin(), m_clockBindTimes.end(), 0.0);
double clockBindTime = totalBindTime / (double)numIterations;
double totalEvalTime = std::accumulate(m_clockEvalTimes.begin(), m_clockEvalTimes.end(), 0.0);
double clockEvalTime = totalEvalTime / (double)numIterations;
double totalTime = (isnan(loadTime) ? 0 : loadTime) + bindTime + evalTime;
if (!m_csvFileName.empty())
{
// Check if header exists
bool bNewFile = false;
@ -265,98 +320,66 @@ public:
if (bNewFile)
{
fout << "Model Name" << ","
<< "Iterations" << ",";
if (args.UseCPUandGPU() || args.UseCPU())
{
fout << "CPU Load (ms)" << ","
<< "CPU Bind (ms)" << ","
<< "CPU Evaluate (ms)" << ","
<< "CPU total time (ms)" << ","
<< "Working Set Memory usage (Evaluate) (MB)" << ",";
}
if (args.UseCPUandGPU() || args.UseGPU())
{
fout << "GPU Load (ms)" << ","
<< "GPU Bind (ms)" << ","
<< "GPU Evaluate (ms)" << ","
<< "GPU total time (ms)" << ","
<< "Dedicated memory usage (evaluate) (MB)" << ","
<< "Shared memory usage (evaluate) (MB)" << ",";
}
fout << "Wall-clock Load (ms)" << ","
<< "Wall-clock Bind (ms)" << ","
<< "Wall-clock Evaluate (ms)" << ","
<< "Wall-clock total time (ms)" << ","
<< std::endl;
<< "Model Binding" << ","
<< "Input Binding" << ","
<< "Input Type" << ","
<< "Iterations" << ","
<< "First Run Ignored" << ","
<< "Load (ms)" << ","
<< "Bind (ms)" << ","
<< "Evaluate (ms)" << ","
<< "Total Time (ms)" << ","
<< "Working Set Memory usage (evaluate) (MB)" << ","
<< "GPU Dedicated memory usage (evaluate) (MB)" << ","
<< "GPU Shared memory usage (evaluate) (MB)" << ","
<< "Wall-clock Load (ms)" << ","
<< "Wall-clock Bind (ms)" << ","
<< "Wall-clock Evaluate (ms)" << ","
<< "Wall-clock total time (ms)" << std::endl;
}
fout << modelName << "," << args.NumIterations() << ",";
fout << modelName << ","
<< modelBinding << ","
<< inputBinding << ","
<< inputType << ","
<< numIterations << ","
<< firstRunIgnored << ","
<< (isnan(loadTime) ? "N/A" : std::to_string(loadTime)) << ","
<< bindTime << ","
<< evalTime << ","
<< totalTime << ","
<< evalMemoryUsage << ","
<< gpuEvalDedicatedMemoryUsage << ","
<< gpuEvalSharedMemoryUsage << ","
<< m_clockLoadTime << ","
<< clockBindTime << ","
<< clockEvalTime << ","
<< m_clockLoadTime + clockBindTime + clockEvalTime << std::endl;
if (args.UseCPUandGPU() || args.UseCPU())
{
fout << m_CPULoadTime << ","
<< m_CPUBindTime << ","
<< m_CPUEvalTime << ","
<< m_CPULoadTime + m_CPUBindTime + m_CPUEvalTime << ","
<< m_CPUEvalMemoryUsage << ",";
}
if (args.UseCPUandGPU() || args.UseGPU())
{
fout << "N/A" << ","
<< m_GPUBindTime << ","
<< m_GPUEvalTime << ","
<< m_GPUBindTime + m_GPUEvalTime << ","
<< m_GPUEvalDedicatedMemoryUsage << ","
<< m_GPUEvalSharedMemoryUsage;
}
fout << m_clockLoadTime << ","
<< m_clockBindTime << ","
<< m_clockEvalTime << ","
<< m_clockLoadTime + m_clockBindTime + m_clockEvalTime << ","
<< std::endl;
fout.close();
}
}
void Reset()
{
m_GPUBindTime = 0;
m_GPUEvalTime = 0;
m_GPUEvalSharedMemoryUsage = 0;
m_GPUEvalDedicatedMemoryUsage = 0;
m_CPULoadTime = 0;
m_CPUBindTime = 0;
m_CPUEvalTime = 0;
m_CPUEvalMemoryUsage = 0;
m_clockEvalTime = 0;
m_clockLoadTime = 0;
m_clockBindTime = 0;
m_clockEvalTime = 0;
m_clockBindTimes.clear();
m_clockEvalTimes.clear();
}
double m_clockLoadTime = 0;
double m_clockBindTime = 0;
std::vector<double> m_clockBindTimes;
std::vector<double> m_clockEvalTimes;
private:
std::wstring m_csvFileName;
private:
double m_GPUBindTime = 0;
double m_GPUEvalTime = 0;
double m_GPUEvalSharedMemoryUsage = 0;
double m_GPUEvalDedicatedMemoryUsage = 0;
double m_CPULoadTime = 0;
double m_CPUBindTime = 0;
double m_CPUEvalTime = 0;
double m_CPUEvalMemoryUsage = 0;
double m_clockBindTime = 0;
double m_clockEvalTime = 0;
bool m_silent = false;
};

Просмотреть файл

@ -17,26 +17,41 @@ You must unzip the entire archive if you intend to build the samples.
## Run the tool
```
Required command-Line arguments:
-model <path> : Fully qualified path to a .onnx or .pb model file.
-model <path> : Fully qualified path to a .onnx or .pb model file.
or
-folder <path> : Fully qualifed path to a folder with .onnx and/or .pb models, will run all of the models in the folder.
-folder <path>            : Fully qualified path to a folder with .onnx and/or .pb models, will run all of the models in the folder.
#Optional command-line arguments:
-perf : Captures GPU, CPU, and wall-clock time measurements.
-iterations <int> : Number of times to evaluate the model when capturing performance measurements.
-CPU : Will create a session on the CPU.
-GPU : Will create a session on the GPU.
-GPUMaxPerformance : Will create a session with the most powerful GPU device available.
-GPUMinPower : Will create a session with GPU with the least power.
-iterations <int> : Number of times to evaluate the model when capturing performance measurements.
-CPU : Will create a session on the CPU.
-GPU : Will create a session on the GPU.
-GPUHighPerformance : Will create a session with the most powerful GPU device available.
-GPUMinPower : Will create a session with GPU with the least power.
-CPUBoundInput : Will bind the input to the CPU.
-GPUBoundInput : Will bind the input to the GPU.
-BGR : Will load the input as a BGR image.
-RGB : Will load the input as an RGB image.
-tensor : Will load the input as a tensor.
-input <image/CSV path> : Will bind image/data from CSV to model.
-debug : Will start a trace logging session.
-output <CSV path> : Path to the CSV where the perf results will be written.
-IgnoreFirstRun : Will ignore the first run in the perf results when calculating the average
-silent : Silent mode (only errors will be printed to the console)
-debug : Will start a trace logging session.
```
### Examples:
Run a model on the CPU and GPU separately 5 times and output performance data:
> WinMLRunner.exe -model c:\\data\\concat.onnx -iterations 5 -perf
Runs all the models in the data folder, captures performance data 3 times using only the CPU:
> WinMLRunner .exe -folder c:\\data -perf -iterations 3 -CPU
> WinMLRunner.exe -folder c:\\data -perf -iterations 3 -CPU
Run a model on the CPU and GPU separately, and by binding the input to the CPU and the GPU separately (4 total runs):
> WinMLRunner.exe -model c:\\data\\SqueezeNet.onnx -CPU -GPU -CPUBoundInput -GPUBoundInput
Run a model on the CPU with the input bound to the GPU and loaded as an RGB image:
> WinMLRunner.exe -model c:\\data\\SqueezeNet.onnx -CPU -GPUBoundInput -RGB
## Default output

Просмотреть файл

@ -571,7 +571,7 @@ public:
Reset(0, T::COUNT);
}
PerfCounterStatistics& GetCounter(int t)
PerfCounterStatistics& GetCounter(int t) const
{
return m_perfCounterStat[t];
}
@ -581,6 +581,11 @@ public:
return m_perfCounterStat[t];
}
const PerfCounterStatistics& operator [] (int t) const
{
return m_perfCounterStat[t];
}
void Enable()
{
for (int i = 0; i < T::COUNT; ++i)

Просмотреть файл

@ -0,0 +1,99 @@
#pragma once
#include "Common.h"
using namespace winrt::Windows::AI::MachineLearning;
using namespace winrt::Windows::Graphics::DirectX;
using namespace winrt::Windows::Graphics::Imaging;
enum class InputBindingType { CPU, GPU };
enum class InputDataType { Tensor, ImageRGB, ImageBGR };
enum class InputSourceType { ImageFile, CSVFile, GeneratedData };
enum class DeviceType { CPU, DefaultGPU, MinPowerGPU, HighPerfGPU };
// Static helpers that map the runner's configuration enums to display strings
// and to the corresponding WinML / imaging API enum values.
// Note: these throw string literals (const char*) on unmapped values, matching
// the existing catch sites in this tool.
class TypeHelper
{
public:
    // Display name for an input data type (used in console output and CSV columns).
    static std::string Stringify(InputDataType inputDataType)
    {
        switch (inputDataType)
        {
            case InputDataType::Tensor: return "Tensor";
            case InputDataType::ImageRGB: return "RGB Image";
            case InputDataType::ImageBGR: return "BGR Image";
        }

        throw "No name found for this InputDataType";
    }

    // Display name for where the input is bound (CPU or GPU memory).
    static std::string Stringify(InputBindingType inputBindingType)
    {
        switch (inputBindingType)
        {
            case InputBindingType::CPU: return "CPU";
            case InputBindingType::GPU: return "GPU";
        }

        throw "No name found for this InputBindingType.";
    }

    // Display name for the evaluation device.
    static std::string Stringify(DeviceType deviceType)
    {
        switch (deviceType)
        {
            case DeviceType::CPU: return "CPU";
            case DeviceType::DefaultGPU: return "GPU";
            case DeviceType::MinPowerGPU: return "GPU (min power)";
            case DeviceType::HighPerfGPU: return "GPU (high perf)";
        }

        throw "No name found for this DeviceType.";
    }

    // Display name for the source of the input data.
    static std::string Stringify(InputSourceType inputSourceType)
    {
        switch (inputSourceType)
        {
            case InputSourceType::ImageFile: return "Image File";
            case InputSourceType::CSVFile: return "CSV File";
            case InputSourceType::GeneratedData: return "Generated Data";
        }

        // Fixed copy-paste bug: this message previously said "DeviceType".
        throw "No name found for this InputSourceType.";
    }

    // Maps the runner's DeviceType to the WinML session device kind.
    static LearningModelDeviceKind GetWinmlDeviceKind(DeviceType deviceType)
    {
        switch (deviceType)
        {
            case DeviceType::CPU: return LearningModelDeviceKind::Cpu;
            case DeviceType::DefaultGPU: return LearningModelDeviceKind::DirectX;
            case DeviceType::MinPowerGPU: return LearningModelDeviceKind::DirectXMinPower;
            case DeviceType::HighPerfGPU: return LearningModelDeviceKind::DirectXHighPerformance;
        }

        throw "No LearningModelDeviceKind found for this DeviceType.";
    }

    // Pixel format used when decoding an input image into a SoftwareBitmap.
    static BitmapPixelFormat GetBitmapPixelFormat(InputDataType inputDataType)
    {
        switch (inputDataType)
        {
            case InputDataType::ImageRGB: return BitmapPixelFormat::Rgba8;
            case InputDataType::ImageBGR: return BitmapPixelFormat::Bgra8;
        }

        throw "No BitmapPixelFormat found for this InputDataType.";
    }

    // Pixel format used when binding an image as a GPU (DirectX) surface.
    static DirectXPixelFormat GetDirectXPixelFormat(InputDataType inputDataType)
    {
        switch (inputDataType)
        {
            // NOTE(review): RGB maps to R8G8B8A8UInt while BGR maps to the
            // *normalized* B8G8R8A8UIntNormalized — the asymmetry looks like a
            // possible bug (R8G8B8A8UIntNormalized would be the symmetric choice).
            // Left unchanged to preserve behavior; TODO confirm against WinML
            // surface-binding requirements.
            case InputDataType::ImageRGB: return DirectXPixelFormat::R8G8B8A8UInt;
            case InputDataType::ImageBGR: return DirectXPixelFormat::B8G8R8A8UIntNormalized;
        }

        throw "No DirectXPixelFormat found for this InputDataType.";
    }
};

Просмотреть файл

@ -19,16 +19,20 @@
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Filehelper.cpp" />
<ClCompile Include="CommandLineArgs.cpp" />
<ClCompile Include="dllload.cpp" />
<ClCompile Include="Main.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Filehelper.h" />
<ClInclude Include="BindingUtilities.h" />
<ClInclude Include="CommandLineArgs.h" />
<ClInclude Include="Common.h" />
<ClInclude Include="OutputHelper.h" />
<ClInclude Include="ModelBinding.h" />
<ClInclude Include="TimerHelper.h" />
<ClInclude Include="TypeHelper.h" />
</ItemGroup>
<PropertyGroup Label="Globals">
<CppWinRTEnabled>true</CppWinRTEnabled>

Просмотреть файл

@ -0,0 +1,73 @@
#include "FileHelper.h"
#include <winrt/Windows.Foundation.h>
#include <winstring.h>
extern "C"
{
HRESULT __stdcall OS_RoGetActivationFactory(HSTRING classId, GUID const& iid, void** factory) noexcept;
}
#ifdef _M_IX86
#pragma comment(linker, "/alternatename:_OS_RoGetActivationFactory@12=_RoGetActivationFactory@12")
#else
#pragma comment(linker, "/alternatename:OS_RoGetActivationFactory=RoGetActivationFactory")
#endif
// Returns true when 'value' begins with the entire contents of 'match'.
bool starts_with(std::wstring_view value, std::wstring_view match) noexcept
{
    if (match.size() > value.size())
    {
        return false;
    }
    return value.substr(0, match.size()) == match;
}
// C++/WinRT activation hook: intercepts all WinRT class activations made by this
// process. Classes in the "Windows.AI.MachineLearning." namespace are activated
// from the Windows.AI.MachineLearning.dll that sits next to this executable
// (allowing the tool to run against a redistributed WinML build); everything
// else falls through to the OS activation path (OS_RoGetActivationFactory).
int32_t __stdcall WINRT_RoGetActivationFactory(void* classId, winrt::guid const& iid, void** factory) noexcept
{
    *factory = nullptr;
    // Reconstruct the runtime class name from the HSTRING without copying it.
    std::wstring_view name{ WindowsGetStringRawBuffer(static_cast<HSTRING>(classId), nullptr), WindowsGetStringLen(static_cast<HSTRING>(classId)) };
    HMODULE library{ nullptr };

    // Side-by-side WinML DLL path, derived from this module's directory.
    std::wstring winmlDllPath = FileHelper::GetModulePath() + L"Windows.AI.MachineLearning.dll";

    if (starts_with(name, L"Windows.AI.MachineLearning."))
    {
        const wchar_t* libPath = winmlDllPath.c_str();
        library = LoadLibraryW(libPath);
    }
    else
    {
        // Not a WinML class: defer to the system activation factory.
        return OS_RoGetActivationFactory(static_cast<HSTRING>(classId), iid, factory);
    }

    // If the library is not found, get the default one
    if (!library)
    {
        return OS_RoGetActivationFactory(static_cast<HSTRING>(classId), iid, factory);
    }

    // Ask the DLL for its activation-factory entry point.
    using DllGetActivationFactory = HRESULT __stdcall(HSTRING classId, void** factory);
    auto call = reinterpret_cast<DllGetActivationFactory*>(GetProcAddress(library, "DllGetActivationFactory"));

    if (!call)
    {
        // Entry point missing: release the DLL and propagate the Win32 error.
        HRESULT const hr = HRESULT_FROM_WIN32(GetLastError());
        WINRT_VERIFY(FreeLibrary(library));
        return hr;
    }

    winrt::com_ptr<winrt::Windows::Foundation::IActivationFactory> activation_factory;
    HRESULT const hr = call(static_cast<HSTRING>(classId), activation_factory.put_void());

    if (FAILED(hr))
    {
        WINRT_VERIFY(FreeLibrary(library));
        return hr;
    }

    // On success the DLL is intentionally left loaded: the returned factory
    // (and objects it creates) execute code from it for the process lifetime.
    if (iid != winrt::guid_of<winrt::Windows::Foundation::IActivationFactory>())
    {
        // Caller wants a specific factory interface rather than the generic one.
        return activation_factory->QueryInterface(iid, factory);
    }

    *factory = activation_factory.detach();
    return S_OK;
}