Merged PR 4815: Added Sample Featurizer and Infrastructure

Added Sample Featurizer and Infrastructure
This commit is contained in:
David Brownell 2019-07-26 21:18:30 +00:00
Родитель 10294a6334
Коммит 1357c64e35
11 изменённых файлов: 623 добавлений и 8 удалений

Просмотреть файл

@ -12,8 +12,20 @@ stages:
operating_system: Windows
configuration: x64
# TODO: Boost lib does not currently support Linux - template: BuildAndTest.template.yaml
# TODO: Boost lib does not currently support Linux parameters:
# TODO: Boost lib does not currently support Linux agent_pool: ubuntu-16.04
# TODO: Boost lib does not currently support Linux operating_system: Linux
# TODO: Boost lib does not currently support Linux configuration: x64
- template: BuildAndTest.template.yaml
parameters:
agent_pool: ubuntu-16.04
operating_system: Linux
configuration: x64
agent_pool: vs2015-win2012r2
operating_system: Windows
configuration: featurizer_prep
# TODO: Boost lib does not currently support Linux - template: BuildAndTest.template.yaml
# TODO: Boost lib does not currently support Linux parameters:
# TODO: Boost lib does not currently support Linux agent_pool: ubuntu-16.04
# TODO: Boost lib does not currently support Linux operating_system: Linux
# TODO: Boost lib does not currently support Linux configuration: featurizer_prep

Просмотреть файл

@ -22,6 +22,7 @@
# |
# ----------------------------------------------------------------------
import copy
import os
import sys
@ -106,9 +107,32 @@ def GetDependencies():
"{}-ex".format(architecture),
"https://github.com/davidbrownell/Common_cpp_Clang_8.git",
),
# TODO: This configuration doesn't depend on boost; however, some of the tests associated with the
# `featurizer_prep` configuration do. Include it for now, as there isn't a way to specify
# configuration-specific tests at this time. Remove the following dependency once there is a
# way to communicate this information.
Dependency(
"407DD743110A4FB1871AEF60CBEC99A0",
"Common_cpp_boost_1.70.0",
"standard",
"https://github.com/davidbrownell/Common_cpp_boost_1.70.0.git",
),
],
)
d["featurizer_prep"] = copy.deepcopy(d["x64"])
# TODO: Enable this once the TODO comment above is resolved.
#
# d["featurizer_prep"].Dependencies.append(
# Dependency(
# "407DD743110A4FB1871AEF60CBEC99A0",
# "Common_cpp_boost_1.70.0",
# "standard",
# "https://github.com/davidbrownell/Common_cpp_boost_1.70.0.git",
# ),
# )
return d
@ -117,7 +141,7 @@ def GetCustomActions(debug, verbose, explicit_configurations):
"""
Returns an action or list of actions that should be invoked as part of the setup process.
Actions are generic command line statements defined in
Actions are generic command line statements defined in
<Common_Environment>/Libraries/Python/CommonEnvironment/v1.0/CommonEnvironment/Shell/Commands/__init__.py
that are converted into statements appropriate for the current scripting language (in most
cases, this is Bash on Linux systems and Batch or PowerShell on Windows systems).

Просмотреть файл

@ -45,6 +45,8 @@ _REPO_DATA = [
("Common_cpp_Clang_8", 'git clone https://github.com/davidbrownell/Common_cpp_Clang_8 "{output_dir}"', None),
("Common_cpp_Clang_Common", 'git clone https://github.com/davidbrownell/Common_cpp_Clang_Common "{output_dir}"', None),
("Common_cpp_Common", 'git clone https://github.com/davidbrownell/Common_cpp_Common "{output_dir}"', None),
("Common_cpp_boost_Common", 'git clone https://github.com/davidbrownell/Common_cpp_boost_Common "{output_dir}"', None),
("Common_cpp_boost_1.70.0", 'git clone https://github.com/davidbrownell/Common_cpp_boost_1.70.0 "{output_dir}"', '"/configuration=standard" "/configuration=MSVC-2019-x64"'),
]
if CurrentShell.CategoryName == "Linux":
@ -61,7 +63,7 @@ elif CurrentShell.CategoryName == "Windows":
else:
raise Exception("'{}' is not supported OS".format(CurrentShell.CategoryName))
_ACTIVATION_REPO_CONFIGURATION = "x64"
_ACTIVATION_REPO_CONFIGURATION = "<x64|featurizer_prep>"
# ----------------------------------------------------------------------
inflect = inflect_mod.engine()
@ -177,9 +179,7 @@ def EntryPoint(
suffix=data[2] or "",
)
if CurrentShell.CategoryName == "Windows":
command_line = command_line.replace("=", "_EQ_")
elif CurrentShell.CategoryName == "Linux":
if CurrentShell.CategoryName == "Linux":
command_line = "./{}".format(command_line)
sink = six.moves.StringIO()

Просмотреть файл

@ -0,0 +1,175 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include <memory>
#include <stdexcept>
#include <tuple>
#include <utility>
#include <boost/serialization/access.hpp>
#include <boost/serialization/base_object.hpp>
#include <boost/serialization/nvp.hpp>
namespace Microsoft {
namespace Featurizer {
/////////////////////////////////////////////////////////////////////////
///  \class         Transformer
///  \brief         Abstract interface that turns one input "value" into one
///                 output value. A value can be anything from an integer
///                 to a collection of integers.
///
///  \tparam ReturnT    Type produced by `transform`.
///  \tparam ArgT       Type consumed by `transform`.
///
template <typename ReturnT, typename ArgT>
class Transformer {
public:
    // ----------------------------------------------------------------------
    // |  Public Types
    using return_type                       = ReturnT;
    using arg_type                          = ArgT;
    using transformer_type                  = Transformer<ReturnT, ArgT>;

    // ----------------------------------------------------------------------
    // |  Public Methods
    Transformer() = default;
    virtual ~Transformer() = default;

    // Instances may be move-constructed; copying and every form of
    // assignment are disabled.
    Transformer(Transformer const &) = delete;
    Transformer(Transformer &&) = default;

    Transformer & operator =(Transformer const &) = delete;
    Transformer & operator =(Transformer &&) = delete;

    /// Produces the output for `arg`; implemented by derived classes.
    virtual return_type transform(arg_type const &arg) const = 0;

private:
    // ----------------------------------------------------------------------
    // |  Relationships
    friend class boost::serialization::access;

    // ----------------------------------------------------------------------
    // |  Private Methods

    // Serialization hook; private, with `boost::serialization::access`
    // declared as a friend above.
    template <typename ArchiveT>
    void serialize(ArchiveT &, unsigned int const /*version*/);
};
/////////////////////////////////////////////////////////////////////////
///  \class         Estimator
///  \brief         Collects state over a collection of data, then produces
///                 a `Transformer` that is able to operate on that
///                 collected state.
///
///  \tparam ReturnT    Return type of the produced `Transformer`.
///  \tparam ArgT       Argument type of the produced `Transformer`.
///
template <typename ReturnT, typename ArgT>
class Estimator {
public:
    // ----------------------------------------------------------------------
    // |  Public Types
    using transformer_type                  = Transformer<ReturnT, ArgT>;
    using TransformerUniquePtr              = std::unique_ptr<transformer_type>;
    using estimator_type                    = Estimator<ReturnT, ArgT>;
    using apache_arrow                      = unsigned long; // TODO: Temp type as we figure out what will eventually be here

    // ----------------------------------------------------------------------
    // |  Public Methods
    Estimator() = default;
    virtual ~Estimator() = default;

    // Instances may be move-constructed; copying and every form of
    // assignment are disabled.
    Estimator(Estimator const &) = delete;
    Estimator(Estimator &&) = default;

    Estimator & operator =(Estimator const &) = delete;
    Estimator & operator =(Estimator &&) = delete;

    /// Feeds `data` to `fit_impl`. May be called repeatedly in support of
    /// streaming scenarios; throws `std::runtime_error` once the instance
    /// has been committed.
    Estimator & fit(apache_arrow const &data);

    /// Produces the `Transformer` via `commit_impl`. Calls to `commit` are
    /// destructive - all previously generated state should be reset, and a
    /// committed instance rejects further `fit`/`commit` calls. `Estimator`
    /// objects that want to share state prior to calls to commit should
    /// implement a `copy` method.
    TransformerUniquePtr commit();

private:
    // ----------------------------------------------------------------------
    // |  Relationships
    friend class boost::serialization::access;

    // ----------------------------------------------------------------------
    // |  Private Data

    // Set once `commit` has succeeded.
    bool                                    _committed{false};

    // ----------------------------------------------------------------------
    // |  Private Methods
    template <typename ArchiveT>
    void serialize(ArchiveT &, unsigned int const /*version*/);

    // Customization points invoked by `fit` and `commit` respectively.
    virtual Estimator & fit_impl(apache_arrow const &data) = 0;
    virtual TransformerUniquePtr commit_impl() = 0;
};
/// Convenience helper: constructs an `EstimatorT` from `args`, calls `fit`
/// on it once with `data`, and returns the result of `commit`.
///
/// \tparam EstimatorT                  Estimator type to construct; must expose
///                                     `TransformerUniquePtr` and `apache_arrow`.
/// \tparam EstimatorConstructorArgsT   Types of the constructor arguments.
/// \param  data                        Data passed to `fit`.
/// \param  args                        Arguments forwarded to the `EstimatorT` constructor.
/// \return                             The value returned by `commit`.
template <typename EstimatorT, typename... EstimatorConstructorArgsT>
typename EstimatorT::TransformerUniquePtr fit_and_commit(typename EstimatorT::apache_arrow const &data, EstimatorConstructorArgsT &&...args);
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// |
// | Implementation
// |
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// |
// | Transformer
// |
// ----------------------------------------------------------------------
// Serialization hook for the `Transformer` base class. Both parameters are
// unused and the body is empty: the class declares no data members.
template <typename ReturnT, typename ArgT>
template <typename ArchiveT>
void Transformer<ReturnT, ArgT>::serialize(ArchiveT & /*ar*/, unsigned int const /*version*/) {
}
// ----------------------------------------------------------------------
// |
// | Estimator
// |
// ----------------------------------------------------------------------
template <typename ReturnT, typename ArgT>
Estimator<ReturnT, ArgT> & Estimator<ReturnT, ArgT>::fit(apache_arrow const &data) {
if(_committed)
throw std::runtime_error("This instance has already been committed");
return fit_impl(data);
}
// Asks `commit_impl` for the transformer and marks this instance as
// committed. Throws `std::runtime_error` if the instance was already
// committed or if `commit_impl` returned an empty pointer; in the latter
// case the instance is left uncommitted.
template <typename ReturnT, typename ArgT>
auto Estimator<ReturnT, ArgT>::commit(void) -> TransformerUniquePtr {
    if(_committed) {
        throw std::runtime_error("This instance has already been committed");
    }

    TransformerUniquePtr transformer = commit_impl();

    if(transformer == nullptr) {
        throw std::runtime_error("Invalid result");
    }

    _committed = true;
    return transformer;
}
// Serialization hook for the `Estimator` base class. Both parameters are
// unused and the body is empty, so `_committed` is neither written to nor
// read from the archive.
template <typename ReturnT, typename ArgT>
template <typename ArchiveT>
void Estimator<ReturnT, ArgT>::serialize(ArchiveT & /*ar*/, unsigned int const /*version*/) {
}
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// Builds an `EstimatorT` from the forwarded constructor arguments, fits it
// once with `data`, then commits whatever object `fit` returned and hands
// back the resulting transformer pointer.
template <typename EstimatorT, typename... EstimatorConstructorArgsT>
typename EstimatorT::TransformerUniquePtr fit_and_commit(typename EstimatorT::apache_arrow const &data, EstimatorConstructorArgsT &&...args) {
    EstimatorT                              estimator(std::forward<EstimatorConstructorArgsT>(args)...);
    auto &                                  fitted(estimator.fit(data));

    return fitted.commit();
}
} // namespace Featurizer
} // namespace Microsoft

Просмотреть файл

@ -0,0 +1,40 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#include "SampleAdd.h"
namespace Microsoft {
namespace Featurizer {
namespace SampleAdd {
// ----------------------------------------------------------------------
// |
// | Transformer
// |
// ----------------------------------------------------------------------
Transformer::Transformer(std::uint16_t delta) :
_delta(delta) {
}
Transformer::return_type Transformer::transform(arg_type const &arg) const /*override*/ {
return _delta + arg;
}
// ----------------------------------------------------------------------
// |
// | Estimator
// |
// ----------------------------------------------------------------------
// Adds `data`, converted to a 16-bit unsigned value, to the running delta
// and returns this estimator so that calls can be chained.
Estimator & Estimator::fit_impl(apache_arrow const &data) /*override*/ {
    auto const                              increment = static_cast<std::uint16_t>(data);

    _accumulated_delta += increment;
    return *this;
}
// Creates a `SampleAdd::Transformer` from the delta accumulated so far.
// NOTE(review): the accumulator is 32 bits wide while the `Transformer`
// constructor takes a `std::uint16_t`, so sums above 65535 look like they
// are narrowed here - confirm whether that is intended.
Estimator::TransformerUniquePtr Estimator::commit_impl(void) /*override*/ {
    TransformerUniquePtr                    transformer = std::make_unique<SampleAdd::Transformer>(_accumulated_delta);

    return transformer;
}
} // namespace SampleAdd
} // namespace Featurizer
} // namespace Microsoft

Просмотреть файл

@ -0,0 +1,126 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include "../Featurizer.h"
namespace Microsoft {
namespace Featurizer {
/////////////////////////////////////////////////////////////////////////
/// \namespace SampleAdd
/// \brief A Transformer and Estimator that add values. This is a
/// sample intended to demonstrate patterns within the
/// implementation of these types.
///
namespace SampleAdd {
/////////////////////////////////////////////////////////////////////////
///  \class         Transformer
///  \brief         Sample transformer that adds a saved delta to each
///                 16-bit unsigned argument and returns the 32-bit result.
///
class Transformer : public Microsoft::Featurizer::Transformer<std::uint32_t, std::uint16_t> {
public:
    // ----------------------------------------------------------------------
    // |  Public Methods

    /// \param delta    Value added by `transform`; defaults to 0.
    Transformer(std::uint16_t delta=0);
    ~Transformer() override = default;

    // Instances may be move-constructed; copying and every form of
    // assignment are disabled.
    Transformer(Transformer const &) = delete;
    Transformer(Transformer &&) = default;

    Transformer & operator =(Transformer const &) = delete;
    Transformer & operator =(Transformer &&) = delete;

    return_type transform(arg_type const &arg) const override;

private:
    // ----------------------------------------------------------------------
    // |  Relationships
    friend class boost::serialization::access;

    // ----------------------------------------------------------------------
    // |  Private Data

    // Amount added to every argument; fixed at construction.
    std::uint32_t const                     _delta;

    // ----------------------------------------------------------------------
    // |  Private Methods
    template <typename ArchiveT>
    void serialize(ArchiveT &ar, unsigned int const version);
};
/////////////////////////////////////////////////////////////////////////
///  \class         Estimator
///  \brief         Sample estimator that accumulates a delta value and
///                 then creates a Transformer with that value when
///                 requested.
///
class Estimator : public Microsoft::Featurizer::Estimator<std::uint32_t, std::uint16_t> {
public:
    // ----------------------------------------------------------------------
    // |  Public Methods
    Estimator() = default;
    ~Estimator() override = default;

    // Instances may be move-constructed; copying and every form of
    // assignment are disabled.
    Estimator(Estimator const &) = delete;
    Estimator(Estimator &&) = default;

    Estimator & operator =(Estimator const &) = delete;
    Estimator & operator =(Estimator &&) = delete;

private:
    // ----------------------------------------------------------------------
    // |  Relationships
    friend class boost::serialization::access;

    // ----------------------------------------------------------------------
    // |  Private Data

    // Running sum; starts at zero.
    std::uint32_t                           _accumulated_delta{0};

    // ----------------------------------------------------------------------
    // |  Private Methods
    template <typename ArchiveT>
    void serialize(ArchiveT &ar, unsigned int const version);

    Estimator & fit_impl(apache_arrow const &data) override;
    TransformerUniquePtr commit_impl() override;
};
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// |
// | Implementation
// |
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// |
// | Transformer
// |
// ----------------------------------------------------------------------
template <typename ArchiveT>
void Transformer::serialize(ArchiveT &ar, unsigned int const version) {
ar & boost::serialization::base_object<Microsoft::Featurizer::Transformer>(*this);
ar & boost::serialization::make_nvp("delta", _delta);
}
// ----------------------------------------------------------------------
// |
// | Estimator
// |
// ----------------------------------------------------------------------
template <typename ArchiveT>
void Estimator::serialize(ArchiveT &ar, unsigned int const version) {
ar & boost::serialization::base_object<Microsoft::Featurizer::Estimator>(*this);
ar & boost::serialization::make_nvp("accumulated_delta", _accumulated_delta);
}
} // namespace SampleAdd
} // namespace Featurizer
} // namespace Microsoft

Просмотреть файл

@ -0,0 +1,44 @@
# ----------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License
# ----------------------------------------------------------------------
# Builds the sample featurizer static library and its unit tests.
#
# `target_link_directories` (used below) was introduced in CMake 3.13, so
# that is the lowest version able to process this file.
cmake_minimum_required(VERSION 3.13)

project(Featurizer_UnitTests LANGUAGES CXX)

# Module search path supplied by the development environment.
set(CMAKE_MODULE_PATH "$ENV{DEVELOPMENT_ENVIRONMENT_CMAKE_MODULE_PATH}")

if(NOT WIN32)
    # Turn the colon-delimited environment values into CMake lists.
    # `_includes` and `_libs` are only populated on this branch.
    # NOTE(review): on Windows they stay unset - presumably the toolchain
    # reads INCLUDE/LIB from the environment itself; confirm.
    string(REPLACE ":" ";" CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}")
    string(REPLACE ":" ";" _includes "$ENV{INCLUDE}")
    string(REPLACE ":" ";" _libs "$ENV{LIB}")
endif()

set(CppCommon_STATIC_CRT ON CACHE BOOL "" FORCE)

include(CppCommon)
include(BoostCommon)

# Require C++14 without compiler-specific extensions.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

add_library(libFeaturizers STATIC
    ../SampleAdd.h
    ../SampleAdd.cpp
)

enable_testing()

# One executable and one registered test per listed name; each is built from
# `<name>.cpp` and linked against Boost and the featurizer library.
foreach(_test_name IN ITEMS
    SampleAdd_UnitTest
)
    add_executable(${_test_name} ${_test_name}.cpp)

    target_include_directories(${_test_name} PRIVATE ${_includes})
    target_link_directories(${_test_name} PRIVATE ${_libs})
    target_link_libraries(${_test_name} PRIVATE ${Boost_LIBRARIES} libFeaturizers)

    add_test(NAME ${_test_name} COMMAND ${_test_name} --success)
endforeach()

Просмотреть файл

@ -0,0 +1,22 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
#include "../SampleAdd.h"
// Verifies that `transform` returns the constructor delta plus the argument.
TEST_CASE("Transformer") {
CHECK(Microsoft::Featurizer::SampleAdd::Transformer(10).transform(20) == 30);
CHECK(Microsoft::Featurizer::SampleAdd::Transformer(20).transform(1) == 21);
}
// Verifies that values passed to chained `fit` calls are summed and that the
// committed transformer adds that sum to its argument.
TEST_CASE("Estimator") {
// Single fit: the delta equals the fitted value.
CHECK(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).commit()->transform(20) == 30);
CHECK(Microsoft::Featurizer::SampleAdd::Estimator().fit(20).commit()->transform(1) == 21);
// Multiple fits: the delta is the sum of all fitted values.
CHECK(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).fit(20).commit()->transform(20) == 50);
CHECK(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).fit(20).fit(30).commit()->transform(20) == 80);
}

Просмотреть файл

@ -0,0 +1,5 @@
filter:
includes:
- Microsoft::Featurizer::*
excludes:
- std::*

Просмотреть файл

@ -0,0 +1,39 @@
# ----------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License
# ----------------------------------------------------------------------
# Builds the unit tests for the core featurizer header.
#
# `target_link_directories` (used below) was introduced in CMake 3.13, so
# that is the lowest version able to process this file.
cmake_minimum_required(VERSION 3.13)

project(Featurizer_UnitTests LANGUAGES CXX)

# Module search path supplied by the development environment.
set(CMAKE_MODULE_PATH "$ENV{DEVELOPMENT_ENVIRONMENT_CMAKE_MODULE_PATH}")

if(NOT WIN32)
    # Turn the colon-delimited environment values into CMake lists.
    # `_includes` and `_libs` are only populated on this branch.
    # NOTE(review): on Windows they stay unset - presumably the toolchain
    # reads INCLUDE/LIB from the environment itself; confirm.
    string(REPLACE ":" ";" CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}")
    string(REPLACE ":" ";" _includes "$ENV{INCLUDE}")
    string(REPLACE ":" ";" _libs "$ENV{LIB}")
endif()

set(CppCommon_STATIC_CRT ON CACHE BOOL "" FORCE)

include(CppCommon)
include(BoostCommon)

# Require C++14 without compiler-specific extensions.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

enable_testing()

# One executable and one registered test per listed name; each is built from
# `<name>.cpp` and linked against Boost.
foreach(_test_name IN ITEMS
    Featurizer_UnitTest
)
    add_executable(${_test_name} ${_test_name}.cpp)

    target_include_directories(${_test_name} PRIVATE ${_includes})
    target_link_directories(${_test_name} PRIVATE ${_libs})
    target_link_libraries(${_test_name} PRIVATE ${Boost_LIBRARIES})

    add_test(NAME ${_test_name} COMMAND ${_test_name} --success)
endforeach()

Просмотреть файл

@ -0,0 +1,128 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
#include "../Featurizer.h"
// Test transformer that reports on the parity of its argument: when built
// with `true_on_odd == true` it returns true for odd inputs, otherwise it
// returns true for even inputs.
class MyTransformer : public Microsoft::Featurizer::Transformer<bool, int> {
public:
    // ----------------------------------------------------------------------
    // |  Public Methods
    MyTransformer(bool true_on_odd=false) :
        _true_on_odd{true_on_odd} {
    }

    ~MyTransformer() override = default;

    // Move-constructible only, mirroring the base class.
    MyTransformer(MyTransformer const &) = delete;
    MyTransformer(MyTransformer &&) = default;

    MyTransformer & operator =(MyTransformer const &) = delete;
    MyTransformer & operator =(MyTransformer &&) = delete;

    return_type transform(arg_type const &arg) const override {
        bool const                          is_odd = (arg & 1) != 0;

        // True exactly when the parity of `arg` matches the configured mode.
        return is_odd == _true_on_odd;
    }

private:
    // ----------------------------------------------------------------------
    // |  Relationships
    friend class boost::serialization::access;

    // ----------------------------------------------------------------------
    // |  Private Data
    bool const                              _true_on_odd;

    // ----------------------------------------------------------------------
    // |  Private Methods

    // Archives the base-class portion, then the flag under "true_on_odd".
    template <typename ArchiveT>
    void serialize(ArchiveT &ar, unsigned int const /*version*/) {
        ar & boost::serialization::base_object<transformer_type>(*this);
        ar & boost::serialization::make_nvp("true_on_odd", const_cast<bool &>(_true_on_odd));
    }
};
// Test estimator that remembers the most recently fitted value (converted to
// bool) and commits a `MyTransformer` configured with it. When constructed
// with `return_invalid_transformer == true`, `commit_impl` returns an empty
// pointer instead.
class MyEstimator : public Microsoft::Featurizer::Estimator<bool, int> {
public:
    // ----------------------------------------------------------------------
    // |  Public Methods
    MyEstimator(bool return_invalid_transformer=false) :
        _return_invalid_transformer(return_invalid_transformer) {
    }

    ~MyEstimator(void) override = default;

    // Move-constructible only, mirroring the base class.
    MyEstimator(MyEstimator const &) = delete;
    MyEstimator & operator =(MyEstimator const &) = delete;

    MyEstimator(MyEstimator &&) = default;
    MyEstimator & operator =(MyEstimator &&) = delete;

private:
    // ----------------------------------------------------------------------
    // |  Relationships
    friend class boost::serialization::access;

    // ----------------------------------------------------------------------
    // |  Private Data
    bool const                              _return_invalid_transformer;

    // Initialized so that committing without a prior `fit` reads a defined
    // value rather than an indeterminate one.
    bool                                    _true_on_odd_state = false;

    // ----------------------------------------------------------------------
    // |  Private Methods

    // Overwrites (does not accumulate) the stored state with `data`.
    MyEstimator & fit_impl(apache_arrow const &data) override {
        _true_on_odd_state = static_cast<bool>(data);
        return *this;
    }

    // Returns an empty pointer when configured to do so; otherwise a
    // `MyTransformer` built from the stored state.
    TransformerUniquePtr commit_impl(void) override {
        if(_return_invalid_transformer)
            return TransformerUniquePtr();

        return std::make_unique<MyTransformer>(_true_on_odd_state);
    }

    // Archives the base-class portion, then both flags by name.
    template <typename ArchiveT>
    void serialize(ArchiveT &ar, unsigned int const /*version*/) {
        ar & boost::serialization::base_object<estimator_type>(*this);
        ar & boost::serialization::make_nvp("return_invalid_transformer", const_cast<bool &>(_return_invalid_transformer));
        ar & boost::serialization::make_nvp("true_on_odd_state", _true_on_odd_state);
    }
};
// Covers all four combinations of the `true_on_odd` mode and odd/even input.
TEST_CASE("Transformer: Functionality") {
CHECK(MyTransformer(true).transform(1) == true);
CHECK(MyTransformer(false).transform(1) == false);
CHECK(MyTransformer(true).transform(2) == false);
CHECK(MyTransformer(false).transform(2) == true);
}
// The fitted value (1 or 0) becomes the committed transformer's
// `true_on_odd` mode; each combination is checked against odd/even input.
TEST_CASE("Estimator: Functionality") {
CHECK(MyEstimator().fit(1).commit()->transform(1) == true);
CHECK(MyEstimator().fit(0).commit()->transform(1) == false);
CHECK(MyEstimator().fit(1).commit()->transform(2) == false);
CHECK(MyEstimator().fit(0).commit()->transform(2) == true);
}
// Exercises the error paths of `Estimator::fit` and `Estimator::commit`.
TEST_CASE("Estimator: Errors") {
MyEstimator e;
// The first commit succeeds and yields a non-empty pointer.
CHECK(e.commit());
// Once committed, both `fit` and `commit` must throw.
CHECK_THROWS_WITH(e.fit(1), Catch::Contains("has already been committed"));
CHECK_THROWS_WITH(e.commit(), Catch::Contains("has already been committed"));
// An empty pointer from `commit_impl` is reported as "Invalid result".
CHECK_THROWS_WITH(MyEstimator(true).commit(), Catch::Matches("Invalid result"));
}
// Same expectations as the chained fit/commit tests, driven through the
// `fit_and_commit` helper. The first argument is the data to fit; `false` is
// forwarded to the `MyEstimator` constructor.
TEST_CASE("fit_and_commit") {
CHECK(Microsoft::Featurizer::fit_and_commit<MyEstimator>(1, false)->transform(1) == true);
CHECK(Microsoft::Featurizer::fit_and_commit<MyEstimator>(0, false)->transform(1) == false);
CHECK(Microsoft::Featurizer::fit_and_commit<MyEstimator>(1, false)->transform(2) == false);
CHECK(Microsoft::Featurizer::fit_and_commit<MyEstimator>(0, false)->transform(2) == true);
}