From 1357c64e3513bd0e07ce62f9d794502ccbe07dfc Mon Sep 17 00:00:00 2001 From: David Brownell Date: Fri, 26 Jul 2019 21:18:30 +0000 Subject: [PATCH] Merged PR 4815: Added Sample Featurizer and Infrastructure Added Sample Featurizer and Infrastructure --- CI/EntryPoint.yaml | 18 +- Setup_custom.py | 26 ++- bootstrap_impl.py | 8 +- src/FeaturizerPrep/Featurizer.h | 175 ++++++++++++++++++ src/FeaturizerPrep/Featurizers/SampleAdd.cpp | 40 ++++ src/FeaturizerPrep/Featurizers/SampleAdd.h | 126 +++++++++++++ .../Featurizers/UnitTests/CMakeLists.txt | 44 +++++ .../UnitTests/SampleAdd_UnitTest.cpp | 22 +++ .../Featurizers/UnitTests/code_coverage.yaml | 5 + src/FeaturizerPrep/UnitTests/CMakeLists.txt | 39 ++++ .../UnitTests/Featurizer_UnitTest.cpp | 128 +++++++++++++ 11 files changed, 623 insertions(+), 8 deletions(-) create mode 100644 src/FeaturizerPrep/Featurizer.h create mode 100644 src/FeaturizerPrep/Featurizers/SampleAdd.cpp create mode 100644 src/FeaturizerPrep/Featurizers/SampleAdd.h create mode 100644 src/FeaturizerPrep/Featurizers/UnitTests/CMakeLists.txt create mode 100644 src/FeaturizerPrep/Featurizers/UnitTests/SampleAdd_UnitTest.cpp create mode 100644 src/FeaturizerPrep/Featurizers/UnitTests/code_coverage.yaml create mode 100644 src/FeaturizerPrep/UnitTests/CMakeLists.txt create mode 100644 src/FeaturizerPrep/UnitTests/Featurizer_UnitTest.cpp diff --git a/CI/EntryPoint.yaml b/CI/EntryPoint.yaml index a62b6b1..059a928 100644 --- a/CI/EntryPoint.yaml +++ b/CI/EntryPoint.yaml @@ -12,8 +12,20 @@ stages: operating_system: Windows configuration: x64 +# TODO: Boost lib does not currently support Linux - template: BuildAndTest.template.yaml +# TODO: Boost lib does not currently support Linux parameters: +# TODO: Boost lib does not currently support Linux agent_pool: ubuntu-16.04 +# TODO: Boost lib does not currently support Linux operating_system: Linux +# TODO: Boost lib does not currently support Linux configuration: x64 + - template: BuildAndTest.template.yaml 
parameters: - agent_pool: ubuntu-16.04 - operating_system: Linux - configuration: x64 + agent_pool: vs2015-win2012r2 + operating_system: Windows + configuration: featurizer_prep + +# TODO: Boost lib does not currently support Linux - template: BuildAndTest.template.yaml +# TODO: Boost lib does not currently support Linux parameters: +# TODO: Boost lib does not currently support Linux agent_pool: ubuntu-16.04 +# TODO: Boost lib does not currently support Linux operating_system: Linux +# TODO: Boost lib does not currently support Linux configuration: featurizer_prep diff --git a/Setup_custom.py b/Setup_custom.py index 0309a52..002d313 100644 --- a/Setup_custom.py +++ b/Setup_custom.py @@ -22,6 +22,7 @@ # | # ---------------------------------------------------------------------- +import copy import os import sys @@ -106,9 +107,32 @@ def GetDependencies(): "{}-ex".format(architecture), "https://github.com/davidbrownell/Common_cpp_Clang_8.git", ), + # TODO: This configuration doesn't depend on boost; however, some tests associated with the + # `featurizer_prep` configuration do. Include it for now, as there isn't a way to specify + # configuration-specific tests at this time. Remove the following dependency once there is a + # way to communicate this information. + Dependency( + "407DD743110A4FB1871AEF60CBEC99A0", + "Common_cpp_boost_1.70.0", + "standard", + "https://github.com/davidbrownell/Common_cpp_boost_1.70.0.git", + ), ], ) + d["featurizer_prep"] = copy.deepcopy(d["x64"]) + + # TODO: Enable this once the TODO comment above is resolved. + # + # d["featurizer_prep"].Dependencies.append( + # Dependency( + # "407DD743110A4FB1871AEF60CBEC99A0", + # "Common_cpp_boost_1.70.0", + # "standard", + # "https://github.com/davidbrownell/Common_cpp_boost_1.70.0.git", + # ), + # ) + return d @@ -117,7 +141,7 @@ def GetCustomActions(debug, verbose, explicit_configurations): """ Returns an action or list of actions that should be invoked as part of the setup process. 
- Actions are generic command line statements defined in + Actions are generic command line statements defined in /Libraries/Python/CommonEnvironment/v1.0/CommonEnvironment/Shell/Commands/__init__.py that are converted into statements appropriate for the current scripting language (in most cases, this is Bash on Linux systems and Batch or PowerShell on Windows systems. diff --git a/bootstrap_impl.py b/bootstrap_impl.py index 89c5aac..22e4cf4 100644 --- a/bootstrap_impl.py +++ b/bootstrap_impl.py @@ -45,6 +45,8 @@ _REPO_DATA = [ ("Common_cpp_Clang_8", 'git clone https://github.com/davidbrownell/Common_cpp_Clang_8 "{output_dir}"', None), ("Common_cpp_Clang_Common", 'git clone https://github.com/davidbrownell/Common_cpp_Clang_Common "{output_dir}"', None), ("Common_cpp_Common", 'git clone https://github.com/davidbrownell/Common_cpp_Common "{output_dir}"', None), + ("Common_cpp_boost_Common", 'git clone https://github.com/davidbrownell/Common_cpp_boost_Common "{output_dir}"', None), + ("Common_cpp_boost_1.70.0", 'git clone https://github.com/davidbrownell/Common_cpp_boost_1.70.0 "{output_dir}"', '"/configuration=standard" "/configuration=MSVC-2019-x64"'), ] if CurrentShell.CategoryName == "Linux": @@ -61,7 +63,7 @@ elif CurrentShell.CategoryName == "Windows": else: raise Exception("'{}' is not supported OS".format(CurrentShell.CategoryName)) -_ACTIVATION_REPO_CONFIGURATION = "x64" +_ACTIVATION_REPO_CONFIGURATION = "" # ---------------------------------------------------------------------- inflect = inflect_mod.engine() @@ -177,9 +179,7 @@ def EntryPoint( suffix=data[2] or "", ) - if CurrentShell.CategoryName == "Windows": - command_line = command_line.replace("=", "_EQ_") - elif CurrentShell.CategoryName == "Linux": + if CurrentShell.CategoryName == "Linux": command_line = "./{}".format(command_line) sink = six.moves.StringIO() diff --git a/src/FeaturizerPrep/Featurizer.h b/src/FeaturizerPrep/Featurizer.h new file mode 100644 index 0000000..988ce8c --- /dev/null +++ 
b/src/FeaturizerPrep/Featurizer.h @@ -0,0 +1,175 @@ +// ---------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License +// ---------------------------------------------------------------------- +#pragma once + +#include +#include + +#include +#include +#include + +namespace Microsoft { +namespace Featurizer { + +///////////////////////////////////////////////////////////////////////// +/// \class Transformer +/// \brief Transforms a single "value" and outputs the result. +/// A value can be anything from an integer to a collection +/// of integers. +/// Copy construction, copy assignment and move assignment are deleted; move construction is defaulted. +template +class Transformer { +public: + // ---------------------------------------------------------------------- + // | Public Types + using return_type = ReturnT; + using arg_type = ArgT; + using transformer_type = Transformer; + + // ---------------------------------------------------------------------- + // | Public Methods + Transformer(void) = default; + virtual ~Transformer(void) = default; + + Transformer(Transformer const &) = delete; + Transformer & operator =(Transformer const &) = delete; + + Transformer(Transformer &&) = default; + Transformer & operator =(Transformer &&) = delete; + + virtual return_type transform(arg_type const &arg) const = 0; + +private: + // ---------------------------------------------------------------------- + // | Relationships + friend class boost::serialization::access; + + // ---------------------------------------------------------------------- + // | Private Methods + template + void serialize(ArchiveT &, unsigned int const /*version*/); +}; + +///////////////////////////////////////////////////////////////////////// +/// \class Estimator +/// \brief Collects state over a collection of data, then produces +/// a `Transformer` that is able to operate on that collected +/// state. 
+/// +template +class Estimator { +public: + // ---------------------------------------------------------------------- + // | Public Types + using transformer_type = Transformer; + using TransformerUniquePtr = std::unique_ptr; + + using estimator_type = Estimator; + + using apache_arrow = unsigned long; // TODO: Temp type as we figure out what will eventually be here + + // ---------------------------------------------------------------------- + // | Public Methods + Estimator(void) = default; + virtual ~Estimator(void) = default; + + Estimator(Estimator const &) = delete; + Estimator & operator =(Estimator const &) = delete; + + Estimator(Estimator &&) = default; + Estimator & operator =(Estimator &&) = delete; + + // This method can be called repeatedly in support of streaming scenarios; it throws `std::runtime_error` once `commit` has succeeded. + Estimator & fit(apache_arrow const &data); + + // Calls to `commit` are destructive - all previously generated state should + // be reset. `Estimator` objects that want to share state prior to calls to commit + // should implement a `copy` method. Throws `std::runtime_error` if already committed or if `commit_impl` returns a null pointer. 
+ TransformerUniquePtr commit(void); + +private: + // ---------------------------------------------------------------------- + // | Relationships + friend class boost::serialization::access; + + // ---------------------------------------------------------------------- + // | Private Data + bool _committed = false; + + // ---------------------------------------------------------------------- + // | Private Methods + template + void serialize(ArchiveT &, unsigned int const /*version*/); + + virtual Estimator & fit_impl(apache_arrow const &data) = 0; + virtual TransformerUniquePtr commit_impl(void) = 0; +}; + +template +typename EstimatorT::TransformerUniquePtr fit_and_commit(typename EstimatorT::apache_arrow const &data, EstimatorConstructorArgsT &&...args); + +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- +// | +// | Implementation +// | +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- + +// ---------------------------------------------------------------------- +// | +// | Transformer +// | +// ---------------------------------------------------------------------- +template +template +void Transformer::serialize(ArchiveT & /*ar*/, unsigned int const /*version*/) { +} + +// ---------------------------------------------------------------------- +// | +// | Estimator +// | +// ---------------------------------------------------------------------- +template +Estimator & Estimator::fit(apache_arrow const &data) { + if(_committed) + throw std::runtime_error("This instance has already been committed"); + + return fit_impl(data); +} + +template +typename Estimator::TransformerUniquePtr Estimator::commit(void) { + 
if(_committed) + throw std::runtime_error("This instance has already been committed"); + + TransformerUniquePtr result(commit_impl()); + + if(!result) + throw std::runtime_error("Invalid result"); + + _committed = true; + return result; +} + +template +template +void Estimator::serialize(ArchiveT & /*ar*/, unsigned int const /*version*/) { +} + +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- +template +typename EstimatorT::TransformerUniquePtr fit_and_commit(typename EstimatorT::apache_arrow const &data, EstimatorConstructorArgsT &&...args) { + return EstimatorT(std::forward(args)...).fit(data).commit(); +} + +} // namespace Featurizer +} // namespace Microsoft diff --git a/src/FeaturizerPrep/Featurizers/SampleAdd.cpp b/src/FeaturizerPrep/Featurizers/SampleAdd.cpp new file mode 100644 index 0000000..d6c4abd --- /dev/null +++ b/src/FeaturizerPrep/Featurizers/SampleAdd.cpp @@ -0,0 +1,40 @@ +// ---------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License +// ---------------------------------------------------------------------- +#include "SampleAdd.h" + +namespace Microsoft { +namespace Featurizer { +namespace SampleAdd { + +// ---------------------------------------------------------------------- +// | +// | Transformer +// | +// ---------------------------------------------------------------------- +Transformer::Transformer(std::uint32_t delta) : + _delta(delta) { +} + +Transformer::return_type Transformer::transform(arg_type const &arg) const /*override*/ { + return _delta + arg; +} + +// ---------------------------------------------------------------------- +// | +// | Estimator +// | +// ---------------------------------------------------------------------- +Estimator & Estimator::fit_impl(apache_arrow const &data) /*override*/ { + _accumulated_delta += static_cast(data); + return *this; +} + +Estimator::TransformerUniquePtr Estimator::commit_impl(void) /*override*/ { + return std::make_unique(_accumulated_delta); +} + +} // namespace SampleAdd +} // namespace Featurizer +} // namespace Microsoft diff --git a/src/FeaturizerPrep/Featurizers/SampleAdd.h b/src/FeaturizerPrep/Featurizers/SampleAdd.h new file mode 100644 index 0000000..fe0af76 --- /dev/null +++ b/src/FeaturizerPrep/Featurizers/SampleAdd.h @@ -0,0 +1,126 @@ +// ---------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License +// ---------------------------------------------------------------------- +#pragma once + +#include "../Featurizer.h" + +namespace Microsoft { +namespace Featurizer { + +///////////////////////////////////////////////////////////////////////// +/// \namespace SampleAdd +/// \brief A Transformer and Estimator that add values. This is a +/// sample intended to demonstrate patterns within the +/// implementation of these types. 
+/// +namespace SampleAdd { + +///////////////////////////////////////////////////////////////////////// +/// \class Transformer +/// \brief Transformer that adds an integer value to a saved delta +/// and returns the result. +/// The delta is held as a 32-bit unsigned value and is fixed at construction. +class Transformer : public Microsoft::Featurizer::Transformer { +public: + // ---------------------------------------------------------------------- + // | Public Methods + Transformer(std::uint32_t delta=0); // 32 bits wide to match `_delta`; a 16-bit parameter truncated deltas accumulated by `Estimator` + ~Transformer(void) override = default; + + Transformer(Transformer const &) = delete; + Transformer & operator =(Transformer const &) = delete; + + Transformer(Transformer &&) = default; + Transformer & operator =(Transformer &&) = delete; + + return_type transform(arg_type const &arg) const override; + +private: + // ---------------------------------------------------------------------- + // | Relationships + friend class boost::serialization::access; + + // ---------------------------------------------------------------------- + // | Private Data + std::uint32_t const _delta; + + // ---------------------------------------------------------------------- + // | Private Methods + template + void serialize(ArchiveT &ar, unsigned int const version); +}; + +///////////////////////////////////////////////////////////////////////// +/// \class Estimator +/// \brief Estimator that accumulates a delta value and then +/// creates a Transformer with that value when requested. 
+/// +class Estimator : public Microsoft::Featurizer::Estimator { +public: + // ---------------------------------------------------------------------- + // | Public Methods + Estimator(void) = default; + ~Estimator(void) override = default; + + Estimator(Estimator const &) = delete; + Estimator & operator =(Estimator const &) = delete; + + Estimator(Estimator &&) = default; + Estimator & operator =(Estimator &&) = delete; + +private: + // ---------------------------------------------------------------------- + // | Relationships + friend class boost::serialization::access; + + // ---------------------------------------------------------------------- + // | Private Data + std::uint32_t _accumulated_delta = 0; + + // ---------------------------------------------------------------------- + // | Private Methods + template + void serialize(ArchiveT &ar, unsigned int const version); + + Estimator & fit_impl(apache_arrow const &data) override; + TransformerUniquePtr commit_impl(void) override; +}; + +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- +// | +// | Implementation +// | +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- +// ---------------------------------------------------------------------- + +// ---------------------------------------------------------------------- +// | +// | Transformer +// | +// ---------------------------------------------------------------------- +template +void Transformer::serialize(ArchiveT &ar, unsigned int const version) { + ar & boost::serialization::base_object(*this); + ar & boost::serialization::make_nvp("delta", _delta); +} + +// ---------------------------------------------------------------------- +// | +// | Estimator +// | +// 
---------------------------------------------------------------------- +template +void Estimator::serialize(ArchiveT &ar, unsigned int const version) { + ar & boost::serialization::base_object(*this); + ar & boost::serialization::make_nvp("accumulated_delta", _accumulated_delta); +} + +} // namespace SampleAdd + +} // namespace Featurizer +} // namespace Microsoft diff --git a/src/FeaturizerPrep/Featurizers/UnitTests/CMakeLists.txt b/src/FeaturizerPrep/Featurizers/UnitTests/CMakeLists.txt new file mode 100644 index 0000000..0ad1d65 --- /dev/null +++ b/src/FeaturizerPrep/Featurizers/UnitTests/CMakeLists.txt @@ -0,0 +1,44 @@ +# ---------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License +# ---------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.13) # target_link_directories() below requires CMake 3.13 or later + +project(Featurizer_UnitTests LANGUAGES CXX) + +set(CMAKE_MODULE_PATH "$ENV{DEVELOPMENT_ENVIRONMENT_CMAKE_MODULE_PATH}") + +if(NOT WIN32) + string(REPLACE ":" ";" CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}") + string(REPLACE ":" ";" _includes "$ENV{INCLUDE}") + string(REPLACE ":" ";" _libs "$ENV{LIB}") +endif() + +set(CppCommon_STATIC_CRT ON CACHE BOOL "" FORCE) + +include(CppCommon) +include(BoostCommon) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +add_library(libFeaturizers STATIC + ../SampleAdd.h + ../SampleAdd.cpp +) + +enable_testing() + +foreach(_test_name IN ITEMS + SampleAdd_UnitTest +) + add_executable(${_test_name} ${_test_name}.cpp) + + target_include_directories(${_test_name} PRIVATE ${_includes}) + target_link_directories(${_test_name} PRIVATE ${_libs}) + + target_link_libraries(${_test_name} PRIVATE ${Boost_LIBRARIES} libFeaturizers) + + add_test(NAME ${_test_name} COMMAND ${_test_name} --success) +endforeach() diff --git a/src/FeaturizerPrep/Featurizers/UnitTests/SampleAdd_UnitTest.cpp 
b/src/FeaturizerPrep/Featurizers/UnitTests/SampleAdd_UnitTest.cpp new file mode 100644 index 0000000..87892c7 --- /dev/null +++ b/src/FeaturizerPrep/Featurizers/UnitTests/SampleAdd_UnitTest.cpp @@ -0,0 +1,22 @@ +// ---------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License +// ---------------------------------------------------------------------- + +#define CATCH_CONFIG_MAIN +#include "catch.hpp" + +#include "../SampleAdd.h" + +TEST_CASE("Transformer") { + CHECK(Microsoft::Featurizer::SampleAdd::Transformer(10).transform(20) == 30); + CHECK(Microsoft::Featurizer::SampleAdd::Transformer(20).transform(1) == 21); +} + +TEST_CASE("Estimator") { + CHECK(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).commit()->transform(20) == 30); + CHECK(Microsoft::Featurizer::SampleAdd::Estimator().fit(20).commit()->transform(1) == 21); + + CHECK(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).fit(20).commit()->transform(20) == 50); + CHECK(Microsoft::Featurizer::SampleAdd::Estimator().fit(10).fit(20).fit(30).commit()->transform(20) == 80); +} diff --git a/src/FeaturizerPrep/Featurizers/UnitTests/code_coverage.yaml b/src/FeaturizerPrep/Featurizers/UnitTests/code_coverage.yaml new file mode 100644 index 0000000..e3f0689 --- /dev/null +++ b/src/FeaturizerPrep/Featurizers/UnitTests/code_coverage.yaml @@ -0,0 +1,5 @@ +filter: + includes: + - Microsoft::Featurizer::* + excludes: + - std::* diff --git a/src/FeaturizerPrep/UnitTests/CMakeLists.txt b/src/FeaturizerPrep/UnitTests/CMakeLists.txt new file mode 100644 index 0000000..9ca28a4 --- /dev/null +++ b/src/FeaturizerPrep/UnitTests/CMakeLists.txt @@ -0,0 +1,39 @@ +# ---------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
+# Licensed under the MIT License +# ---------------------------------------------------------------------- +cmake_minimum_required(VERSION 3.13) # target_link_directories() below requires CMake 3.13 or later + +project(Featurizer_UnitTests LANGUAGES CXX) + +set(CMAKE_MODULE_PATH "$ENV{DEVELOPMENT_ENVIRONMENT_CMAKE_MODULE_PATH}") + +if(NOT WIN32) + string(REPLACE ":" ";" CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}") + string(REPLACE ":" ";" _includes "$ENV{INCLUDE}") + string(REPLACE ":" ";" _libs "$ENV{LIB}") +endif() + +set(CppCommon_STATIC_CRT ON CACHE BOOL "" FORCE) + +include(CppCommon) +include(BoostCommon) + +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +enable_testing() + +foreach(_test_name IN ITEMS + Featurizer_UnitTest +) + add_executable(${_test_name} ${_test_name}.cpp) + + target_include_directories(${_test_name} PRIVATE ${_includes}) + target_link_directories(${_test_name} PRIVATE ${_libs}) + + target_link_libraries(${_test_name} PRIVATE ${Boost_LIBRARIES}) + + add_test(NAME ${_test_name} COMMAND ${_test_name} --success) +endforeach() diff --git a/src/FeaturizerPrep/UnitTests/Featurizer_UnitTest.cpp b/src/FeaturizerPrep/UnitTests/Featurizer_UnitTest.cpp new file mode 100644 index 0000000..ba3bbf1 --- /dev/null +++ b/src/FeaturizerPrep/UnitTests/Featurizer_UnitTest.cpp @@ -0,0 +1,128 @@ +// ---------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License +// ---------------------------------------------------------------------- + +#define CATCH_CONFIG_MAIN +#include "catch.hpp" + +#include "../Featurizer.h" + +class MyTransformer : public Microsoft::Featurizer::Transformer { +public: + // ---------------------------------------------------------------------- + // | Public Methods + MyTransformer(bool true_on_odd=false) : + _true_on_odd(true_on_odd) { + } + + ~MyTransformer(void) override = default; + + MyTransformer(MyTransformer const &) = delete; + MyTransformer & operator =(MyTransformer const &) = delete; + + MyTransformer(MyTransformer &&) = default; + MyTransformer & operator =(MyTransformer &&) = delete; + + return_type transform(arg_type const &arg) const override { + bool const is_odd(arg & 1); + + return _true_on_odd ? is_odd : !is_odd; + } + +private: + // ---------------------------------------------------------------------- + // | Relationships + friend class boost::serialization::access; + + // ---------------------------------------------------------------------- + // | Private Data + bool const _true_on_odd; + + // ---------------------------------------------------------------------- + // | Private Methods + template + void serialize(ArchiveT &ar, unsigned int const /*version*/) { + ar & boost::serialization::base_object(*this); + ar & boost::serialization::make_nvp("true_on_odd", const_cast(_true_on_odd)); + } +}; + +class MyEstimator : public Microsoft::Featurizer::Estimator { +public: + // ---------------------------------------------------------------------- + // | Public Methods + MyEstimator(bool return_invalid_transformer=false) : + _return_invalid_transformer(return_invalid_transformer) { + } + + ~MyEstimator(void) override = default; + + MyEstimator(MyEstimator const &) = delete; + MyEstimator & operator =(MyEstimator const &) = delete; + + MyEstimator(MyEstimator &&) = default; + MyEstimator & operator =(MyEstimator &&) = delete; + +private: + // 
---------------------------------------------------------------------- + // | Relationships + friend class boost::serialization::access; + + // ---------------------------------------------------------------------- + // | Private Data + bool const _return_invalid_transformer; + bool _true_on_odd_state = false; // Initialized so that `commit` without a prior `fit` never reads an indeterminate value + + // ---------------------------------------------------------------------- + // | Private Methods + MyEstimator & fit_impl(apache_arrow const &data) override { + _true_on_odd_state = static_cast(data); + return *this; + } + + TransformerUniquePtr commit_impl(void) override { + if(_return_invalid_transformer) + return TransformerUniquePtr(); + + return std::make_unique(_true_on_odd_state); + } + + template + void serialize(ArchiveT &ar, unsigned int const /*version*/) { + ar & boost::serialization::base_object(*this); + ar & boost::serialization::make_nvp("return_invalid_transformer", const_cast(_return_invalid_transformer)); + ar & boost::serialization::make_nvp("true_on_odd_state", const_cast(_true_on_odd_state)); + } +}; + +TEST_CASE("Transformer: Functionality") { + CHECK(MyTransformer(true).transform(1) == true); + CHECK(MyTransformer(false).transform(1) == false); + CHECK(MyTransformer(true).transform(2) == false); + CHECK(MyTransformer(false).transform(2) == true); +} + +TEST_CASE("Estimator: Functionality") { + CHECK(MyEstimator().fit(1).commit()->transform(1) == true); + CHECK(MyEstimator().fit(0).commit()->transform(1) == false); + CHECK(MyEstimator().fit(1).commit()->transform(2) == false); + CHECK(MyEstimator().fit(0).commit()->transform(2) == true); +} + +TEST_CASE("Estimator: Errors") { + MyEstimator e; + + CHECK(e.commit()); + CHECK_THROWS_WITH(e.fit(1), Catch::Contains("has already been committed")); + CHECK_THROWS_WITH(e.commit(), Catch::Contains("has already been committed")); + + CHECK_THROWS_WITH(MyEstimator(true).commit(), Catch::Matches("Invalid result")); +} + +TEST_CASE("fit_and_commit") { + CHECK(Microsoft::Featurizer::fit_and_commit(1, 
false)->transform(1) == true); + CHECK(Microsoft::Featurizer::fit_and_commit(0, false)->transform(1) == false); + CHECK(Microsoft::Featurizer::fit_and_commit(1, false)->transform(2) == false); + CHECK(Microsoft::Featurizer::fit_and_commit(0, false)->transform(2) == true); +}