Merged PR 5081: Added new build configurations, removing unused code

- Added new build configurations for MSVC and Linux builds
- Removed unused code
- Added doxygen documentation generator
- Added placeholder packaging code
This commit is contained in:
David Brownell 2019-09-10 19:54:09 +00:00
Родитель b85a776f72
Коммит 3efef14e12
42 изменённых файлов: 8343 добавлений и 1195 удалений

Просмотреть файл

@ -0,0 +1,86 @@
parameters:
operating_system: "" # Windows|Linux
enable_code_coverage: "" # True|False
configuration: ""
agent_pool: ""
# This is a parameter not because it will be configued by the caller, but rather because arrays can be
# defined here and not in variables.
test_types:
"UnitTests": " $(azure_code_coverage_arg)"
"FunctionalTests": ""
"IntegrationTests": ""
"SystemTests": ""
"LocalEndToEndTests": ""
# 'EndToEndTests': ''
# 'BuildVerificationTests': ''
# 'PerformanceTests: ''
stages:
- stage: BuildAndTest_${{ parameters.operating_system }}_${{ parameters.configuration}}_Stage
displayName: "${{ parameters.operating_system }} - ${{ parameters.configuration }}: "
dependsOn: [] # No dependencies
jobs:
- job: BuildAndTest_${{ parameters.operating_system }}_${{ parameters.configuration}}_Job
# In the UX, this display name contains redundant info (the OS and config appear twice). However, it
# needs to be here to ensure that the information appears in the status email messages.
displayName: "Build and Test (${{ parameters.operating_system }} - ${{ parameters.configuration }})"
pool:
vmImage: "${{ parameters.agent_pool }}"
workspace:
clean: all
steps:
- template: Initialize.steps_template.yaml
parameters:
operating_system: ${{ parameters.operating_system }}
enable_code_coverage: ${{ parameters.enable_code_coverage }}
- script: |-
echo "configuration - ${{ parameters.configuration }}"
echo "agent_pool - ${{ parameters.agent_pool }}"
displayName: "[DEBUG] Display BuildAndTest Variables"
- script: |-
$(azure_display_all_environment_vars)
displayName: "[DEBUG] Environment Variables (Before Activation)"
- script: |-
$(azure_bootstrap_command) $(azure_agent_temp_directory) /verbose
displayName: "<Bootstrap>"
timeoutInMinutes: 180
- script: |-
$(azure_activate_script) ${{ parameters.configuration }} && $(azure_display_all_environment_vars)
displayName: "[DEBUG] Environment Variables (After Activation)"
# TODO: Code formatting
- script: |-
$(azure_activate_script) ${{ parameters.configuration }} && Builder$(azure_script_extension) Execute . "$(azure_artifacts_directory)/Builder" /verbose
displayName: "<Builder>"
timeoutInMinutes: 180
- script: |-
$(azure_activate_script) ${{ parameters.configuration }} && Tester$(azure_script_extension) MatchAllTests . UnitTests /verbose
displayName: "<Tester (MatchAllTests) - UnitTests>"
timeoutInMinutes: 1800
condition: succeededOrFailed()
- ${{ each test_type in parameters.test_types }}:
- script: |-
$(azure_activate_script) ${{ parameters.configuration }} && Tester$(azure_script_extension) TestAll . "$(azure_artifacts_directory)/Tester/${{ test_type.key }}" ${{ test_type.key }} ${{ test_type.value }} /verbose
displayName: "<Tester - ${{ test_type.key }}>"
timeoutInMinutes: 180
condition: succeededOrFailed()
- task: PublishPipelineArtifact@0
displayName: "Publish Artifacts"
inputs:
targetPath: $(azure_artifacts_directory)
artifactName: "${{ parameters.operating_system }} - ${{ parameters.configuration }}"
condition: succeededOrFailed()
timeoutInMinutes: 180

Просмотреть файл

@ -1,8 +1,12 @@
parameters:
agent_pool: ""
# TODO: Remove this once these changes have been moved to master. This file exists as we need to ensure the the point
# continues to function while this change is in flight.
parameters:
operating_system: "" # Windows|Linux
enable_code_coverage: "" # True|False
configuration: ""
agent_pool: ""
# This is a parameter not because it will be configued by the caller, but rather because arrays can be
# defined here and not in variables.
@ -33,73 +37,29 @@ stages:
clean: all
steps:
# These steps are an ugly hack to set Azure DevOps variables conditionally. It really seems like there should be a better way to do this.
- ${{ if eq(parameters.operating_system, 'Windows') }}:
- script: |-
set _AZURE_ACTIVATE_SCRIPT=call Activate.cmd
echo ##vso[task.setvariable variable=azure_activate_script]%_AZURE_ACTIVATE_SCRIPT%
- template: Initialize.steps_template.yaml
parameters:
operating_system: ${{ parameters.operating_system }}
enable_code_coverage: ${{ parameters.enable_code_coverage }}
set _AZURE_SCRIPT_EXTENSION=.cmd
echo ##vso[task.setvariable variable=azure_script_extension]%_AZURE_SCRIPT_EXTENSION%
set _AZURE_DISPLAY_ALL_ENVIRONMENT_VARS=set
echo ##vso[task.setvariable variable=azure_display_all_environment_vars]%_AZURE_DISPLAY_ALL_ENVIRONMENT_VARS%
set _AZURE_BOOTSTRAP_COMMAND=bootstrap.cmd
echo ##vso[task.setvariable variable=azure_bootstrap_command]%_AZURE_BOOTSTRAP_COMMAND%
set _AZURE_CODE_COVERAGE_ARG=/code_coverage
echo ##vso[task.setvariable variable=azure_code_coverage_arg]%_AZURE_CODE_COVERAGE_ARG%
echo ##vso[task.setvariable variable=azure_agent_temp_directory]%AGENT_TEMPDIRECTORY%
echo ##vso[task.setvariable variable=azure_artifacts_directory]%BUILD_ARTIFACTSTAGINGDIRECTORY%
displayName: "[IMPL] Environment-Specific Variables"
- ${{ if eq(parameters.operating_system, 'Linux') }}:
- script: |-
export _AZURE_ACTIVATE_SCRIPT=". ./Activate.sh"
echo "##vso[task.setvariable variable=azure_activate_script]${_AZURE_ACTIVATE_SCRIPT}"
export _AZURE_SCRIPT_EXTENSION=.sh
echo "##vso[task.setvariable variable=azure_script_extension]${_AZURE_SCRIPT_EXTENSION}"
export _AZURE_DISPLAY_ALL_ENVIRONMENT_VARS=export
echo "##vso[task.setvariable variable=azure_display_all_environment_vars]${_AZURE_DISPLAY_ALL_ENVIRONMENT_VARS}"
export _AZURE_BOOTSTRAP_COMMAND="sudo ./bootstrap.sh"
echo "##vso[task.setvariable variable=azure_bootstrap_command]${_AZURE_BOOTSTRAP_COMMAND}"
export _AZURE_CODE_COVERAGE_ARG=
echo "##vso[task.setvariable variable=azure_code_coverage_arg]${_AZURE_CODE_COVERAGE_ARG}"
echo "##vso[task.setvariable variable=azure_agent_temp_directory]${AGENT_TEMPDIRECTORY}"
echo "##vso[task.setvariable variable=azure_artifacts_directory]${BUILD_ARTIFACTSTAGINGDIRECTORY}"
displayName: "[IMPL] Environment-Specific Variables"
# Continue with standard processing
- script: |-
echo "operating_system - ${{ parameters.operating_system }}"
echo "configuration - ${{ parameters.configuration }}"
echo "activate_script - $(azure_activate_script)"
echo "script_extension - $(azure_script_extension)"
echo "display_all_environment_vars - $(azure_display_all_environment_vars)"
echo "bootstrap_command - $(azure_bootstrap_command)"
echo "agent_temp_directory - $(azure_agent_temp_directory)"
displayName: "[DEBUG] Configuration Values"
echo "agent_pool - ${{ parameters.agent_pool }}"
displayName: "[DEBUG] Display BuildAndTest Variables"
- script: |-
$(azure_display_all_environment_vars)
displayName: "[DEBUG] Environment Variables"
displayName: "[DEBUG] Environment Variables (Before Activation)"
- script: |-
$(azure_bootstrap_command) $(azure_agent_temp_directory) /verbose
displayName: "<Bootstrap>"
timeoutInMinutes: 180
- script: |-
$(azure_activate_script) ${{ parameters.configuration }} && $(azure_display_all_environment_vars)
displayName: "[DEBUG] Environment Variables (After Activation)"
# TODO: Code formatting
- script: |-

58
CI/CI.yaml Normal file
Просмотреть файл

@ -0,0 +1,58 @@
name: $(TeamProject)-$(BuildDefinitionName)-$(SourceBranchName)-$(Date:yyyy.MM.dd)-$(Rev:rr)
trigger:
batch: true
branches:
include:
- master
stages:
- template: BuildAndTest.stage_template.yaml
parameters:
agent_pool: windows-2019
operating_system: Windows
configuration: x64
enable_code_coverage: True
- template: BuildAndTest.stage_template.yaml
parameters:
agent_pool: windows-2019
operating_system: Windows
configuration: x64_MSVC
enable_code_coverage: False
- template: BuildAndTest.stage_template.yaml
parameters:
agent_pool: ubuntu-16.04
operating_system: Linux
configuration: x64
enable_code_coverage: False
# TODO: Add a build based on custom image
# - template: BuildAndTest.stage_template.yaml
# parameters:
# agent_pool: <Docker Container>
# operating_system: Linux
# configuration: system_compiler
# enable_code_coverage: False
- template: Documentation.stage_template.yaml
parameters:
agent_pool: "windows-2019"
operating_system: "Windows"
configuration: "system_compiler"
dependencies:
- BuildAndTest_Windows_x64_Stage
- BuildAndTest_Windows_x64_MSVC_Stage
- BuildAndTest_Linux_x64_Stage
# TODO - BuildAndTest_Linux_system_compiler_Stage
- template: Package.stage_template.yaml
parameters:
agent_pool: "windows-2019"
operating_system: "Windows"
configuration: "x64"
dependencies:
- BuildAndTest_Windows_x64_Stage
- BuildAndTest_Windows_x64_MSVC_Stage
- BuildAndTest_Linux_x64_Stage
# TODO - BuildAndTest_Linux_system_compiler_Stage

Просмотреть файл

@ -0,0 +1,56 @@
parameters:
operating_system: "" # Windows|Linux
dependencies: []
configuration: ""
agent_pool: ""
stages:
- stage: Documentation_${{ parameters.operating_system }}_${{ parameters.configuration }}_Stage
displayName: "${{ parameters.operating_system }} - ${{ parameters.configuration }}: "
dependsOn: ${{ parameters.dependencies }}
jobs:
- job: Documentation_${{ parameters.operating_system }}_${{ parameters.configuration }}_Job
displayName: "Documentation: (${{ parameters.operating_system }} - ${{ parameters.configuration }})"
pool:
vmImage: "${{ parameters.agent_pool }}"
workspace:
clean: all
steps:
- template: Initialize.steps_template.yaml
parameters:
operating_system: ${{ parameters.operating_system }}
enable_code_coverage: False
- script: |-
echo "dependencies - ${{ join(', ', parameters.dependencies) }}"
echo "configuration - ${{ parameters.configuration }}"
echo "agent_pool - ${{ parameters.agent_pool }}
displayName: "[DEBUG] Display Documentation Variables"
- script: |-
$(azure_display_all_environment_vars)
displayName: "[DEBUG] Environment Variables (Before Activation)"
- script: |-
$(azure_bootstrap_command) $(azure_agent_temp_directory) /verbose
displayName: "<Bootstrap>"
timeoutInMinutes: 180
- script: |-
$(azure_activate_script) ${{ parameters.configuration }} && $(azure_display_all_environment_vars)
displayName: "[DEBUG] Environment Variables (After Activation)"
- script: |-
$(azure_activate_script) x64 && Doxygener$(azure_script_extension) . "$(azure_artifacts_directory)/Doxygener" /verbose
displayName: "<Doxygener>"
timeoutInMinutes: 180
- task: PublishPipelineArtifact@0
displayName: "Publish Artifacts"
inputs:
targetPath: $(azure_artifacts_directory)
artifactName: "Documentation"
timeoutInMinutes: 180

Просмотреть файл

@ -1,31 +1,28 @@
# TODO: Remove this configuration once Azure DevOps builds have been updated
# to use PR.yaml and CI.yaml.
name: $(TeamProject)-$(BuildDefinitionName)-$(SourceBranchName)-$(Date:yyyy.MM.dd)-$(Rev:rr)
trigger:
batch: true
branches:
include:
- master
- master
stages:
- template: BuildAndTest.template.yaml
parameters:
agent_pool: vs2015-win2012r2
operating_system: Windows
configuration: x64
- template: BuildAndTest.template.yaml
parameters:
agent_pool: vs2015-win2012r2
operating_system: Windows
configuration: x64
- template: BuildAndTest.template.yaml
parameters:
agent_pool: ubuntu-16.04
operating_system: Linux
configuration: x64
- template: BuildAndTest.template.yaml
parameters:
agent_pool: vs2015-win2012r2
operating_system: Windows
configuration: x64_MSVC
- template: BuildAndTest.template.yaml
parameters:
agent_pool: vs2015-win2012r2
operating_system: Windows
configuration: featurizer_prep
- template: BuildAndTest.template.yaml
parameters:
agent_pool: ubuntu-16.04
operating_system: Linux
configuration: featurizer_prep
- template: BuildAndTest.template.yaml
parameters:
agent_pool: ubuntu-16.04
operating_system: Linux
configuration: x64

Просмотреть файл

@ -0,0 +1,58 @@
parameters:
operting_system: "" # Windows|Linux
enable_code_coverage: "" # True|False
steps:
# These steps are an ugly hack to set Azure DevOps variables conditionally. It really seems like there should be a better way to do this.
- ${{ if eq(parameters.operating_system, 'Windows') }}:
- script: |-
echo ##vso[task.setvariable variable=azure_activate_script]call Activate.cmd
echo ##vso[task.setvariable variable=azure_script_extension].cmd
echo ##vso[task.setvariable variable=azure_display_all_environment_vars]set
echo ##vso[task.setvariable variable=azure_bootstrap_command]bootstrap.cmd
echo ##vso[task.setvariable variable=azure_agent_temp_directory]%AGENT_TEMPDIRECTORY%
echo ##vso[task.setvariable variable=azure_artifacts_directory]%BUILD_ARTIFACTSTAGINGDIRECTORY%
displayName: "[IMPL] Set Environment-Specific Variables (Standard)"
- ${{ if eq(parameters.enable_code_coverage, 'true') }}:
- script: |-
echo ##vso[task.setvariable variable=azure_code_coverage_arg]/code_coverage
displayName: "[IMPL] Set Environment-Specific Variables (Code Coverage)"
- ${{ if not(eq(parameters.enable_code_coverage, 'true')) }}:
- script: |-
echo ##vso[task.setvariable variable=azure_code_coverage_arg]
displayName: "[IMPL] Set Environment-Specific Variables (Code Coverage)"
- ${{ if eq(parameters.operating_system, 'Linux') }}:
- script: |-
echo "##vso[task.setvariable variable=azure_activate_script]source ./Activate.sh"
echo "##vso[task.setvariable variable=azure_script_extension].sh"
echo "##vso[task.setvariable variable=azure_display_all_environment_vars]export"
echo "##vso[task.setvariable variable=azure_bootstrap_command]sudo ./bootstrap.sh"
echo "##vso[task.setvariable variable=azure_agent_temp_directory]${AGENT_TEMPDIRECTORY}"
echo "##vso[task.setvariable variable=azure_artifacts_directory]${BUILD_ARTIFACTSTAGINGDIRECTORY}"
displayName: "[IMPL] Set Environment-Specific Variables (Standard)"
- ${{ if eq(parameters.enable_code_coverage, 'true') }}:
- script: |-
echo "##vso[task.setvariable variable=azure_code_coverage_arg]/code_coverage"
displayName: "[IMPL] Set Environment-Specific Variables (Code Coverage)"
- ${{ if not(eq(parameters.enable_code_coverage, 'true')) }}:
- script: |-
echo "##vso[task.setvariable variable=azure_code_coverage_arg]"
displayName: "[IMPL] Set Environment-Specific Variables (Code Coverage)"
- script: |-
echo "operating_system - ${{ parameters.operating_system }}"
echo "enable_code_coverage - ${{ parameters.enable_code_coverage }}"
echo "activate_script - $(azure_activate_script)"
echo "script_extension - $(azure_script_extension)"
echo "display_all_environment_vars - $(azure_display_all_environment_vars)"
echo "bootstrap_command - $(azure_bootstrap_command)"
echo "agent_temp_directory - $(azure_agent_temp_directory)"
echo "artifacts_directory - $(azure_artifacts_directory)"
echo "code_coverage_arg - $(azure_code_coverage_arg)"
displayName: "[DEBUG] Display Environment-Specific Variables"

31
CI/PR.yaml Normal file
Просмотреть файл

@ -0,0 +1,31 @@
name: $(TeamProject)-$(BuildDefinitionName)-$(SourceBranchName)-$(Date:yyyy.MM.dd)-$(Rev:rr)
stages:
- template: BuildAndTest.stage_template.yaml
parameters:
agent_pool: windows-2019
operating_system: Windows
configuration: x64
enable_code_coverage: True
- template: BuildAndTest.stage_template.yaml
parameters:
agent_pool: windows-2019
operating_system: Windows
configuration: x64_MSVC
enable_code_coverage: False
- template: BuildAndTest.stage_template.yaml
parameters:
agent_pool: ubuntu-16.04
operating_system: Linux
configuration: x64
enable_code_coverage: True
# TODO: Add a build based on custom image
# - template: BuildAndTest.stage_template.yaml
# parameters:
# operating_system: Linux
# configuration: system_compiler
# enable_code_coverage: False
# agent_pool: <Docker Container>

Просмотреть файл

@ -0,0 +1,56 @@
parameters:
operating_system: "" # Windows|Linux
dependencies: []
configuration: ""
agent_pool: ""
stages:
- stage: Package_${{ parameters.operating_system }}_${{ parameters.configuration }}_Stage
displayName: "${{ parameters.operating_system }} - ${{ parameters.configuration }}: "
dependsOn: ${{ parameters.dependencies }}
jobs:
- job: Package_${{ parameters.operating_system }}_${{ parameters.configuration }}_Job
displayName: "Package: (${{ parameters.operating_system }} - ${{ parameters.configuration }})"
pool:
vmImage: "${{ parameters.agent_pool }}"
workspace:
clean: all
steps:
- template: Initialize.steps_template.yaml
parameters:
operating_system: ${{ parameters.operating_system }}
enable_code_coverage: False
- script: |-
echo "dependencies - ${{ join(', ', parameters.dependencies) }}"
echo "configuration - ${{ parameters.configuration }}"
echo "agent_pool - ${{ parameters.agent_pool }}
displayName: "[DEBUG] Display Package Variables"
- script: |-
$(azure_display_all_environment_vars)
displayName: "[DEBUG] Environment Variables (Before Activation)"
- script: |-
$(azure_bootstrap_command) $(azure_agent_temp_directory) /verbose
displayName: "<Bootstrap>"
timeoutInMinutes: 180
- script: |-
$(azure_activate_script) ${{ parameters.configuration }} && $(azure_display_all_environment_vars)
displayName: "[DEBUG] Environment Variables (After Activation)"
- script: |-
echo "TODO: Package"
displayName: "<Package>"
timeoutInMinutes: 180
- task: PublishPipelineArtifact@0
displayName: "Publish Artifacts"
inputs:
targetPath: $(azure_artifacts_directory)
artifactName: "Package"
timeoutInMinutes: 180

Просмотреть файл

@ -1,15 +0,0 @@
# ----------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License
# ----------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5.0)
project(ChronoTransforms_UnitTests LANGUAGES CXX)
include(../../DataPipelinesCommon.cmake)
enable_testing()
_add_unit_tests(ITEMS date_time_UnitTests)
target_sources(date_time_UnitTests PRIVATE ../date_time.cpp ../date_time.hpp)

Просмотреть файл

@ -1,7 +0,0 @@
filter:
includes:
- DataPipelines::Chrono::*
excludes:
- Catch2::*
- std::*

Просмотреть файл

@ -1,97 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
#include <cstdio>
#include "../date_time.hpp"
using namespace DataPipelines::Chrono;
using SysClock = std::chrono::system_clock;
TEST_CASE("Past - 1976 Nov 17, 12:27:05", "[chrono][date_time]") {
const time_t date = 217081625;
SysClock::time_point stp = SysClock::from_time_t(date);
// Constructor
TimePoint tp(stp);
CHECK(tp.year == 1976);
CHECK(tp.month == TimePoint::NOVEMBER);
CHECK(tp.day == 17);
CHECK(tp.hour == 12);
CHECK(tp.minute == 27);
CHECK(tp.second == 5);
CHECK(tp.dayOfWeek == TimePoint::WEDNESDAY);
CHECK(tp.dayOfYear == 321);
CHECK(tp.quarterOfYear == 4);
CHECK(tp.weekOfMonth == 2);
// assignment
TimePoint tp1 = stp;
CHECK(tp1.year == 1976);
CHECK(tp1.month == TimePoint::NOVEMBER);
CHECK(tp1.day == 17);
// function
TimePoint tp2 = SystemToDPTimePoint(stp);
CHECK(tp2.year == 1976);
CHECK(tp2.month == TimePoint::NOVEMBER);
CHECK(tp2.day == 17);
}
TEST_CASE("Future - 2025 June 30", "[chrono][date_time]") {
const time_t date = 1751241600;
SysClock::time_point stp = SysClock::from_time_t(date);
// Constructor
TimePoint tp(stp);
CHECK(tp.year == 2025);
CHECK(tp.month == TimePoint::JUNE);
CHECK(tp.day == 30);
CHECK(tp.hour == 0);
CHECK(tp.minute == 0);
CHECK(tp.second == 0);
CHECK(tp.dayOfWeek == TimePoint::MONDAY);
CHECK(tp.dayOfYear == 180);
CHECK(tp.quarterOfYear == 2);
CHECK(tp.weekOfMonth == 4);
}
#ifdef _MSC_VER
// others define system_clock::time_point as nanoseconds (64-bit),
// which rolls over somewhere around 2260. Still a couple hundred years!
TEST_CASE("Far Future - 2998 March 2, 14:03:02", "[chrono][date_time]") {
const time_t date = 32445842582;
SysClock::time_point stp = SysClock::from_time_t(date);
// Constructor
TimePoint tp(stp);
CHECK(tp.year == 2998);
CHECK(tp.month == TimePoint::MARCH);
CHECK(tp.day == 2);
CHECK(tp.hour == 14);
CHECK(tp.minute == 3);
CHECK(tp.second == 2);
CHECK(tp.dayOfWeek == TimePoint::FRIDAY);
CHECK(tp.dayOfYear == 60);
CHECK(tp.quarterOfYear == 1);
CHECK(tp.weekOfMonth == 0);
}
#else
// msvcrt doesn't support negative time_t, so nothing before 1970
TEST_CASE("Pre-Epoch - 1776 July 4", "[chrono][date_time]")
{
const time_t date = -6106060800;
SysClock::time_point stp = SysClock::from_time_t(date);
// Constructor
TimePoint tp(stp);
CHECK(tp.year == 1776);
CHECK(tp.month == TimePoint::JULY);
CHECK(tp.day == 4);
}
#endif /* _MSVCRT */

Просмотреть файл

@ -1,49 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#include <stdexcept>
#include "date_time.hpp"
#ifdef _MSC_VER
inline struct tm *localtime_r(time_t const* const timer, struct tm* const result) {
return localtime_s(result, timer) == 0 ? result : nullptr;
}
inline struct tm *gmtime_r(time_t const* const timer, struct tm* const result) {
return gmtime_s(result, timer) == 0 ? result : nullptr;
}
#endif
using SysClock = std::chrono::system_clock;
namespace DataPipelines::Chrono {
TimePoint::TimePoint(const std::chrono::system_clock::time_point& sysTime) {
// Get to a tm to get what we need.
// Eventually C++202x will have expanded chrono support that might
// have what we need, but not yet!
std::tm tmt;
time_t tt = SysClock::to_time_t(sysTime);
std::tm* res = gmtime_r(&tt, &tmt);
if (res) {
year = static_cast<int32_t>(tmt.tm_year) + 1900;
month = static_cast<uint8_t>(tmt.tm_mon) + 1;
day = static_cast<uint8_t>(tmt.tm_mday);
hour = static_cast<uint8_t>(tmt.tm_hour);
minute = static_cast<uint8_t>(tmt.tm_min);
second = static_cast<uint8_t>(tmt.tm_sec);
dayOfWeek = static_cast<uint8_t>(tmt.tm_wday);
dayOfYear = static_cast<uint16_t>(tmt.tm_yday);
quarterOfYear = (month + 2) / 3;
weekOfMonth = (day - 1) / 7;
}
else
{
if (tt < 0) {
throw std::invalid_argument("Dates prior to 1970 are not supported.");
}
else {
throw std::invalid_argument("Unknown error converting input date.");
}
}
}
}

Просмотреть файл

@ -1,42 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include <chrono>
#include <ctime>
namespace DataPipelines::Chrono {
struct TimePoint {
int32_t year = 0;
uint8_t month = 0; /* 1-12 */
uint8_t day = 0; /* 1-31 */
uint8_t hour = 0; /* 0-23 */
uint8_t minute = 0; /* 0-59 */
uint8_t second = 0; /* 0-59 */
uint8_t dayOfWeek = 0; /* 0-6 */
uint16_t dayOfYear = 0; /* 0-365 */
uint8_t quarterOfYear = 0; /* 1-4 */
uint8_t weekOfMonth = 0; /* 0-4 */
TimePoint(const std::chrono::system_clock::time_point& sysTime);
TimePoint(TimePoint&&) = default;
TimePoint(const TimePoint&) = delete;
TimePoint& operator=(const TimePoint&) = delete;
enum {
JANUARY = 1, FEBRUARY, MARCH, APRIL, MAY, JUNE,
JULY, AUGUST, SEPTEMBER, OCTOBER, NOVEMBER, DECEMBER
};
enum {
SUNDAY = 0, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY
};
};
inline TimePoint SystemToDPTimePoint(const std::chrono::system_clock::time_point& sysTime) {
return TimePoint (sysTime);
}
}

Просмотреть файл

@ -1,33 +0,0 @@
# ----------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License
# ----------------------------------------------------------------------
set(CMAKE_MODULE_PATH "$ENV{DEVELOPMENT_ENVIRONMENT_CMAKE_MODULE_PATH}")
if(NOT WIN32)
string(REPLACE ":" ";" CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}")
string(REPLACE ":" ";" _includes "$ENV{INCLUDE}")
string(REPLACE ":" ";" _libs "$ENV{LIB}")
endif()
include(CppCommon)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
function(_add_unit_test _name)
add_executable(${_name} ${_name}.cpp)
target_include_directories(${_name} PRIVATE ${_includes})
target_link_directories(${_name} PRIVATE ${_libs})
add_test(NAME ${_name} COMMAND ${_name} --success)
endfunction(_add_unit_test)
function(_add_unit_tests)
enable_testing()
foreach(_name IN ${ARGN})
_add_unit_test(${_name})
endforeach()
endfunction(_add_unit_tests)

Просмотреть файл

@ -1,16 +0,0 @@
# ----------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License
# ----------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5.0)
project(SampleTransforms_UnitTest LANGUAGES CXX)
include(../../DataPipelinesCommon.cmake)
enable_testing()
_add_unit_tests(ITEMS arithmetic_UnitTests)
target_sources(arithmetic_UnitTests PRIVATE ../arithmetic.cpp ../arithmetic.hpp)

Просмотреть файл

@ -1,58 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
#include <string>
#include "../arithmetic.hpp"
using namespace DataPipelines::Arithmetic;
TEST_CASE("1+2=3", "[Add]") {
CHECK(Add(int64_t(1), int64_t(2)) == int64_t(3));
CHECK(Add(uint64_t(1), uint64_t(2)) == uint64_t(3));
CHECK(Add(uint32_t(1), uint32_t(2)) == uint32_t(3));
CHECK(Add(int32_t(1), int32_t(2)) == int32_t(3));
CHECK(Add(uint16_t(1), uint16_t(2)) == uint16_t(3));
CHECK(Add(double(1.0), double(2.0)) == Approx(3.0));
CHECK(Add(float(1.0), float(2.0)) == Approx(3.0));
CHECK(Addi32(1, 2) == 3);
CHECK(Addu64(1, 2) == 3);
CHECK(Addi64(1, 2) == 3);
CHECK(Addu32(1, 2) == 3);
}
TEST_CASE("char a+b", "[Add]") {
CHECK(Add('a', 'b') == char(0xC3));
}
TEST_CASE("string a+b", "[Add],[String]") {
CHECK(Add(std::string("a"), std::string("b")) == "ab");
}
TEST_CASE("-10+3", "[Add][Negative]") {
CHECK(Add<int32_t>(int32_t(-10), 3) == -7);
CHECK(Add(int64_t(-10), 3) == -7);
CHECK(Add(uint32_t(int32_t(-10)), 3) == 0xFFFFFFF9);
CHECK(Add(uint64_t(int64_t(-10)), 3) == 0xFFFFFFFFFFFFFFF9);
CHECK(Add<uint16_t>(uint16_t((INT16_C(-10))), 3) == uint16_t(0xFFF9));
CHECK(Add(double(-10.0), 3.0) == Approx(-7.0));
CHECK(Add<float>(float(-10.0), 3.0) == Approx(-7.0));
CHECK(Addi32(int32_t(-10), 3) == -7);
}
TEST_CASE("Add Rollover", "[Add][Rollover]") {
CHECK(Add(uint32_t(0xFFFFFFFE), uint32_t(4)) == 2);
CHECK(Add(uint64_t(0xFFFFFFFE), uint64_t(4)) == 0x100000002);
CHECK(Add(uint64_t(0xFFFFFFFFFFFFFFFE), uint64_t(4)) == 2);
CHECK(Add<uint16_t>(uint16_t(0xFFFE), uint16_t(4)) == 2);
CHECK(Add(INT64_MAX, INT64_MAX) == -2);
CHECK(Add<int32_t>(INT32_MAX, INT32_MAX) == -2);
CHECK(Addu64(uint64_t(0xFFFFFFFFFFFFFFFE), 4) == 2);
}
TEST_CASE("Add Struct", "[Add][struct]") {
struct MyStruct s1(10, 25);
struct MyStruct s2(-50, 1000);
struct MyStruct sum = Add(s1, s2);
CHECK(sum.a == -40);
CHECK(sum.b == 1025);
}

Просмотреть файл

@ -1,7 +0,0 @@
filter:
includes:
- DataPipelines::Arithmetic::*
excludes:
- Catch2::*
- std::*

Просмотреть файл

@ -1,45 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#include <stdint.h>
#include "arithmetic.hpp"
/* Defined in enclosed namespace */
namespace DataPipelines {
namespace Arithmetic {
int64_t Add(const int64_t a, const int64_t b) { return a + b; }
uint64_t Add(const uint64_t a, const uint64_t b) { return a + b; }
uint32_t Add(const uint32_t a, const uint32_t b) { return a + b; }
MyStruct Add(const MyStruct& s1, const MyStruct& s2) {
return MyStruct(s1.a + s2.a, s1.b + s2.b);
}
}
}
/* self defined namespace */
double DataPipelines::Arithmetic::Add(const double a, const double b) {
return a + b;
}
int32_t DataPipelines::Arithmetic::Addi32(const int32_t a, const int32_t b) {
return Add<int32_t>(a, b);
}
/* C++ non-overloaded function */
uint64_t Addu64(const uint64_t a, const uint64_t b) {
return DataPipelines::Arithmetic::Add(a, b);
}
/* C-export functions */
int64_t Addi64(const int64_t a, const int64_t b) {
return DataPipelines::Arithmetic::Add(a, b);
}
uint32_t Addu32(const uint32_t a, const uint32_t b) {
return DataPipelines::Arithmetic::Add(a, b);
}

Просмотреть файл

@ -1,51 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include <stdint.h>
#include <assert.h>
/* Sample functions to add two things.
* Various signature types until we've got a handle on what we want to export. */
/* Using a namespace */
namespace DataPipelines {
namespace Arithmetic {
/* Overloaded function name */
int64_t Add(const int64_t a, const int64_t b);
uint64_t Add(const uint64_t a, const uint64_t b);
uint32_t Add(const uint32_t a, const uint32_t b);
double Add(const double a, const double b);
/* Templated type, inline definition */
template<typename T> T Add(const T a, const T b) { return a + b; };
/* Using a struct */
struct MyStruct {
int64_t a;
int64_t b;
MyStruct(int64_t _a = 0, int64_t _b = 0) : a(_a), b(_b) {}
MyStruct(MyStruct&& s) { a = s.a; b = s.b; }
MyStruct(const MyStruct& s) = delete;
MyStruct& operator=(const MyStruct&) = delete;
};
MyStruct Add(const MyStruct& s1, const MyStruct& s2);
/* unique function name, still in a namespace */
int32_t Addi32(const int32_t a, const int32_t b);
}
}
/* C++ simple non-overloaded function */
uint64_t Addu64(const uint64_t a, const uint64_t b);
/* C declarations */
extern "C" {
int64_t Addi64(const int64_t a, const int64_t b);
uint32_t Addu32(const uint32_t a, const uint32_t b);
}

Просмотреть файл

@ -1,16 +0,0 @@
# ----------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License
# ----------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5.0)
project(StringTransforms_UnitTest LANGUAGES CXX)
include(../../DataPipelinesCommon.cmake)
enable_testing()
_add_unit_tests(ITEMS regex_vectorizer_UnitTests)
target_sources(regex_vectorizer_UnitTests PUBLIC ../regex_vectorizer.cpp ../regex_vectorizer.hpp)

Просмотреть файл

@ -1,7 +0,0 @@
filter:
includes:
- DataPipelines::StringTransforms::RegEx::*
excludes:
- Catch2::*
- std::*

Просмотреть файл

@ -1,304 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
#include <string>
#include "../regex_vectorizer.hpp"
using namespace DataPipelines::StringTransforms;
struct TestData {
std::vector<std::string> dataIn;
std::vector<std::string> regexIn;
std::vector<std::vector<double>> vectorOut;
};
inline std::vector<std::vector<bool>>
doubleArrayToBool(const std::vector<std::vector<double>>& inData) {
std::vector<std::vector<bool>> out;
for (auto vec : inData) {
std::vector<bool> v;
std::transform(vec.begin(), vec.end(), std::back_insert_iterator(v),
[](double d) -> bool { return d != 0.0; });
out.push_back(v);
}
return out;
}
inline std::vector<std::vector<double>>
normalizeDoubleArray(const std::vector<std::vector<double>>& inData)
{
std::vector<std::vector<double>> out;
for (auto vec : inData) {
std::vector<double> v;
if (vec.size()) {
std::transform(vec.begin(), vec.end(), std::back_insert_iterator(v),
[&vec](double d) -> double { return d / static_cast<double>(vec.size()); });
}
out.push_back(v);
}
return out;
}
inline RegEx::vector3<double>
normalizeDoubleArray(const RegEx::vector3<double>& inData)
{
RegEx::vector3<double> out;
for (auto vec : inData) {
out.push_back(normalizeDoubleArray(vec));
}
return out;
}
inline TestData TestData1()
{
TestData td;
// The test data currently is just a copy of what is in regex_vectorizer.py..
td.dataIn = {
"@adoran2 Good points made by article - I am home, in case anyone is wondering ",
" Wow a Phil Collins song I've not heard. Nice! ",
" a_dubstar Nahhhhhh i'm even worst ",
" @abtnova can u please tell me if the jonas brothers competition has finished im a finalist and apparently it has ill b devo if its not me ",
" @afiaa_1212 wow wow wow who just made a new account yaaa?? Hahahaha hello fellow twitter tweet tweet tweet "
};
td.regexIn = {
"[0-9]{4,10}",
"[A-Z]{2,6}",
"([A-Z])\\1{4,6}",
"(.)\\1{3,6}",
"[!@#$%&\\*!]",
"@[A-Za-z0-9]+",
"(\\w+\\s)\\1{2,4}",
"[!@#\\*]{3,5}",
"[A-Z#!\\*]{4,6}"
};
td.vectorOut = {
{ 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0 },
{ 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0 },
{ 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 },
{ 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0 },
{ 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0 }
};
return td;
}
TEST_CASE("Vectorizer", "[strings][regex][vectorizer]") {
TestData td = TestData1();
std::vector<std::vector<bool>> boolVectorization = doubleArrayToBool(td.vectorOut);
std::vector<std::vector<double>> normVectorization = normalizeDoubleArray(td.vectorOut);
REQUIRE(boolVectorization.size() == td.dataIn.size());
REQUIRE(boolVectorization[0].size() == td.regexIn.size());
RegEx::Vectorizer vectorizer(td.regexIn);
SECTION("Vectorize") {
SECTION("Vectorizer Object") {
// "normalized"
auto v1 = vectorizer.Vectorize(td.dataIn, true);
CHECK(v1 == normVectorization);
// straight
auto v2 = vectorizer.Vectorize(td.dataIn, false);
CHECK(v2 == td.vectorOut);
// boolean
auto v3 = vectorizer.VectorizeBool(td.dataIn);
CHECK(v3 == boolVectorization);
}
SECTION("Persistent regex list") {
auto regexes = RegEx::Compile(td.regexIn);
CHECK(regexes.size() == td.regexIn.size());
auto v1 = RegEx::VectorizeList(regexes, td.dataIn);
CHECK(v1 == normVectorization);
auto v2 = RegEx::VectorizeList(regexes, td.dataIn, false);
CHECK(v2 == td.vectorOut);
auto v3 = RegEx::VectorizeListBool(regexes, td.dataIn);
CHECK(v3 == boolVectorization);
}
SECTION("Vectorize straight from strings, no persistence") {
auto v1 = RegEx::VectorizeStrings(td.regexIn, td.dataIn);
CHECK(v1 == normVectorization);
auto v2 = RegEx::VectorizeStrings(td.regexIn, td.dataIn, false);
CHECK(v2 == td.vectorOut);
auto v3 = RegEx::VectorizeStringsBool(td.regexIn, td.dataIn);
CHECK(v3 == boolVectorization);
}
}
}
TEST_CASE("Empty Lists", "[strings][regex][vectorizer][edge case]") {
TestData td;
td.dataIn = {
"@adoran2 Good points made by article - I am home, in case anyone is wondering ",
" Wow a Phil Collins song I've not heard. Nice! "
};
td.regexIn = {
"[0-9]{4,10}",
"[A-Z]{2,6}",
"([A-Z])\\1{4,6}"
};
std::vector<std::string> emptyInList = {};
std::vector<std::vector<double>> emptyStringOutList = {}; // empty outer
std::vector<std::vector<double>> emptyRegexOutList = {{},{}}; // 2 outer, empty inner
SECTION("Empty input") {
auto v1 = RegEx::VectorizeStrings(td.regexIn, emptyInList, false);
CHECK(v1 == emptyStringOutList);
auto v2 = RegEx::VectorizeStrings(td.regexIn, emptyInList, true);
CHECK(v2 == emptyStringOutList);
}
SECTION("Empty regex") {
auto v1 = RegEx::VectorizeStrings(emptyInList, td.dataIn, false);
CHECK(v1 == emptyRegexOutList);
auto v2 = RegEx::VectorizeStrings(emptyInList, td.dataIn, true);
CHECK(v2 == emptyRegexOutList);
}
SECTION("Empty regex and input") {
auto v1 = RegEx::VectorizeStrings(emptyInList, emptyInList, false);
CHECK(v1 == emptyStringOutList);
auto v2 = RegEx::VectorizeStrings(emptyInList, emptyInList, true);
CHECK(v2 == emptyStringOutList);
}
}
TEST_CASE("Invalid Regex", "[strings][regex][vectorizer][error]") {
TestData td;
td.dataIn = {
"1234 5678",
" blah blah"
};
td.regexIn = {
"[0-9]{]",
"[[[]]]]{}"
};
SECTION("Vectorizer Object") {
CHECK_THROWS_AS(RegEx::Vectorizer(td.regexIn), std::regex_error);
}
SECTION("Persistent list") {
CHECK_THROWS_AS(RegEx::Compile(td.regexIn), std::regex_error);
}
SECTION("From string") {
CHECK_THROWS_AS(RegEx::VectorizeStrings(td.regexIn, td.dataIn),
std::regex_error);
}
}
TEST_CASE("Empty Strings", "[strings][regex][vectorizer][edge case]") {
TestData td;
td.dataIn = {
"1234 9876 ",
"ABCCD FFFFF"
};
td.regexIn = {
"[0-9]{4,10}",
"[A-Z]{2,6}",
"([A-Z])\\1{4,6}"
};
td.vectorOut = { { 1.0, 0.0, 0.0 }, { 0.0, 1.0, 1.0 } };
std::vector<double> emptyStrOut = { { 0.0, 0.0, 0.0 } };
std::vector<std::string> regexStrEmpty(3);
regexStrEmpty[0] = "";
regexStrEmpty[1] = td.regexIn[1];
regexStrEmpty[2] = td.regexIn[2];
std::vector<std::string> inputEmpty(3);
inputEmpty[0] = td.dataIn[0];
inputEmpty[1] = "";
inputEmpty[2] = td.dataIn[1];
SECTION("Empty input string") {
auto v1 = RegEx::VectorizeStrings(td.regexIn, inputEmpty, false);
CHECK(v1[0] == td.vectorOut[0]);
CHECK(v1[1] == emptyStrOut);
CHECK(v1[2] == td.vectorOut[1]);
}
SECTION("Empty regex string") {
auto v1 = RegEx::VectorizeStrings(regexStrEmpty, td.dataIn, false);
CHECK(v1[0][0] == 0.0);
CHECK(v1[1][0] == 0.0);
CHECK(v1[1][1] == 1.0);
}
SECTION("Empty string in regex and input") {
auto v1 = RegEx::VectorizeStrings(regexStrEmpty, inputEmpty, false);
CHECK(v1[0][0] == 0.0);
CHECK(v1[1][0] == 0.0);
CHECK(v1[2][0] == 0.0);
CHECK(v1[1][1] == 0.0);
CHECK(v1[2][2] == 1.0);
}
}
TEST_CASE("Vectorize Words", "[strings][regex][vectorizer]") {
TestData td;
td.dataIn = {
"@adoran2 Good points made by article - I am home, in ",
" @afiaa_1212 bla blah ",
"1234 9876 ",
"ABCCD FFFFF"
};
td.regexIn = {
"[0-9]{4,10}",
"[A-Z]{2,6}",
"([A-Z])\\1{4,6}"
};
RegEx::vector3<double> vectorOut = {
{{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0},
{0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}},
{{1, 0, 0}, {0, 0, 0}, {0, 0, 0}},
{{1, 0, 0}, {1, 0, 0}},
{{0, 1, 0}, {0, 1, 1}}
};
RegEx::vector3<double> normVectorization = normalizeDoubleArray(vectorOut);
RegEx::Vectorizer vectorizer(td.regexIn);
SECTION("Vectorize") {
SECTION("Vectorizer Object") {
// "normalized"
auto v1 = vectorizer.VectorizeWords(td.dataIn, true);
CHECK(v1 == normVectorization);
// straight
auto v2 = vectorizer.VectorizeWords(td.dataIn, false);
CHECK(v2 == vectorOut);
}
SECTION("Persistent regex list") {
auto regexes = RegEx::Compile(td.regexIn);
CHECK(regexes.size() == td.regexIn.size());
auto v1 = RegEx::VectorizeWords(regexes, td.dataIn);
CHECK(v1 == normVectorization);
auto v2 = RegEx::VectorizeWords(regexes, td.dataIn, false);
CHECK(v2 == vectorOut);
}
}
}

Просмотреть файл

@ -1,84 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#include "regex_vectorizer.hpp"
const char* const str_regex_word = "(\\S)+";
namespace DataPipelines::StringTransforms::RegEx {
using namespace std;
// Use some template trickery to reuse the same code
// for whatever type
template<typename T> constexpr T true_val() { return 1; }
template<typename T> constexpr T false_val() { return 0; }
template<> constexpr bool true_val<bool>() { return true; }
template<> constexpr bool false_val<bool>() { return false; }
template<> constexpr double true_val<double>() { return 1.0; }
template<> constexpr double false_val<double>() { return 0.0; }
// Vectorize occurences of a list of regexes in a list of strings.
template<typename T> vector<vector<T>>
VectorizeList(const vector<re>& regexList, const vector<string>& input, bool normalize) {
vector<vector<T>> results;
results.reserve(input.size());
size_t count = regexList.size();
T trueVal = true_val<T>();
// I really don't think this is right, not really normalized,
// but it's what the python reference does...
if (normalize && count) {
trueVal /= static_cast<T>(count);
}
for (auto s : input)
{
vector<T> v;
v.reserve(count);
for (const re& regEx : regexList)
{
v.emplace_back(
std::regex_search(s, regEx, reconst::match_any | reconst::match_not_null)
? trueVal : false_val<T>());
}
results.push_back(std::move(v));
}
return results;
}
vector<vector<double>>
VectorizeList(const vector<re>& regexList, const vector<string>& input, bool normalize) {
return VectorizeList<double>(regexList, input, normalize);
}
vector<vector<bool>>
VectorizeListBool(const vector<re>& regexList, const vector<string>& input) {
return VectorizeList<bool>(regexList, input, false);
}
// Vectorize occurences of a list of regexes in individual words
// in a list of strings.
vector3<double>
VectorizeWords(const vector<re>& regexList, const vector<string>& input, bool normalize) {
vector3<double> results;
results.reserve(input.size());
for (const string& str : input) {
// Break the string up into a vector of words
vector<string> v;
std::regex wordsRegex(str_regex_word);
auto words_end = std::sregex_iterator();
for (auto i = std::sregex_iterator(str.begin(), str.end(), wordsRegex);
i != words_end; ++i) {
std::smatch m = *i;
v.push_back(m.str());
}
// Now vectorize this list of strings and add it on.
results.emplace_back(VectorizeList(regexList, v, normalize));
}
return results;
}
}

Просмотреть файл

@ -1,125 +0,0 @@
// ----------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include <vector>
#include <regex>
namespace DataPipelines::StringTransforms::RegEx {
// define some things so the types aren't all 80 chars long
using re = std::regex;
namespace reconst = std::regex_constants;
template <typename T> using vector2 = std::vector<std::vector<T>>;
template <typename T> using vector3 = std::vector<std::vector<std::vector<T>>>;
// Default flags - use ECMAScript matching (this is the language default)
// and 'optimize', which optimizes for actually searching the regex over
// create/compile
const static re::flag_type default_flags = reconst::ECMAScript | reconst::optimize;
// Compile: Compile regex strings into std::regex objects.
// These are persistent, to be passed back in future calls
// to Vectorize
inline std::vector<re> Compile(const std::vector<std::string>& regexStrings,
const re::flag_type syntaxFlags = default_flags);
// VectorizeList: Vectorize the strings given.
// Returns 1.0 per regex (per string) if there is a match. Not the number
// of matches, just a 1 or 0
// normalize: does a sort of normalizing of the strings over all the regexes,
// so its 1/N or 0 instead of 1 or 0
vector2<double> VectorizeList(const std::vector<re>& regexList,
const std::vector<std::string>& input,
bool normalize = true);
// VectorizeListBool: Same as above, but with bool instead of double
vector2<bool> VectorizeListBool(const std::vector<re>& regexList,
const std::vector<std::string>& input);
// vector<vector<vector<double>>> ....
// Needs a multi-dimensional vectors.
// Requires discussion regarding e.g. linear algebra libraries, etc.
vector3<double> VectorizeWords(const std::vector<re>& regexList,
const std::vector<std::string>& input,
bool normalize = true);
// One-off straight from string regexes. Simpler if you only need to
// use the regex list one time.
// If you need to reuse the regexes, use one of the persistent versions
inline vector2<double>
VectorizeStrings(const std::vector<std::string>& regexStrings,
const std::vector<std::string>& input,
bool vectorize = true);
inline vector2<bool>
VectorizeStringsBool(const std::vector<std::string>& regexStrings,
const std::vector<std::string>& input);
// C++ class encapsulating the persistent regexes and vectorization
class Vectorizer {
public:
Vectorizer(const std::vector<std::string>& regexStrings,
const re::flag_type syntaxFlags = default_flags) :
regexList(Compile(regexStrings, syntaxFlags)) {}
Vectorizer(Vectorizer&& other) : regexList(std::move(other.regexList)) {}
Vectorizer(const Vectorizer&) = delete;
Vectorizer operator=(const Vectorizer&) = delete;
inline vector2<double>
Vectorize(const std::vector<std::string>& input, bool normalize = true);
inline vector2<bool>
VectorizeBool(const std::vector<std::string>& input);
inline vector3<double>
VectorizeWords(const std::vector<std::string>& input,
bool normalize = true);
private:
std::vector<re> regexList;
};
///// Inline implementations
inline std::vector<re>
Compile(const std::vector<std::string>& regexStrings,
const re::flag_type syntaxFlags) {
std::vector<re> regexList;
regexList.reserve(regexStrings.size());
for (auto regexStr : regexStrings) {
regexList.push_back(re(regexStr, syntaxFlags));
}
return regexList;
}
inline vector2<double>
VectorizeStrings(const std::vector<std::string>& regexStrings,
const std::vector<std::string>& input,
bool normalize) {
return VectorizeList(Compile(regexStrings), input, normalize);
}
inline vector2<bool>
VectorizeStringsBool(const std::vector<std::string>& regexStrings,
const std::vector<std::string>& input) {
return VectorizeListBool(Compile(regexStrings), input);
}
inline vector2<double>
Vectorizer::Vectorize(const std::vector<std::string>& input, bool normalize) {
return VectorizeList(regexList, input, normalize);
}
inline vector2<bool>
Vectorizer::VectorizeBool(const std::vector<std::string>& input) {
return VectorizeListBool(regexList, input);
}
inline vector3<double>
Vectorizer::VectorizeWords(const std::vector<std::string>& input, bool normalize) {
return ::DataPipelines::StringTransforms::RegEx::VectorizeWords(regexList, input, normalize);
}
}

Просмотреть файл

@ -8,9 +8,14 @@ import textwrap
import CommonEnvironment
from DataPipelines.CppToJson import CppToJson
from DataPipelines.CppToJson.GeneratedCode.CppToJson_PythonJsonSerialization import *
try:
from DataPipelines.CppToJson import CppToJson
from DataPipelines.CppToJson.GeneratedCode.CppToJson_PythonJsonSerialization import *
except ModuleNotFoundError:
# If here, it might be that clang is not available
if os.getenv("DEVELOPMENT_ENVIRONMENT_REPOSITORY_CONFIGURATION") != "x64":
sys.stdout.write("Clang is not available in this configuration.\nOK <- this is enough to convince the test parser that tests were successful.\n")
sys.exit(0)
# ----------------------------------------------------------------------
_script_fullpath = CommonEnvironment.ThisFullpath()
@ -116,7 +121,7 @@ class FileTest(unittest.TestCase):
def onUnsupportedFunc(error_desc, this_filename, line):
nonlocal called_count
called_count += 1
unsupported_list = [
[textwrap.dedent("""\
The function operator+ is not supported:
@ -277,7 +282,7 @@ class Deserialization(unittest.TestCase):
results = CppToJson.ObtainFunctions(filename, None, Policy)
deserialized_result = Deserialize([results[filename]], always_include_optional=True)
self.assertEqual(len(deserialized_result), 1)
self.assertEqual(deserialized_result[0].function_list[0].name, "goooo")

Просмотреть файл

@ -1,14 +1,20 @@
'''Unit test for CppToJson.py
'''
import os
import sys
import json
import unittest
import textwrap
from DataPipelines.CppToJson import CppToJson
try:
from DataPipelines.CppToJson import CppToJson
except ModuleNotFoundError:
# If here, it might be that clang is not available
if os.getenv("DEVELOPMENT_ENVIRONMENT_REPOSITORY_CONFIGURATION") != "x64":
sys.stdout.write("Clang is not available in this configuration.\nOK <- this is enough to convince the test parser that tests were successful.\n")
sys.exit(0)
class FuncTest(unittest.TestCase):
def test_add_func(self):
s = textwrap.dedent('''\
#include <cstdint>
@ -63,7 +69,7 @@ class FuncTest(unittest.TestCase):
return 0;
}
''')
result = CppToJson.ObtainFunctions(s, None, lambda type, verifyStruct: True)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -108,7 +114,7 @@ class FuncTest(unittest.TestCase):
return 0;
}
''')
result = CppToJson.ObtainFunctions(s, None, lambda type, verifyStruct: True)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -152,7 +158,7 @@ class FuncTest(unittest.TestCase):
return 0;
}
''')
result = CppToJson.ObtainFunctions(s, None, lambda type, verifyStruct: True)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -192,7 +198,7 @@ class FuncTest(unittest.TestCase):
return 0;
}
''')
result = CppToJson.ObtainFunctions(s, None, lambda type, verifyStruct: True)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -232,7 +238,7 @@ class FuncTest(unittest.TestCase):
return 0;
}
''')
result = CppToJson.ObtainFunctions(s, None, lambda type, verifyStruct: True)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -270,7 +276,7 @@ class FuncTest(unittest.TestCase):
return 0;
}
''')
result = CppToJson.ObtainFunctions(s, None, lambda type, verifyStruct: True)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -307,7 +313,7 @@ class FuncTest(unittest.TestCase):
return 0;
}
''')
result = CppToJson.ObtainFunctions(s, None, lambda type, verifyStruct: True)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -350,7 +356,7 @@ class FuncTest(unittest.TestCase):
return 0;
}
''')
result = CppToJson.ObtainFunctions(s, None, lambda type, verifyStruct: True)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -772,7 +778,7 @@ class FuncTest(unittest.TestCase):
bool go(x ya){
return 2;
}
int main(){
return 0;
}
@ -806,7 +812,7 @@ class FuncTest(unittest.TestCase):
include_list = self._GetIncludeList(result)
self.assertEqual(times_called, 1)
self.assertEqual(func_list[0]['name'], 'main')
self.assertEqual(func_list[0]['raw_return_type'], 'int')
self.assertEqual(func_list[0]['simple_return_type'], 'int')
@ -815,7 +821,7 @@ class FuncTest(unittest.TestCase):
self.assertEqual(func_list[0]['simple_var_types'], [])
self.assertEqual(func_list[0]['definition_line'], 12)
self.assertEqual(func_list[0]['declaration_line'], 12)
self.assertEqual(struct_list, [])
self.assertEqual(len(include_list), 2)
@ -825,7 +831,7 @@ class FuncTest(unittest.TestCase):
#include <utility>
#include <cstdio>
#include <cstdint>
struct x{
bool a, b;
x(){}
@ -894,7 +900,7 @@ class FuncTest(unittest.TestCase):
self.assertEqual(struct_list[0]['constructor_list'][1]['var_names'], ['other'])
self.assertEqual(struct_list[0]['constructor_list'][1]['raw_var_types'], ['x &&'])
self.assertEqual(struct_list[0]['constructor_list'][1]['simple_var_types'], ['x'])
self.assertEqual(struct_list[0]['constructor_list'][1]['definition_line'], 8)
self.assertEqual(struct_list[0]['constructor_list'][1]['definition_line'], 8)
self.assertEqual(len(include_list), 3)
@ -921,7 +927,7 @@ class FuncTest(unittest.TestCase):
self.assertTrue([error_desc, filename, line] in unsupported_list)
# ----------------------------------------------------------------------
result = CppToJson.ObtainFunctions(s, onUnsupportedFunc, lambda type, verifyStruct: False)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -945,7 +951,7 @@ class FuncTest(unittest.TestCase):
self.assertTrue(False)
# ----------------------------------------------------------------------
result = CppToJson.ObtainFunctions(s, onUnsupportedFunc, lambda type, verifyStruct: False)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -996,7 +1002,7 @@ class FuncTest(unittest.TestCase):
return False
# ----------------------------------------------------------------------
result = CppToJson.ObtainFunctions(s, onUnsupportedFunc, Policy)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -1058,7 +1064,7 @@ class FuncTest(unittest.TestCase):
return False
# ----------------------------------------------------------------------
result = CppToJson.ObtainFunctions(s, onUnsupportedFunc, Policy)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -1094,7 +1100,7 @@ class FuncTest(unittest.TestCase):
struct R2: public R{
};
R2 func3(){
R2 y;
return y;
@ -1106,7 +1112,7 @@ class FuncTest(unittest.TestCase):
def onUnsupportedFunc(error_desc, filename, line):
nonlocal times_called
times_called = times_called + 1
unsupported_list = [
[textwrap.dedent("""\
The function func is not supported:
@ -1141,7 +1147,7 @@ class FuncTest(unittest.TestCase):
return False
# ----------------------------------------------------------------------
result = CppToJson.ObtainFunctions(s, onUnsupportedFunc, Policy)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -1176,7 +1182,7 @@ class FuncTest(unittest.TestCase):
struct R2: public R{
};
R2 func3(){
R2 y;
return y;
@ -1226,7 +1232,7 @@ class FuncTest(unittest.TestCase):
return False
# ----------------------------------------------------------------------
result = CppToJson.ObtainFunctions(s, onUnsupportedFunc, Policy)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -1261,7 +1267,7 @@ class FuncTest(unittest.TestCase):
struct R2: public R{
};
R2 func3(){
R2 y;
return y;
@ -1281,7 +1287,7 @@ class FuncTest(unittest.TestCase):
def onUnsupportedFunc(error_desc, filename, line):
nonlocal times_called
times_called = times_called + 1
unsupported_list = [
[textwrap.dedent("""\
The function func is not supported:
@ -1325,7 +1331,7 @@ class FuncTest(unittest.TestCase):
return False
# ----------------------------------------------------------------------
result = CppToJson.ObtainFunctions(s, onUnsupportedFunc, Policy)
func_list = self._GetFuncList(result)
struct_list = self._GetStructList(result)
@ -1384,7 +1390,7 @@ class FuncTest(unittest.TestCase):
self.assertEqual(struct_list, [])
self.assertEqual(len(include_list), 1)
def test_alias_namespace(self):
s = textwrap.dedent('''\
#include <cstdint>

Просмотреть файл

@ -43,7 +43,7 @@ _script_dir, _script_name = os.path.split(_script_fullpath)
# <Wildcard import> pylint: disable = W0401
# <Unused argument> pylint: disable = W0613
fundamental_repo = os.getenv("DEVELOPMENT_ENVIRONMENT_FUNDAMENTAL")
fundamental_repo = os.getenv("DEVELOPMENT_ENVIRONMENT_FUNDAMENTAL")
assert os.path.isdir(fundamental_repo), fundamental_repo
sys.path.insert(0, fundamental_repo)
@ -97,41 +97,42 @@ def GetDependencies():
d = OrderedDict()
for architecture in ["x64",]:
d[architecture] = Configuration(
architecture,
d["x64"] = Configuration(
"Builds using Clang on an x64 architecture",
[
Dependency(
"3DE9F3430E494A6C8429B26A1503C895",
"Common_cpp_Clang_8",
"x64-ex",
"https://github.com/davidbrownell/Common_cpp_Clang_8.git",
),
],
)
if CurrentShell.CategoryName == "Windows":
d["x64_MSVC"] = Configuration(
"Builds using MSVC 2019 on an x64 architecture",
[
Dependency(
"3DE9F3430E494A6C8429B26A1503C895",
"Common_cpp_Clang_8",
"{}-ex".format(architecture),
"https://github.com/davidbrownell/Common_cpp_Clang_8.git",
),
# TODO: This configuration doesn't depend on boost, however there are some tests associated with the
# `featurization_prep` configuration do. Include it for now, as there isn't a way to specify
# configuration-specific tests at this time. Remove the following dependency once there is a
# way to communicate this information.
Dependency(
"407DD743110A4FB1871AEF60CBEC99A0",
"Common_cpp_boost_1.70.0",
"standard",
"https://github.com/davidbrownell/Common_cpp_boost_1.70.0.git",
"AB7D87C49C2449F79D9F42E5195030FD",
"Common_cpp_MSVC_2019",
"x64",
"https://github.com/davidbrownell/Common_cpp_MSVC_2019.git",
),
],
)
d["featurizer_prep"] = copy.deepcopy(d["x64"])
# TODO: Enable this once the TODO comment above is resolved.
#
# d["featurizer_prep"].Dependencies.append(
# Dependency(
# "407DD743110A4FB1871AEF60CBEC99A0",
# "Common_cpp_boost_1.70.0",
# "standard",
# "https://github.com/davidbrownell/Common_cpp_boost_1.70.0.git",
# ),
# )
d["system_compiler"] = Configuration(
"Builds using the system-installed compiler on an x64 architecture (this will typically be used in a container with dependencies preinstalled)",
[
Dependency(
"F33C43DA6BB54336A7573B39509CDAD7",
"Common_cpp_Common",
"x64",
"https://github.com/davidbrownell/Common_cpp_Common.git",
),
],
)
return d

Просмотреть файл

@ -42,23 +42,53 @@ _script_dir, _script_name = os.path.split(_script_fullpath)
# Tuples in the form:
# ("<repo name>", "<clone command line>", "<setup command suffix>" or None)
_REPO_DATA = [
("Common_cpp_Clang_8", 'git clone https://github.com/davidbrownell/Common_cpp_Clang_8 "{output_dir}"', None),
("Common_cpp_Clang_Common", 'git clone https://github.com/davidbrownell/Common_cpp_Clang_Common "{output_dir}"', None),
("Common_cpp_Common", 'git clone https://github.com/davidbrownell/Common_cpp_Common "{output_dir}"', None),
("Common_cpp_boost_Common", 'git clone https://github.com/davidbrownell/Common_cpp_boost_Common "{output_dir}"', None),
("Common_cpp_boost_1.70.0", 'git clone https://github.com/davidbrownell/Common_cpp_boost_1.70.0 "{output_dir}"', '"/configuration=standard" "/configuration=MSVC-2019-x64"'),
(
"Common_cpp_Clang_8",
'git clone https://github.com/davidbrownell/Common_cpp_Clang_8 "{output_dir}"',
None,
),
(
"Common_cpp_Clang_Common",
'git clone https://github.com/davidbrownell/Common_cpp_Clang_Common "{output_dir}"',
None,
),
(
"Common_cpp_Common",
'git clone https://github.com/davidbrownell/Common_cpp_Common "{output_dir}"',
None,
),
]
if CurrentShell.CategoryName == "Linux":
_REPO_DATA += [
("Common_cpp_binutils", 'git clone https://github.com/davidbrownell/Common_cpp_binutils "{output_dir}"', None),
("Common_cpp_GCC", 'git clone https://github.com/davidbrownell/Common_cpp_GCC "{output_dir}"', None),
(
"Common_cpp_binutils",
'git clone https://github.com/davidbrownell/Common_cpp_binutils "{output_dir}"',
None,
),
(
"Common_cpp_GCC",
'git clone https://github.com/davidbrownell/Common_cpp_GCC "{output_dir}"',
None,
),
]
elif CurrentShell.CategoryName == "Windows":
_REPO_DATA += [
("Common_cpp_MSVC_2019", 'git clone https://github.com/davidbrownell/Common_cpp_MSVC_2019 "{output_dir}"', None),
("Common_cpp_MSVC_Common", 'git clone https://github.com/davidbrownell/Common_cpp_MSVC_Common "{output_dir}"', None),
("Common_cpp_MSVC_WindowsKits_10", 'git clone https://github.com/davidbrownell/Common_cpp_MSVC_WindowsKits_10 "{output_dir}"', None),
(
"Common_cpp_MSVC_2019",
'git clone https://github.com/davidbrownell/Common_cpp_MSVC_2019 "{output_dir}"',
None,
),
(
"Common_cpp_MSVC_Common",
'git clone https://github.com/davidbrownell/Common_cpp_MSVC_Common "{output_dir}"',
None,
),
(
"Common_cpp_MSVC_WindowsKits_10",
'git clone https://github.com/davidbrownell/Common_cpp_MSVC_WindowsKits_10 "{output_dir}"',
None,
),
]
else:
raise Exception("'{}' is not supported OS".format(CurrentShell.CategoryName))
@ -70,7 +100,9 @@ inflect = inflect_mod.engine()
# ----------------------------------------------------------------------
@CommandLine.EntryPoint(
setup_args=CommandLine.EntryPoint.Parameter("Optional arguments passed to this repository's setup command"),
setup_args=CommandLine.EntryPoint.Parameter(
"Optional arguments passed to this repository's setup command",
),
)
@CommandLine.Constraints(
output_dir=CommandLine.DirectoryTypeInfo(),
@ -96,13 +128,18 @@ def EntryPoint(
dm.stream.write("Calculating enlistment repositories...")
with dm.stream.DoneManager(
done_suffix=lambda: "{} found for enlistment".format(inflect.no("repository", len(enlistment_repositories))),
done_suffix=lambda: "{} found for enlistment".format(
inflect.no("repository", len(enlistment_repositories)),
),
suffix="\n",
) as this_dm:
for data in _REPO_DATA:
repo_name = data[0]
repo_output_dir = os.path.join(output_dir, repo_name.replace("_", os.path.sep))
repo_output_dir = os.path.join(
output_dir,
repo_name.replace("_", os.path.sep),
)
if not os.path.isdir(repo_output_dir):
enlistment_repositories.append((repo_output_dir, data))
else:
@ -113,12 +150,22 @@ def EntryPoint(
repo_data[_script_dir] = (_script_dir, None, setup_args)
if enlistment_repositories:
dm.stream.write("Enlisting in {}...".format(inflect.no("repository", len(enlistment_repositories))))
dm.stream.write(
"Enlisting in {}...".format(
inflect.no("repository", len(enlistment_repositories)),
),
)
with dm.stream.DoneManager(
suffix="\n",
) as enlist_dm:
for index, (repo_output_dir, data) in enumerate(enlistment_repositories):
enlist_dm.stream.write("'{}' ({} of {})...".format(data[0], index + 1, len(enlistment_repositories)))
enlist_dm.stream.write(
"'{}' ({} of {})...".format(
data[0],
index + 1,
len(enlistment_repositories),
),
)
with enlist_dm.stream.DoneManager() as this_dm:
FileSystem.MakeDirs(os.path.dirname(repo_output_dir))
@ -152,30 +199,48 @@ def EntryPoint(
CurrentShell.UpdateOwnership(output_dir)
if sync_repositories:
dm.stream.write("Syncing {}...".format(inflect.no("repository", len(sync_repositories))))
dm.stream.write(
"Syncing {}...".format(inflect.no("repository", len(sync_repositories))),
)
with dm.stream.DoneManager(
suffix="\n",
) as sync_dm:
sync_command_template = '{} PullAndUpdate "/directory={{}}"'.format(CurrentShell.CreateScriptName("SCM"))
sync_command_template = '{} PullAndUpdate "/directory={{}}"'.format(
CurrentShell.CreateScriptName("SCM"),
)
for index, (repo_output_dir, data) in enumerate(sync_repositories):
sync_dm.stream.write("'{}' ({} of {})...".format(data[0], index + 1, len(sync_repositories)))
sync_dm.stream.write(
"'{}' ({} of {})...".format(
data[0],
index + 1,
len(sync_repositories),
),
)
with sync_dm.stream.DoneManager() as this_dm:
this_dm.result, output = Process.Execute(sync_command_template.format(repo_output_dir))
this_dm.result, output = Process.Execute(
sync_command_template.format(repo_output_dir),
)
if this_dm.result != 0:
this_dm.stream.write(output)
if sync_dm.result != 0:
return sync_dm.result
dm.stream.write("Setting up {}...".format(inflect.no("repository", len(repo_data))))
dm.stream.write(
"Setting up {}...".format(inflect.no("repository", len(repo_data))),
)
with dm.stream.DoneManager(
suffix="\n",
) as setup_dm:
command_line_template = "Setup{} {{suffix}}".format(CurrentShell.ScriptExtension)
command_line_template = "Setup{} {{suffix}}".format(
CurrentShell.ScriptExtension,
)
for index, (repo_output_dir, data) in enumerate(six.iteritems(repo_data)):
setup_dm.stream.write("'{}' ({} of {})...".format(data[0], index + 1, len(repo_data)))
setup_dm.stream.write(
"'{}' ({} of {})...".format(data[0], index + 1, len(repo_data)),
)
with setup_dm.stream.DoneManager() as this_dm:
prev_dir = os.getcwd()
os.chdir(repo_output_dir)
@ -238,7 +303,12 @@ def EntryPoint(
os.path.join(
_script_dir,
"Activate{}{}".format(
".{}".format(os.getenv("DEVELOPMENT_ENVIRONMENT_ENVIRONMENT_NAME")) if os.getenv("DEVELOPMENT_ENVIRONMENT_ENVIRONMENT_NAME") != "DefaultEnv" else "",
".{}".format(
os.getenv("DEVELOPMENT_ENVIRONMENT_ENVIRONMENT_NAME"),
)
if os.getenv("DEVELOPMENT_ENVIRONMENT_ENVIRONMENT_NAME")
!= "DefaultEnv"
else "",
CurrentShell.ScriptExtension,
),
),

Просмотреть файл

@ -131,7 +131,7 @@ using AnnotationMapsPtr = std::shared_ptr<AnnotationMaps>;
// TODO: Expect more classes with regards to Annotation as we use the functionality more.
/////////////////////////////////////////////////////////////////////////
/// \function CreateTestAnnotationMapsPtr
/// \fn CreateTestAnnotationMapsPtr
/// \brief An `Estimator` requires an `AnnotationMapsPtr` upon
/// construction. This method can be used to quickly create
/// one of these objects during testing.
@ -182,7 +182,7 @@ public:
FEATURIZER_MOVE_CONSTRUCTOR_ONLY(Estimator);
/////////////////////////////////////////////////////////////////////////
/// \function get_column_annotations
/// \fn get_column_annotations
/// \brief Returns the column annotations for all columns. Note
/// that this information is shared across all `Estimators`
/// with the DAG.
@ -204,13 +204,13 @@ protected:
Estimator(std::string name, AnnotationMapsPtr pAllColumnAnnotations);
/////////////////////////////////////////////////////////////////////////
/// \function add_annotation
/// \fn add_annotation
/// \brief Adds an `Annotation` to the specified column.
///
void add_annotation(AnnotationPtr pAnnotation, size_t col_index) const;
/////////////////////////////////////////////////////////////////////////
/// \function get_annotation_impl
/// \fn get_annotation_impl
/// \brief Helper method that can be used by derived class when implementation functionality
/// to retrieve `Annotation` data created by the derived class itself.
///
@ -262,7 +262,7 @@ public:
FEATURIZER_MOVE_CONSTRUCTOR_ONLY(FitEstimatorImpl);
/////////////////////////////////////////////////////////////////////////
/// \function is_training_complete
/// \fn is_training_complete
/// \brief Returns true if the `complete_training` method has been called
/// for this `Estimator`. `fit` should not be invoked on
/// an `Estimator` where training has been completed.
@ -270,7 +270,7 @@ public:
bool is_training_complete(void) const;
/////////////////////////////////////////////////////////////////////////
/// \function fit
/// \fn fit
/// \brief Method invoked during training. This method will be invoked until it returns `FitResult::Complete`
/// or no additional data is available. Derived classes should use this columnar data to create
/// state (either in the form of `Annotations`) used during the training process or state data that
@ -281,7 +281,7 @@ public:
FitResult fit(FitBufferInputType const *pInputBuffer, size_t cInputBuffer);
/////////////////////////////////////////////////////////////////////////
/// \function complete_training
/// \fn complete_training
/// \brief Completes the training process. Derived classes should use this method to produce any final state
/// that is used in calls to `transform` or to add `Annotations` for a column. This method should not be
/// invoked on an object that has already been completed.
@ -303,14 +303,14 @@ private:
// ----------------------------------------------------------------------
/////////////////////////////////////////////////////////////////////////
/// \function fit_impl
/// \fn fit_impl
/// \brief `fit` performs common object state and parameter validation before invoking
/// this abstract method.
///
virtual FitResult fit_impl(FitBufferInputType const *pBuffer, size_t cBuffer) = 0;
/////////////////////////////////////////////////////////////////////////
/// \function complete_training_impl
/// \fn complete_training_impl
/// \brief `complete_training` performs common object state validation before invoking this
/// abstract method.
///
@ -390,13 +390,13 @@ public:
FEATURIZER_MOVE_CONSTRUCTOR_ONLY(Transformer);
/////////////////////////////////////////////////////////////////////////
/// \function execute
/// \fn execute
/// \brief Produces a result for a given input.
///
virtual TransformedType execute(InputType input) = 0;
/////////////////////////////////////////////////////////////////////////
/// \function save
/// \fn save
/// \brief Saves the state of the object so it can be reconstructed
/// at a later time.
///
@ -416,7 +416,7 @@ public:
FEATURIZER_MOVE_CONSTRUCTOR_ONLY(TransformerEstimator);
/////////////////////////////////////////////////////////////////////////
/// \function has_created_transformer
/// \fn has_created_transformer
/// \brief Returns true if this object has been used to create
/// a `Transformer`. No methods should be called on the object
/// once it has been used to create a transformer.
@ -424,7 +424,7 @@ public:
bool has_created_transformer(void) const;
/////////////////////////////////////////////////////////////////////////
/// \function create_transformer
/// \fn create_transformer
/// \brief Creates a `Transformer` using the trained state of the
/// object. No methods should be called on the object once
/// it has been used to create a transformer.
@ -446,7 +446,7 @@ private:
// ----------------------------------------------------------------------
/////////////////////////////////////////////////////////////////////////
/// \function create_transformer_impl
/// \fn create_transformer_impl
/// \brief `create_transformer` performs common object state validation before
/// calling this method.
///
@ -606,6 +606,13 @@ Estimator::FitResult FitEstimatorImpl<InputT>::fit(InputType value) {
return fit(&value, 1);
}
// I'm not sure why MSVC thinks that the following code is unreachable
// with release builds.
#if (defined _MSC_VER)
# pragma warning(push)
# pragma warning(disable: 4702) // Unreachable code
#endif
template <typename InputT>
Estimator::FitResult FitEstimatorImpl<InputT>::fit(FitBufferInputType const *pInputBuffer, size_t cInputBuffer) {
if(_is_training_complete)
@ -625,6 +632,10 @@ Estimator::FitResult FitEstimatorImpl<InputT>::fit(FitBufferInputType const *pIn
return result;
}
#if (defined _MSC_VER)
# pragma warning(pop)
#endif
template <typename InputT>
Estimator::FitResult FitEstimatorImpl<InputT>::complete_training(void) {
if(_is_training_complete)

Просмотреть файл

@ -105,7 +105,15 @@ void NumericTestWrapper(){
auto inferencingInput = make_vector<inputType>(static_cast<castType>(5),static_cast<castType>(8),static_cast<castType>(20)
,null,null,null,null);
auto inferencingOutput = make_vector<transformedType>(5,8,20,10,10,10,10);
auto inferencingOutput = make_vector<transformedType>(
static_cast<castType>(5),
static_cast<castType>(8),
static_cast<castType>(20),
static_cast<castType>(10),
static_cast<castType>(10),
static_cast<castType>(10),
static_cast<castType>(10)
);
NS::AnnotationMapsPtr const pAllColumnAnnotations(NS::CreateTestAnnotationMapsPtr(1));

Просмотреть файл

@ -77,8 +77,8 @@ TEST_CASE("Transformer_Vector") {
TEST_CASE("Transformer_Maps") {
std::map<std::int16_t, std::double_t> m;
m.insert(std::pair<std::int16_t, std::double_t>(5, 35.8));
m.insert(std::pair<std::int16_t, std::double_t>(93, 0.147));
m.insert(std::pair<std::int16_t, std::double_t>(static_cast<std::int16_t>(5), 35.8));
m.insert(std::pair<std::int16_t, std::double_t>(static_cast<std::int16_t>(93), 0.147));
std::string map_s{ "{5:35.800000,93:0.147000}" };
CHECK(NS::Featurizers::StringTransformer<std::map<std::int16_t, std::double_t>>().execute(m) == map_s);
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -43,6 +43,8 @@ def Build(
output_stream=sys.stdout,
verbose=False,
):
"""Builds the Featurizer Shared Library"""
with StreamDecorator(output_stream).DoneManager(
line_prefix="",
prefix="\nResults: ",
@ -53,7 +55,9 @@ def Build(
# ----------------------------------------------------------------------
def CleanupTempDir():
if keep_temp_dir:
dm.stream.write("\nCMake output has been written to '{}'.\n".format(temp_directory))
dm.stream.write(
"\nCMake output has been written to '{}'.\n".format(temp_directory),
)
return
FileSystem.RemoveTree(temp_directory)
@ -75,22 +79,15 @@ def Build(
this_dir=_script_dir,
),
),
(
"Building",
"cmake --build .",
),
(
"Copying Binaries",
_CopyBinaries,
),
(
"Copying Headers",
_CopyHeaders,
),
("Building", "cmake --build ."),
("Copying Binaries", _CopyBinaries),
("Copying Headers", _CopyHeaders),
]
for index, (activity, command_line) in enumerate(activities):
dm.stream.write("{} ({} of {})...".format(activity, index + 1, len(activities)))
dm.stream.write(
"{} ({} of {})...".format(activity, index + 1, len(activities)),
)
with dm.stream.DoneManager(
suffix="\n" if verbose else None,
) as this_dm:
@ -109,9 +106,16 @@ def Build(
this_output_stream = StreamDecorator(output_streams)
if callable(command_line):
this_dm.result = command_line(temp_directory, output_dir, this_output_stream)
this_dm.result = command_line(
temp_directory,
output_dir,
this_output_stream,
)
else:
this_dm.result = Process.Execute(command_line, this_output_stream)
this_dm.result = Process.Execute(
command_line,
this_output_stream,
)
if this_dm.result != 0:
if not verbose:
@ -134,6 +138,8 @@ def Clean(
output_dir,
output_stream=sys.stdout,
):
"""Cleans previously built content"""
with StreamDecorator(output_stream).DoneManager(
line_prefix="",
prefix="\nResults: ",
@ -149,26 +155,57 @@ def Clean(
return dm.result
# ----------------------------------------------------------------------
@CommandLine.EntryPoint(
# TODO
)
@CommandLine.Constraints(
windows_build_dir=CommandLine.DirectoryTypeInfo(),
linux_build_dir=CommandLine.DirectoryTypeInfo(),
output_dir=CommandLine.DirectoryTypeInfo(
ensure_exists=False,
),
output_stream=None,
)
def Package(
windows_build_dir,
linux_build_dir,
output_dir,
output_stream=sys.stdout,
verbose=False,
):
"""Packages previously built content"""
with StreamDecorator(output_stream).DoneManager(
line_prefix="",
prefix="\nResults: ",
suffix="\n",
) as dm:
# TODO: Implement this
return dm.result
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
# ----------------------------------------------------------------------
def _CopyBinaries(temp_directory, output_dir, output_stream):
if CurrentShell.CategoryName == "Windows":
output_files = [
"Featurizers.dll",
"Featurizers.pdb",
]
output_files = ["Featurizers.dll", "Featurizers.pdb"]
elif CurrentShell.CategoryName == "Linux":
output_files = [
"libFeaturizers.so",
]
output_files = ["libFeaturizers.so"]
else:
raise Exception("The Current Shell is not supported")
for index, output_file in enumerate(output_files):
output_stream.write("Copying '{}' ({} of {})...".format(output_file, index + 1, len(output_files)))
output_stream.write(
"Copying '{}' ({} of {})...".format(output_file, index + 1, len(output_files)),
)
with output_stream.DoneManager():
shutil.copyfile(os.path.join(temp_directory, output_file), os.path.join(output_dir, output_file))
shutil.copyfile(
os.path.join(temp_directory, output_file),
os.path.join(output_dir, output_file),
)
return 0
@ -183,9 +220,14 @@ def _CopyHeaders(temp_directory, output_dir, output_stream):
)
for index, output_file in enumerate(output_files):
output_stream.write("Copying '{}' ({} of {})...".format(output_file, index + 1, len(output_files)))
output_stream.write(
"Copying '{}' ({} of {})...".format(output_file, index + 1, len(output_files)),
)
with output_stream.DoneManager():
shutil.copyfile(output_file, os.path.join(output_dir, os.path.basename(output_file)))
shutil.copyfile(
output_file,
os.path.join(output_dir, os.path.basename(output_file)),
)
return 0

Просмотреть файл

@ -14,6 +14,13 @@ ErrorInfoHandle * CreateErrorInfo(std::exception const &ex);
extern "C" {
// I don't know why MSVC thinks that there is unreachable
// code in these methods during release builds.
#if (defined _MSC_VER)
# pragma warning(push)
# pragma warning(disable: 4702) // Unreachable code
#endif
/* ---------------------------------------------------------------------- */
/* | */
/* | CatImputerFeaturizer <int8_t> */
@ -3515,4 +3522,9 @@ FEATURIZER_LIBRARY_API bool CatImputerFeaturizer_string_DestroyTransformedData(/
}
}
#if (defined _MSC_VER)
# pragma warning(pop)
#endif
} // extern "C"

Просмотреть файл

@ -14,6 +14,13 @@ ErrorInfoHandle * CreateErrorInfo(std::exception const &ex);
extern "C" {
// I don't know why MSVC thinks that there is unreachable
// code in these methods during release builds.
#if (defined _MSC_VER)
# pragma warning(push)
# pragma warning(disable: 4702) // Unreachable code
#endif
/* ---------------------------------------------------------------------- */
/* | */
/* | DateTimeFeaturizer */
@ -405,4 +412,9 @@ FEATURIZER_LIBRARY_API bool DateTimeFeaturizer_DestroyTransformedData(/*in*/ Tim
}
}
#if (defined _MSC_VER)
# pragma warning(pop)
#endif
} // extern "C"

Просмотреть файл

@ -14,6 +14,13 @@ ErrorInfoHandle * CreateErrorInfo(std::exception const &ex);
extern "C" {
// I don't know why MSVC thinks that there is unreachable
// code in these methods during release builds.
#if (defined _MSC_VER)
# pragma warning(push)
# pragma warning(disable: 4702) // Unreachable code
#endif
/* ---------------------------------------------------------------------- */
/* | */
/* | StringFeaturizer <int8_t> */
@ -3757,4 +3764,9 @@ FEATURIZER_LIBRARY_API bool StringFeaturizer_string_DestroyTransformedData(/*in*
}
}
#if (defined _MSC_VER)
# pragma warning(pop)
#endif
} // extern "C"

Просмотреть файл

@ -412,7 +412,10 @@ def _GenerateHeaderFile(output_dir, items, c_data_items, output_stream):
""",
).format(
struct_name,
StringHelpers.LeftJustify("\n".join(["{};".format(member) for member in members]), 4).strip(),
StringHelpers.LeftJustify(
"\n".join(["{};".format(member) for member in members]),
4,
).strip(),
),
)
@ -447,7 +450,9 @@ def _GenerateHeaderFile(output_dir, items, c_data_items, output_stream):
{delete_transformed_method}
""",
).format(
custom_structs="" if not custom_structs else "{}\n\n".format(custom_structs.strip()),
custom_structs="" if not custom_structs else "{}\n\n".format(
custom_structs.strip(),
),
construct_params="{}, ".format(
", ".join(construct_params),
) if construct_params else "",
@ -466,9 +471,7 @@ def _GenerateHeaderFile(output_dir, items, c_data_items, output_stream):
).ParameterDecl,
),
transform_output_param=", ".join(
c_data.TransformedTypeInfo.GetOutputInfo(
"output",
).ParameterDecl,
c_data.TransformedTypeInfo.GetOutputInfo("output").ParameterDecl,
),
delete_transformed_method=delete_transformed_method,
**d
@ -509,6 +512,13 @@ def _GenerateCppFile(output_dir, items, c_data_items, output_stream):
extern "C" {{
// I don't know why MSVC thinks that there is unreachable
// code in these methods during release builds.
#if (defined _MSC_VER)
# pragma warning(push)
# pragma warning(disable: 4702) // Unreachable code
#endif
""",
).format(
name=items[0].name,
@ -926,6 +936,11 @@ def _GenerateCppFile(output_dir, items, c_data_items, output_stream):
f.write(
textwrap.dedent(
"""\
#if (defined _MSC_VER)
# pragma warning(pop)
#endif
} // extern "C"
""",
),
@ -1233,11 +1248,7 @@ class _ScalarTypeInfo(TypeInfo):
"""if({name} == nullptr) throw std::invalid_argument("'{name}' is null");""".format(
name=arg_name,
),
"{}{} = {};".format(
"" if is_struct else "*",
arg_name,
result_name,
),
"{}{} = {};".format("" if is_struct else "*", arg_name, result_name),
)
# ----------------------------------------------------------------------
@ -1271,7 +1282,11 @@ class _StringTypeInfo(TypeInfo):
)
invocation = invocation_template.format(arg_name)
return cls.Info(["/*in*/ char const *{}".format(arg_name)], validation, invocation)
return cls.Info(
["/*in*/ char const *{}".format(arg_name)],
validation,
invocation,
)
# ----------------------------------------------------------------------
@classmethod
@ -1357,7 +1372,7 @@ class _StringTypeInfo(TypeInfo):
),
"/*out*/ std::size_t{pointer} {name}_items".format(
name=arg_name,
pointer=""if is_struct else " *",
pointer="" if is_struct else " *",
),
],
textwrap.dedent(
@ -1461,25 +1476,14 @@ class _TimePointTypeInfo(TypeInfo):
for member_name, member_info in six.iteritems(self._member_info):
member_statements.append(
member_info.GetOutputInfo(
"{}.{}".format(
result_time_point_name,
member_name,
),
"{}.{}".format(
result_name,
member_name,
),
"{}.{}".format(result_time_point_name, member_name),
"{}.{}".format(result_name, member_name),
is_struct=True,
).InvocationStatement,
)
return self.Info(
[
"/*out*/ TimePoint *{} {}".format(
"" if is_struct else "*",
arg_name,
),
],
["/*out*/ TimePoint *{} {}".format("" if is_struct else "*", arg_name)],
"""if({name} == nullptr) throw std::invalid_argument("'{name}' is null");""".format(
name=arg_name,
),

Просмотреть файл

@ -2,7 +2,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License
// ----------------------------------------------------------------------
#pragma once
#include <array>
@ -26,7 +25,7 @@ namespace Featurizer {
/////////////////////////////////////////////////////////////////////////
/// \struct Traits
/// \brief We have a range of of types we are dealing with. Many types
/// have different ways to represent what a `NULL` value is
/// have different ways to represent what a 'NULL' value is
/// (float has NAN for example) as well as different ways to
/// convert the value to a string representation. By using
/// templates combined with partial template specialization

Просмотреть файл

@ -19,7 +19,13 @@ _script_dir, _script_name = os.path.split(_script_fullpath)
sys.path.insert(0, os.path.join(_script_dir, "..", ".."))
with CallOnExit(lambda: sys.path.pop(0)):
from Impl.CodeGenerator import *
try:
from Impl.CodeGenerator import *
except ModuleNotFoundError:
# If here, it might be that clang is not available
if os.getenv("DEVELOPMENT_ENVIRONMENT_REPOSITORY_CONFIGURATION") != "x64":
sys.stdout.write("Clang is not available in this configuration.\nOK <- this is enough to convince the test parser that tests were successful.\n")
sys.exit(0)
# ----------------------------------------------------------------------
class StandardTests(unittest.TestCase):

Просмотреть файл

@ -24,7 +24,13 @@ _script_dir, _script_name = os.path.split(_script_fullpath)
sys.path.insert(0, os.path.join(_script_dir, ".."))
with CallOnExit(lambda: sys.path.pop(0)):
from ContentExtractor import *
try:
from ContentExtractor import *
except ModuleNotFoundError:
# If here, it might be that clang is not available
if os.getenv("DEVELOPMENT_ENVIRONMENT_REPOSITORY_CONFIGURATION") != "x64":
sys.stdout.write("Clang is not available in this configuration.\nOK <- this is enough to convince the test parser that tests were successful.\n")
sys.exit(0)
# ----------------------------------------------------------------------
class StandardTests(unittest.TestCase):