Full windows support with ssplit from browsermt, not a fork (#109)

* Update marian-dev to the newest mac version

* Attempt windows workflow

* force workflow rerun

* Separate id

* Attempt 3 at github action

* Marian dev submodule now compiles with apple clang

* Updated ssplit version to something more recent

* Attempt to fix compile on wasm

* Do not compile subproject tests

* Fix emscripten compilation on Mac

* 99% on the way to windows compile

* Try with a different generator

* Build release not debug

* Revert CMakeLists.txt hacks

* Fix sse2 compilation failure

* MSVC settings for WIN32

* Add nodefaultlib LIBCMT

* Do not compile ssplit.cpp as it contains sys/mman.h

* Revert ab56b9aa4f4360b0ab98d5806658d4302f31db1d

* Update paths

* Set the build type to release if not set previously

* Attempt to build release with the windows workflow

* Attempt 5 at VS studio release build

* Attempt 6 at getting release build on MSVC generator

* The windows build is debug at the moment...

* fix ssplit for ubuntu 16.04

* Fix compilation with clang

* Compile on ubuntu16.04

* Explain what is going on

* Updated ssplit and workflow
This commit is contained in:
Nikolay Bogoychev 2021-05-01 00:29:23 +01:00 коммит произвёл GitHub
Родитель e286533164
Коммит d82e01eda4
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
6 изменённых файлов: 51 добавлений и 8 удалений

13
.github/workflows/windows.yml поставляемый
Просмотреть файл

@ -37,7 +37,7 @@ jobs:
shell: powershell
- name: Prepare vcpkg
uses: lukka/run-vcpkg@v4
uses: lukka/run-vcpkg@v7.3
with:
vcpkgArguments: protobuf pcre2
vcpkgGitCommitId: 6185aa76504a5025f36754324abf307cc776f3da
@ -45,22 +45,23 @@ jobs:
vcpkgTriplet: x64-windows-static
# Windows CPU only minimal build
- name: Build Debug
- name: Build Release # @TODO this is actually a debug build until the ninja generator gets fixed
uses: lukka/run-cmake@v3
with:
buildDirectory: ${{ github.workspace }}/build/Debug
buildDirectory: ${{ github.workspace }}/build
cmakeAppendedArgs: '-G Ninja
-DCMAKE_BUILD_TYPE="Debug"
-DCMAKE_BUILD_TYPE="Release"
-DUSE_WASM_COMPATIBLE_SOURCE="OFF"
-DUSE_STATIC_LIBS="TRUE"'
cmakeListsOrSettingsJson: CMakeListsTxtAdvanced
cmakeListsTxtPath: ${{ github.workspace }}/CMakeLists.txt
useVcpkgToolchainFile: true
cmakeBuildType: Release
- name: Print versions
working-directory: build/
working-directory: build
run: |
.\app\bergamot-translator-app.exe --version
.\app\service-cli.exe --version
dir *.exe
shell: cmd

2
3rd_party/ssplit-cpp поставляемый

@ -1 +1 @@
Subproject commit dfefe34218fe3aced70266994b6557f029fcbdde
Subproject commit 8d338ed5c77d22f8c86f60554596fa57bf5091e6

Просмотреть файл

@ -9,6 +9,28 @@ project(bergamot_translator CXX C)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Note that with CMake MSVC build, the option CMAKE_BUILD_TYPE is automatically derived from the key
# 'configurationType' in CMakeSettings.json configurations
if(NOT CMAKE_BUILD_TYPE)
message(WARNING "CMAKE_BUILD_TYPE not set; setting to Release")
set(CMAKE_BUILD_TYPE "Release")
endif()
#MSVC can't seem to pick up correct flags otherwise:
if(MSVC)
add_definitions(-DUSE_SSE2=1) # Supposed to fix something in the sse_mathfun.h but not sure it does
set(INTRINSICS "/arch:AVX2") # ARCH we're targetting on win32. @TODO variable
set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG")
# ignores warning LNK4049: locally defined symbol free imported - this comes from zlib
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /ignore:4049")
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT")
set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRTD")
set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental")
endif(MSVC)
include(CMakeDependentOption)
# Project specific cmake options
@ -22,11 +44,12 @@ SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be package
SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
SET(SSPLIT_COMPILE_LIBRARY_ONLY ON CACHE BOOL "Do not compile ssplit tests")
if (USE_WASM_COMPATIBLE_SOURCE)
SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
# # Setting the ssplit-cpp submodule specific cmake options for wasm
SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
SET(SSPLIT_USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
endif()
# Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html

Просмотреть файл

@ -8,6 +8,7 @@
#include <string>
#include <vector>
#include "translator/definitions.h"
/* All possible Granularities for which Quality Scores can be returned for
* translated text. */

Просмотреть файл

@ -28,4 +28,21 @@ typedef AlignedVector<char> AlignedMemory;
} // namespace bergamot
} // namespace marian
// @TODO at the moment the usage of string_view in this repository is a hot mess and a disaster waiting to happen.
// ssplit uses std::string_view if the compiler supports c++17, else falls back to c++11 and absl::string_view
// bergamot-translator uses, depending on the source file std::string_view (which will break if ssplit-cpp uses
// absl::string_view) and marian::string_view which is an export of (confusingly) the sentencepiece module that
// marian has. marian::string_view is our addition to the marian fork, which will make merging even nicer. Not.
// This is just an ugly patchwork that allos gcc5, our lowest targetted gcc to run. We don't actually try to run
// on older compilers.
#if defined(__GNUC__) && __GNUC__ < 6 && !defined(__clang__)
#include <experimental/string_view>
namespace std {
using string_view = std::experimental::string_view;
} // namespace std
#else
#include <string_view>
#endif
#endif // SRC_BERGAMOT_DEFINITIONS_H_

Просмотреть файл

@ -4,6 +4,7 @@
#include "common/options.h"
#include "data/types.h"
#include "ssplit.h"
#include "definitions.h"
#include <string>
namespace marian {