// STL/stl/inc/cvt/one_one
//
// 243 lines
// 11 KiB
// C++

// codecvt facet for multibyte code as serialized wide-character code
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once
#ifndef _CVT_ONE_ONE_
#define _CVT_ONE_ONE_
#include <yvals_core.h>
#if _STL_COMPILER_PREPROCESSOR
#include <codecvt>
#include <cwchar>
#include <locale>
#pragma pack(push, _CRT_PACKING)
#pragma warning(push, _STL_WARNING_LEVEL)
#pragma warning(disable : _STL_DISABLED_WARNINGS)
_STL_DISABLE_CLANG_WARNINGS
#pragma push_macro("new")
#undef new
#if !_HAS_IF_CONSTEXPR
#pragma warning(disable : 4127) // conditional expression is constant
#endif // !_HAS_IF_CONSTEXPR
namespace stdext {
namespace cvt {
// conversion-state type used by codecvt_one_one below (alias for mbstate_t)
using _Statype = _CSTD mbstate_t;
_STL_DISABLE_DEPRECATED_WARNING
// CLASS TEMPLATE codecvt_one_one
// Facet that converts between elements of type _Elem and a byte stream in which every element is
// serialized as exactly _Bytes_per_word bytes (fixed width).
//
// Template parameters:
//   _Elem           - internal element type
//   _Maxcode        - largest element value accepted; larger values make do_in/do_out return error
//   _Mode           - codecvt_mode flags; this class tests little_endian, consume_header and generate_header
//   _Bytes_per_word - number of bytes per serialized element, must be in [1, 4]
//
// The first char of the state object records the byte order in effect: _STD _Little_first or
// _STD _Big_first once it has been fixed; any other value (0 in do_out) means "not determined yet".
template <class _Elem, unsigned long _Maxcode = 0xffffffff, _STD codecvt_mode _Mode = _STD codecvt_mode{},
    size_t _Bytes_per_word = sizeof(_Elem)>
class codecvt_one_one
    : public _STD
          codecvt<_Elem, char, _Statype> { // facet for converting between _Elem and serialized byte sequences
public:
    static_assert(_Bytes_per_word >= 1 && _Bytes_per_word <= 4, "bad byte count parameter");

    using _Mybase     = _STD codecvt<_Elem, char, _Statype>;
    using result      = typename _Mybase::result;
    using _Byte       = char;
    using intern_type = _Elem;
    using extern_type = _Byte;
    using state_type  = _Statype;

    // construct the facet, forwarding the reference-count argument to the base codecvt
    explicit codecvt_one_one(size_t _Refs = 0) : _Mybase(_Refs) {}

    virtual ~codecvt_one_one() noexcept {}

protected:
    // Deserialize bytes into elements.
    //   _State          - conversion state; its first char holds the byte order (see class comment)
    //   _First1, _Last1 - input bytes
    //   _Mid1           - receives the position after the last input byte consumed
    //   _First2, _Last2 - output element buffer
    //   _Mid2           - receives the position after the last element stored
    // Returns error if a decoded value exceeds _Maxcode, partial if no input was consumed, otherwise ok.
    virtual result do_in(_Statype& _State, const _Byte* _First1, const _Byte* _Last1, const _Byte*& _Mid1,
        _Elem* _First2, _Elem* _Last2, _Elem*& _Mid2) const override {
        // convert bytes [_First1, _Last1) to [_First2, _Last2)
        char* _Pstate = reinterpret_cast<char*>(&_State);
        size_t _Count;
        _Mid1 = _First1;
        _Mid2 = _First2;

        while (_Bytes_per_word <= _Last1 - _Mid1 && _Mid2 != _Last2) { // convert a multibyte sequence
            const auto _Ptr   = reinterpret_cast<const unsigned char*>(_Mid1);
            unsigned long _Ch = 0;

            if (*_Pstate == _STD _Little_first) { // least significant byte is stored first
                for (_Count = _Bytes_per_word; 0 < _Count;) {
                    _Ch = _Ch << 8 | _Ptr[--_Count];
                }
            } else if (*_Pstate == _STD _Big_first) { // most significant byte is stored first
                for (_Count = 0; _Count < _Bytes_per_word; ++_Count) {
                    _Ch = _Ch << 8 | _Ptr[_Count];
                }
            } else { // no header seen yet, try preferred mode
                constexpr unsigned char _Default_endian = static_cast<unsigned char>(
                    (_Mode & _STD little_endian) != 0 ? _STD _Little_first : _STD _Big_first);

                if _CONSTEXPR_IF ((_Mode & _STD little_endian) != 0) {
                    for (_Count = _Bytes_per_word; 0 < _Count;) {
                        _Ch = _Ch << 8 | _Ptr[--_Count];
                    }
                } else {
                    for (_Count = 0; _Count < _Bytes_per_word; ++_Count) {
                        _Ch = _Ch << 8 | _Ptr[_Count];
                    }
                }

                if _CONSTEXPR_IF ((_Mode & _STD consume_header) == 0) {
                    *_Pstate = _Default_endian; // headers are not recognized: first word is ordinary data
                } else if (_Ch != 0xfeff && _Ch != (0xfffe0000 >> 8 * (4 - _Bytes_per_word))) {
                    // first word is neither 0xfeff nor its byte-swapped form: treat it as data
                    *_Pstate = _Default_endian;
                } else { // consume header, fixate on endianness, and retry
                    _Mid1 += _Bytes_per_word;
                    // 0xfeff read in the preferred order confirms that order; otherwise select the other one
                    *_Pstate = static_cast<char>(
                        _Ch == 0xfeff ? _Default_endian : static_cast<unsigned char>(3 - _Default_endian));
                    result _Ans = do_in(_State, _Mid1, _Last1, _Mid1, _First2, _Last2, _Mid2);

                    if (_Ans == _Mybase::partial) { // not enough bytes, roll back header
                        *_Pstate = 0;
                        _Mid1    = _First1;
                    }

                    return _Ans;
                }
            }

            _Mid1 += _Bytes_per_word;
            if (_Maxcode < _Ch) {
                // note: _Mid1 has already been advanced past the rejected word
                return _Mybase::error; // code too large
            }

            *_Mid2++ = static_cast<_Elem>(_Ch);
        }

        return _First1 == _Mid1 ? _Mybase::partial : _Mybase::ok;
    }

    // Serialize elements into bytes.
    //   _State          - conversion state; its first char holds the byte order (see class comment)
    //   _First1, _Last1 - input elements
    //   _Mid1           - receives the position after the last element consumed
    //   _First2, _Last2 - output byte buffer
    //   _Mid2           - receives the position after the last byte stored
    // On the first call (state char == 0) the byte order is fixed from _Mode and, if generate_header is
    // set, the value 0xfeff is written first. Returns error if an element exceeds _Maxcode, partial if no
    // element was consumed (including when there is no room for header plus one element), otherwise ok.
    virtual result do_out(_Statype& _State, const _Elem* _First1, const _Elem* _Last1, const _Elem*& _Mid1,
        _Byte* _First2, _Byte* _Last2, _Byte*& _Mid2) const override {
        // convert [_First1, _Last1) to bytes [_First2, _Last2)
        char* _Pstate = reinterpret_cast<char*>(&_State);
        size_t _Count;
        _Mid1 = _First1;
        _Mid2 = _First2;

        if (*_Pstate == 0) { // determine endianness once, maybe generate header
            unsigned long _Header = 0xfeff;

            if _CONSTEXPR_IF ((_Mode & _STD little_endian) != 0) {
                *_Pstate = _STD _Little_first;
            } else {
                *_Pstate = _STD _Big_first;
            }

            if _CONSTEXPR_IF ((_Mode & _STD generate_header) == 0) {
                (void) _Header; // unused
            } else if (_Last2 - _Mid2 < 2 * _Bytes_per_word) {
                // Nothing has been written yet, so undo the byte-order decision; otherwise a retry with a
                // larger buffer would see a non-zero state and never emit the header.
                *_Pstate = 0;
                return _Mybase::partial; // not enough room for both
            } else if (*_Pstate == _STD _Little_first) {
                for (_Count = 0; _Count < _Bytes_per_word; ++_Count) { // put LS byte first
                    *_Mid2++ = static_cast<_Byte>(static_cast<unsigned char>(_Header));
                    _Header >>= 8;
                }
            } else {
                // shift the significant bytes up to bits 24-31 so they can be peeled off from the top
                for (_Header <<= 8 * (4 - _Bytes_per_word), _Count = 0; _Count < _Bytes_per_word; ++_Count) {
                    // put MS byte first
                    *_Mid2++ = static_cast<_Byte>(static_cast<unsigned char>(_Header >> 24));
                    _Header <<= 8;
                }
            }
        }

        while (_Mid1 != _Last1 && _Bytes_per_word <= _Last2 - _Mid2) { // convert and put a wide char
            unsigned long _Ch = static_cast<unsigned long>(*_Mid1++);

            if (_Maxcode < _Ch) {
                // note: _Mid1 has already been advanced past the rejected element
                return _Mybase::error;
            }

            if (*_Pstate == _STD _Little_first) {
                for (_Count = 0; _Count < _Bytes_per_word; ++_Count) { // put LS byte first
                    *_Mid2++ = static_cast<_Byte>(static_cast<unsigned char>(_Ch));
                    _Ch >>= 8;
                }
            } else {
                // shift the significant bytes up to bits 24-31 so they can be peeled off from the top
                for (_Ch <<= 8 * (4 - _Bytes_per_word), _Count = 0; _Count < _Bytes_per_word; ++_Count) {
                    // put MS byte first
                    *_Mid2++ = static_cast<_Byte>(static_cast<unsigned char>(_Ch >> 24));
                    _Ch <<= 8;
                }
            }
        }

        return _First1 == _Mid1 ? _Mybase::partial : _Mybase::ok;
    }

    // Writes no bytes: sets _Mid2 to _First2 and returns ok.
    virtual result do_unshift(_Statype&, _Byte* _First2, _Byte*, _Byte*& _Mid2) const override {
        // generate bytes to return to default shift state
        _Mid2 = _First2;
        return _Mybase::ok;
    }

    // Count how many elements do_in would produce from [_First1, _Last1), up to _Count.
    // Works on a copy of _State, so the caller's state object is left unmodified.
    virtual int do_length(
        _Statype& _State, const _Byte* _First1, const _Byte* _Last1, size_t _Count) const noexcept override {
        // return min(_Count, converted length of bytes [_First1, _Last1))
        size_t _Wchars    = 0;
        _Statype _Mystate = _State;

        while (_Wchars < _Count && _First1 != _Last1) { // convert another wide character
            const _Byte* _Mid1;
            _Elem* _Mid2;
            _Elem _Ch; // one-element scratch output buffer

            switch (do_in(_Mystate, _First1, _Last1, _Mid1, &_Ch, &_Ch + 1,
                _Mid2)) { // test result of single wide-char conversion
            case _Mybase::noconv:
                return static_cast<int>(_Wchars + (_Last1 - _First1));

            case _Mybase::ok:
                if (_Mid2 == &_Ch + 1) {
                    ++_Wchars; // replacement do_in might not convert one
                }

                _First1 = _Mid1;
                break;

            default:
                return static_cast<int>(_Wchars); // error or partial
            }
        }

        return static_cast<int>(_Wchars);
    }

    virtual bool do_always_noconv() const noexcept override { // return true if conversions never change input
        return false;
    }

    virtual int do_max_length() const noexcept override { // return maximum length required for a conversion
        if _CONSTEXPR_IF ((_Mode & (_STD consume_header | _STD generate_header)) != 0) {
            return static_cast<int>(2 * _Bytes_per_word); // header word plus one element
        } else {
            return static_cast<int>(_Bytes_per_word);
        }
    }

    virtual int do_encoding() const noexcept override { // return length of code sequence (from codecvt)
        if _CONSTEXPR_IF ((_Mode & (_STD consume_header | _STD generate_header)) != 0) {
            return -1; // -1 => state dependent
        } else {
            return static_cast<int>(_Bytes_per_word);
        }
    }
};
_STL_RESTORE_DEPRECATED_WARNING
} // namespace cvt
} // namespace stdext
#pragma pop_macro("new")
_STL_RESTORE_CLANG_WARNINGS
#pragma warning(pop)
#pragma pack(pop)
#endif // _STL_COMPILER_PREPROCESSOR
#endif // _CVT_ONE_ONE_