зеркало из https://github.com/microsoft/STL.git
`<format>` assumes strings are encoded in the active code page (#1834)
This commit is contained in:
Родитель
a10865713d
Коммит
ccc5aaaadc
|
@ -249,6 +249,7 @@ endforeach()
|
|||
# Objs that exist in both libcpmt[d][01].lib and msvcprt[d].lib.
|
||||
set(IMPLIB_SOURCES
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/format.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp
|
||||
|
|
413
stl/inc/format
413
stl/inc/format
|
@ -53,6 +53,7 @@
|
|||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <xfilesystem_abi.h>
|
||||
#include <xutility>
|
||||
|
||||
#pragma pack(push, _CRT_PACKING)
|
||||
|
@ -62,6 +63,8 @@ _STL_DISABLE_CLANG_WARNINGS
|
|||
#pragma push_macro("new")
|
||||
#undef new
|
||||
|
||||
extern "C" _NODISCARD __std_win_error __stdcall __std_get_cvt(__std_code_page _Codepage, _Cvtvec* _Pcvt) noexcept;
|
||||
|
||||
_STD_BEGIN
|
||||
|
||||
class format_error : public runtime_error {
|
||||
|
@ -444,95 +447,232 @@ _NODISCARD constexpr bool _Is_execution_charset_utf8() {
|
|||
#pragma warning(pop)
|
||||
}
|
||||
|
||||
inline constexpr bool _Is_execution_charset_utf8_v = _Is_execution_charset_utf8();
|
||||
inline constexpr char16_t _Width_estimate_low_intervals[] = { // Per N4885 [format.string.std]/11
|
||||
0x1100u, 0x1160u, 0x2329u, 0x232Bu, 0x2E80u, 0x303Fu, 0x3040u, 0xA4D0u, 0xAC00u, 0xD7A4u, 0xF900u, 0xFB00u, 0xFE10u,
|
||||
0xFE1Au, 0xFE30u, 0xFE70u, 0xFF00u, 0xFF61u, 0xFFE0u, 0xFFE7u};
|
||||
|
||||
_NODISCARD constexpr int _Utf8_code_units_in_next_character(
|
||||
const char* const _First, const char* const _Last) noexcept {
|
||||
// Returns a count of the number of UTF-8 code units that compose the first encoded character in [_First, _Last),
|
||||
// or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.
|
||||
const auto _Ch = static_cast<unsigned char>(*_First);
|
||||
if (_Ch < 0b1000'0000u) {
|
||||
return 1;
|
||||
inline constexpr char32_t _Width_estimate_high_intervals[] = { // Per N4885 [format.string.std]/11
|
||||
0x1F300u, 0x1F650u, 0x1F900u, 0x1FA00u, 0x20000u, 0x2FFFEu, 0x30000u, 0x3FFFEu};
|
||||
|
||||
template <auto& _Bounds>
|
||||
_NODISCARD constexpr int _Unicode_width_estimate(const char32_t _Ch) noexcept {
|
||||
// Computes the width estimation for Unicode characters from N4885 [format.string.std]/11
|
||||
int _Result = 1;
|
||||
for (const auto& _Bound : _Bounds) {
|
||||
if (_Ch < _Bound) {
|
||||
return _Result;
|
||||
}
|
||||
_Result ^= 0b11u; // Flip between 1 and 2 on each iteration
|
||||
}
|
||||
|
||||
const auto _Len = static_cast<size_t>(_Last - _First);
|
||||
|
||||
if (_Ch < 0b1110'0000u) {
|
||||
// check for non-lead byte or partial 2-byte encoded character
|
||||
return (_Ch >= 0b1100'0000u && _Len >= 2) ? 2 : -1;
|
||||
}
|
||||
|
||||
if (_Ch < 0b1111'0000u) {
|
||||
// check for partial 3-byte encoded character
|
||||
return (_Len >= 3) ? 3 : -1;
|
||||
}
|
||||
|
||||
// check for partial 4-byte encoded character
|
||||
return (_Len >= 4) ? 4 : -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
_NODISCARD inline int _Double_byte_encoding_code_units_in_next_character(
    const char* const _First, const char* const _Last, const _Cvtvec& _Cvt) {
    // Asks _Mbrtowc to decode the character at the front of [_First, _Last) using the conversion
    // info in _Cvt, and reports how many code units that character occupies.
    // Returns -1 when _Mbrtowc reports a negative result (invalid or incomplete encoded character).
    mbstate_t _State{}; // fresh conversion state for every call
    wchar_t _Decoded; // required by _Mbrtowc; the decoded value itself is not used
    const int _Consumed = _Mbrtowc(&_Decoded, _First, static_cast<size_t>(_Last - _First), &_State, &_Cvt);

    if (_Consumed < 0) { // invalid or incomplete encoded character
        return -1;
    }

    if (_Consumed == 0) { // next code unit is '\0', which still occupies one code unit
        return 1;
    }

    return _Consumed;
}
|
||||
template <class _CharT, bool _Statically_Utf8 = _Is_execution_charset_utf8()>
|
||||
class _Fmt_codec;
|
||||
|
||||
_NODISCARD inline int _Code_units_in_next_character(const char* _First, const char* _Last, const _Cvtvec& _Cvt) {
|
||||
// Returns a count of the number of code units that compose the first encoded character in
|
||||
// [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
|
||||
// *_First is not a valid lead byte.
|
||||
_STL_INTERNAL_CHECK(_First < _Last);
|
||||
template <bool _Statically_Utf8>
|
||||
class _Fmt_codec_base {};
|
||||
|
||||
if constexpr (_Is_execution_charset_utf8_v) {
|
||||
return _Utf8_code_units_in_next_character(_First, _Last);
|
||||
} else {
|
||||
switch (_Cvt._Mbcurmax) {
|
||||
default:
|
||||
_STL_INTERNAL_CHECK(!"Bad number of encoding units for this code page");
|
||||
[[fallthrough]];
|
||||
case 1:
|
||||
return 1; // all characters have only one code unit
|
||||
template <>
|
||||
class _Fmt_codec_base<false> {
|
||||
protected:
|
||||
_Cvtvec _Cvt;
|
||||
|
||||
case 2:
|
||||
return _Double_byte_encoding_code_units_in_next_character(_First, _Last, _Cvt);
|
||||
|
||||
case 4: // Assume UTF-8 (as does _Mbrtowc)
|
||||
return _Utf8_code_units_in_next_character(_First, _Last);
|
||||
_NODISCARD int _Double_byte_encoding_code_units_in_next_character(
|
||||
const char* const _First, const char* const _Last) const {
|
||||
// Returns a count of the number of code units that compose the first encoded character in [_First, _Last),
|
||||
// or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.
|
||||
wchar_t _Wide;
|
||||
mbstate_t _St{};
|
||||
const auto _Len = static_cast<size_t>(_Last - _First);
|
||||
const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt);
|
||||
if (_Result > 0) {
|
||||
return _Result;
|
||||
} else if (_Result < 0) { // invalid or incomplete encoded character
|
||||
return -1;
|
||||
} else { // next code unit is '\0'
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_NODISCARD inline int _Code_units_in_next_character(const wchar_t* _First, const wchar_t* _Last, const _Cvtvec&) {
|
||||
// Returns a count of the number of code units that compose the first encoded character in
|
||||
// [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
|
||||
// *_First is an unpaired surrogate.
|
||||
_STL_INTERNAL_CHECK(_First < _Last);
|
||||
_Fmt_codec_base() {
|
||||
#ifndef _FORMAT_CODEPAGE
|
||||
#define _FORMAT_CODEPAGE __std_code_page::_Acp
|
||||
#endif // _FORMAT_CODEPAGE
|
||||
[[maybe_unused]] const __std_win_error _Result = __std_get_cvt(_FORMAT_CODEPAGE, &_Cvt);
|
||||
_STL_INTERNAL_CHECK(_Result == __std_win_error::_Success);
|
||||
#undef _FORMAT_CODEPAGE
|
||||
}
|
||||
};
|
||||
|
||||
if (*_First < 0xD800u || *_First >= 0xE000u) {
|
||||
return 1;
|
||||
template <bool _Statically_Utf8>
|
||||
class _Fmt_codec<char, _Statically_Utf8> : private _Fmt_codec_base<_Statically_Utf8> {
|
||||
private:
|
||||
_NODISCARD static constexpr int _Utf8_code_units_in_next_character(
|
||||
const char* const _First, const char* const _Last) noexcept {
|
||||
// Returns a count of the number of UTF-8 code units that compose the first encoded character in [_First,
|
||||
// _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead
|
||||
// byte.
|
||||
const auto _Ch = static_cast<unsigned char>(*_First);
|
||||
if (_Ch < 0b1000'0000u) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
const auto _Len = static_cast<size_t>(_Last - _First);
|
||||
|
||||
if (_Ch < 0b1110'0000u) {
|
||||
// check for non-lead byte or partial 2-byte encoded character
|
||||
return (_Ch >= 0b1100'0000u && _Len >= 2) ? 2 : -1;
|
||||
}
|
||||
|
||||
if (_Ch < 0b1111'0000u) {
|
||||
// check for partial 3-byte encoded character
|
||||
return (_Len >= 3) ? 3 : -1;
|
||||
}
|
||||
|
||||
// check for partial 4-byte encoded character
|
||||
return (_Len >= 4) ? 4 : -1;
|
||||
}
|
||||
|
||||
if (*_First >= 0xDC00u) { // unpaired low surrogate
|
||||
return -1;
|
||||
_NODISCARD static int _Estimate_utf8_character_width(const char* const _Ptr, const int _Units) noexcept {
|
||||
// Return an estimate for the width of the character composed of _Units code units,
|
||||
// whose first code unit is denoted by _Ptr.
|
||||
auto _Ch = static_cast<char32_t>(*_Ptr);
|
||||
switch (_Units) {
|
||||
default:
|
||||
case 1:
|
||||
case 2:
|
||||
return 1;
|
||||
case 3:
|
||||
_Ch &= 0b1111u;
|
||||
break;
|
||||
case 4:
|
||||
_Ch &= 0b111u;
|
||||
break;
|
||||
}
|
||||
|
||||
for (int _Idx = 1; _Idx < _Units; ++_Idx) {
|
||||
_Ch = _Ch << 6 | (_Ptr[_Idx] & 0b11'1111u);
|
||||
}
|
||||
|
||||
if (_Units == 3) {
|
||||
return _Unicode_width_estimate<_Width_estimate_low_intervals>(_Ch);
|
||||
}
|
||||
|
||||
return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);
|
||||
}
|
||||
|
||||
if (++_First == _Last || *_First < 0xDC00u || *_First >= 0xE000u) { // unpaired high surrogate
|
||||
return -1;
|
||||
public:
|
||||
_NODISCARD int _Units_in_next_character(const char* const _First, const char* const _Last) const noexcept {
|
||||
// Returns a count of the number of code units that compose the first encoded character in
|
||||
// [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
|
||||
// *_First is not a valid lead byte.
|
||||
_STL_INTERNAL_CHECK(_First < _Last);
|
||||
|
||||
if constexpr (_Statically_Utf8) {
|
||||
return _Utf8_code_units_in_next_character(_First, _Last);
|
||||
} else {
|
||||
switch (this->_Cvt._Mbcurmax) {
|
||||
default:
|
||||
_STL_INTERNAL_CHECK(!"Bad number of encoding units for this code page");
|
||||
[[fallthrough]];
|
||||
case 1:
|
||||
return 1; // all characters have only one code unit
|
||||
|
||||
case 2:
|
||||
return this->_Double_byte_encoding_code_units_in_next_character(_First, _Last);
|
||||
|
||||
case 4: // Assume UTF-8 (as does _Mbrtowc)
|
||||
return _Utf8_code_units_in_next_character(_First, _Last);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 2; // surrogate pair
|
||||
}
|
||||
_NODISCARD const char* _Find_encoded(const char* _First, const char* const _Last, const char _Val) const {
|
||||
// Returns the first occurrence of _Val as an encoded character (and not, for example, as a
|
||||
// continuation byte) in [_First, _Last).
|
||||
if constexpr (_Statically_Utf8) {
|
||||
return _Find_unchecked(_First, _Last, _Val);
|
||||
} else {
|
||||
if (this->_Cvt._Mbcurmax == 1 || this->_Cvt._Mbcurmax == 4) {
|
||||
// As above and in _Mbrtowc, assume 4-byte encodings are UTF-8
|
||||
return _Find_unchecked(_First, _Last, _Val);
|
||||
}
|
||||
|
||||
while (_First != _Last && *_First != _Val) {
|
||||
const int _Units = _Units_in_next_character(_First, _Last);
|
||||
if (_Units < 0) {
|
||||
_THROW(format_error("Invalid encoded character in format string."));
|
||||
}
|
||||
_First += _Units;
|
||||
}
|
||||
|
||||
return _First;
|
||||
}
|
||||
}
|
||||
|
||||
_NODISCARD int _Estimate_width(const char* const _Ptr, const int _Units) const {
|
||||
// Return an estimate for the width of the character composed of _Units code units,
|
||||
// whose first code unit is denoted by _Ptr.
|
||||
if constexpr (_Statically_Utf8) {
|
||||
return _Estimate_utf8_character_width(_Ptr, _Units);
|
||||
} else {
|
||||
if (this->_Cvt._Mbcurmax != 4) {
|
||||
// not a Unicode encoding; estimate width == number of code units
|
||||
return _Units;
|
||||
}
|
||||
|
||||
// assume UTF-8
|
||||
return _Estimate_utf8_character_width(_Ptr, _Units);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <bool _Statically_Utf8>
|
||||
class _Fmt_codec<wchar_t, _Statically_Utf8> {
|
||||
public:
|
||||
_NODISCARD int _Units_in_next_character(const wchar_t* _First, const wchar_t* const _Last) const noexcept {
|
||||
// Returns a count of the number of code units that compose the first encoded character in
|
||||
// [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
|
||||
// *_First is an unpaired surrogate.
|
||||
_STL_INTERNAL_CHECK(_First < _Last);
|
||||
|
||||
if (*_First < 0xD800u || *_First >= 0xE000u) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (*_First >= 0xDC00u) { // unpaired low surrogate
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (++_First == _Last || *_First < 0xDC00u || *_First >= 0xE000u) { // unpaired high surrogate
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 2; // surrogate pair
|
||||
}
|
||||
|
||||
_NODISCARD const wchar_t* _Find_encoded(
|
||||
const wchar_t* const _First, const wchar_t* const _Last, const wchar_t _Val) const {
|
||||
return _Find_unchecked(_First, _Last, _Val);
|
||||
}
|
||||
|
||||
_NODISCARD int _Estimate_width(const wchar_t* const _Ptr, const int _Units) const {
|
||||
// Return an estimate for the width of the character composed of _Units code units,
|
||||
// whose first code unit is denoted by _Ptr.
|
||||
auto _Ch = static_cast<char32_t>(*_Ptr);
|
||||
if (_Units == 1) {
|
||||
return _Unicode_width_estimate<_Width_estimate_low_intervals>(_Ch);
|
||||
}
|
||||
|
||||
// surrogate pair
|
||||
_Ch = (_Ch - 0xD800u) << 10;
|
||||
_Ch += static_cast<char32_t>(_Ptr[1]) - 0xDC00u;
|
||||
_Ch += 0x10000u;
|
||||
return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);
|
||||
}
|
||||
};
|
||||
|
||||
template <class _CharT, _Parse_align_callbacks<_CharT> _Callbacks_type>
|
||||
_NODISCARD const _CharT* _Parse_align(const _CharT* _Begin, const _CharT* _End, _Callbacks_type&& _Callbacks) {
|
||||
|
@ -540,7 +680,7 @@ _NODISCARD const _CharT* _Parse_align(const _CharT* _Begin, const _CharT* _End,
|
|||
_STL_INTERNAL_CHECK(_Begin != _End && *_Begin != '}');
|
||||
auto _Parsed_align = _Fmt_align::_None;
|
||||
|
||||
const int _Units = _Code_units_in_next_character(_Begin, _End, _Getcvt());
|
||||
const int _Units = _Fmt_codec<_CharT>{}._Units_in_next_character(_Begin, _End);
|
||||
if (_Units < 0) { // invalid fill character encoding
|
||||
_THROW(format_error("Invalid format string."));
|
||||
}
|
||||
|
@ -800,44 +940,19 @@ _NODISCARD constexpr const _CharT* _Parse_replacement_field(
|
|||
return _Begin + 1;
|
||||
}
|
||||
|
||||
template <class _CharT>
|
||||
_NODISCARD const _CharT* _Find_encoded(
|
||||
const _CharT* _First, const _CharT* _Last, const _CharT _Val, const _Cvtvec& _Cvt) {
|
||||
// Returns the first occurrence of _Val as an encoded character (and not, for example, as a
|
||||
// continuation byte) in [_First, _Last).
|
||||
if constexpr (_Is_execution_charset_utf8_v) {
|
||||
return _Find_unchecked(_First, _Last, _Val);
|
||||
} else {
|
||||
if (_Cvt._Mbcurmax == 1 || _Cvt._Mbcurmax == 4) {
|
||||
// As above and in _Mbrtowc, assume 4-byte encodings are UTF-8
|
||||
return _Find_unchecked(_First, _Last, _Val);
|
||||
}
|
||||
|
||||
while (_First != _Last && *_First != _Val) {
|
||||
const int _Units = _Code_units_in_next_character(_First, _Last, _Cvt);
|
||||
if (_Units < 0) {
|
||||
_THROW(format_error("Invalid encoded character in format string."));
|
||||
}
|
||||
_First += _Units;
|
||||
}
|
||||
|
||||
return _First;
|
||||
}
|
||||
}
|
||||
|
||||
template <class _CharT, _Parse_replacement_field_callbacks<_CharT> _HandlerT>
|
||||
void _Parse_format_string(basic_string_view<_CharT> _Format_str, _HandlerT&& _Handler) {
|
||||
auto _Begin = _Format_str.data();
|
||||
auto _End = _Begin + _Format_str.size();
|
||||
const _Cvtvec& _Cvt = _Getcvt();
|
||||
auto _Begin = _Format_str.data();
|
||||
auto _End = _Begin + _Format_str.size();
|
||||
const _Fmt_codec<_CharT> _Codec;
|
||||
|
||||
while (_Begin != _End) {
|
||||
const _CharT* _OpeningCurl = _Begin;
|
||||
if (*_Begin != '{') {
|
||||
_OpeningCurl = _Find_encoded(_Begin, _End, _CharT{'{'}, _Cvt);
|
||||
_OpeningCurl = _Codec._Find_encoded(_Begin, _End, _CharT{'{'});
|
||||
|
||||
for (;;) {
|
||||
const _CharT* _ClosingCurl = _Find_encoded(_Begin, _OpeningCurl, _CharT{'}'}, _Cvt);
|
||||
const _CharT* _ClosingCurl = _Codec._Find_encoded(_Begin, _OpeningCurl, _CharT{'}'});
|
||||
|
||||
// In this case there are neither closing nor opening curls in [_Begin, _OpeningCurl)
|
||||
// Write the whole thing out.
|
||||
|
@ -2214,95 +2329,15 @@ _NODISCARD _OutputIt _Fmt_write(
|
|||
return _Fmt_write(_STD move(_Out), basic_string_view<_CharT>{_Value}, _Specs, _Locale);
|
||||
}
|
||||
|
||||
inline constexpr char16_t _Width_estimate_low_intervals[] = { // Per N4885 [format.string.std]/11
|
||||
0x1100u, 0x1160u, 0x2329u, 0x232Bu, 0x2E80u, 0x303Fu, 0x3040u, 0xA4D0u, 0xAC00u, 0xD7A4u, 0xF900u, 0xFB00u, 0xFE10u,
|
||||
0xFE1Au, 0xFE30u, 0xFE70u, 0xFF00u, 0xFF61u, 0xFFE0u, 0xFFE7u};
|
||||
|
||||
inline constexpr char32_t _Width_estimate_high_intervals[] = { // Per N4885 [format.string.std]/11
|
||||
0x1F300u, 0x1F650u, 0x1F900u, 0x1FA00u, 0x20000u, 0x2FFFEu, 0x30000u, 0x3FFFEu};
|
||||
|
||||
template <auto& _Bounds>
|
||||
_NODISCARD constexpr int _Unicode_width_estimate(const char32_t _Ch) noexcept {
|
||||
// Computes the width estimation for Unicode characters from N4885 [format.string.std]/11
|
||||
int _Result = 1;
|
||||
for (const auto& _Bound : _Bounds) {
|
||||
if (_Ch < _Bound) {
|
||||
return _Result;
|
||||
}
|
||||
_Result ^= 1;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
_NODISCARD inline int _Estimate_utf8_character_width(const char* const _Ptr, const int _Units) noexcept {
    // Estimates the display width of the UTF-8 encoded character that starts at _Ptr and is made up
    // of _Units code units. Only 3- and 4-unit sequences are decoded; everything else has width 1.
    if (_Units != 3 && _Units != 4) {
        return 1;
    }

    // Keep only the payload bits of the lead byte: 4 bits for a 3-unit sequence, 3 bits for a 4-unit one.
    const char32_t _Lead_mask = _Units == 3 ? 0b1111u : 0b111u;
    char32_t _Code_point      = static_cast<char32_t>(*_Ptr) & _Lead_mask;

    // Append the low 6 bits of each of the remaining code units.
    const char* const _Stop = _Ptr + _Units;
    for (const char* _Cont = _Ptr + 1; _Cont != _Stop; ++_Cont) {
        _Code_point = (_Code_point << 6) | (*_Cont & 0b11'1111u);
    }

    return _Units == 3 ? _Unicode_width_estimate<_Width_estimate_low_intervals>(_Code_point)
                       : _Unicode_width_estimate<_Width_estimate_high_intervals>(_Code_point);
}
|
||||
|
||||
_NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Units, const _Cvtvec& _Cvt) {
    // Estimates the display width of the character that starts at _Ptr and spans _Units code units.
    if constexpr (!_Is_execution_charset_utf8_v) {
        if (_Cvt._Mbcurmax != 4) {
            // not a Unicode encoding; the estimate is simply the number of code units
            return _Units;
        }
    }

    // Either the execution charset is UTF-8, or the code page's 4-unit encoding is assumed to be UTF-8.
    return _Estimate_utf8_character_width(_Ptr, _Units);
}
|
||||
|
||||
_NODISCARD inline int _Estimate_character_width(const wchar_t* _Ptr, const int _Units, const _Cvtvec&) {
    // Return an estimate for the width of the character composed of _Units code units,
    // whose first code unit is denoted by _Ptr. The conversion info parameter is unused for wide text.
    auto _Ch = static_cast<char32_t>(*_Ptr);
    if (_Units == 1) {
        return _Unicode_width_estimate<_Width_estimate_low_intervals>(_Ch);
    }

    // surrogate pair: code point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00)
    _Ch = (_Ch - 0xD800u) << 10; // high surrogates start at 0xD800 (not 0xD8000)
    _Ch += static_cast<char32_t>(_Ptr[1]) - 0xDC00u;
    _Ch += 0x10000u;
    return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);
}
|
||||
|
||||
template <class _CharT>
|
||||
_NODISCARD const _CharT* _Measure_string_prefix(const basic_string_view<_CharT> _Value, int& _Width) {
|
||||
// Returns a pointer past-the-end of the largest prefix of _Value that fits in _Width, or all
|
||||
// of _Value if _Width is negative. Updates _Width to the estimated width of that prefix.
|
||||
const int _Max_width = _Width;
|
||||
auto _Pos = _Value.data();
|
||||
const auto _Last = _Pos + _Value.size();
|
||||
int _Estimated_width = 0; // the estimated width of [_Value.data(), _Pos)
|
||||
const _Cvtvec& _Cvt = _Getcvt();
|
||||
const int _Max_width = _Width;
|
||||
auto _Pos = _Value.data();
|
||||
const auto _Last = _Pos + _Value.size();
|
||||
int _Estimated_width = 0; // the estimated width of [_Value.data(), _Pos)
|
||||
const _Fmt_codec<_CharT> _Codec;
|
||||
constexpr auto _Max_int = (numeric_limits<int>::max)();
|
||||
|
||||
while (_Pos != _Last) {
|
||||
|
@ -2312,8 +2347,8 @@ _NODISCARD const _CharT* _Measure_string_prefix(const basic_string_view<_CharT>
|
|||
}
|
||||
|
||||
// TRANSITION, extended grapheme clustering
|
||||
const int _Units = _Code_units_in_next_character(_Pos, _Last, _Cvt);
|
||||
const int _Character_width = _Estimate_character_width(_Pos, _Units, _Cvt);
|
||||
const int _Units = _Codec._Units_in_next_character(_Pos, _Last);
|
||||
const int _Character_width = _Codec._Estimate_width(_Pos, _Units);
|
||||
|
||||
if (_Max_int - _Character_width < _Estimated_width) { // avoid overflow
|
||||
// Either _Max_width isn't set, or adding this character will exceed it.
|
||||
|
|
|
@ -205,7 +205,7 @@ _BITMASK_OPS(__std_fs_file_flags)
|
|||
|
||||
enum class __std_fs_file_handle : intptr_t { _Invalid = -1 };
|
||||
|
||||
enum class __std_code_page : unsigned int { _Utf8 = 65001 };
|
||||
enum class __std_code_page : unsigned int { _Acp = 0, _Utf8 = 65001 };
|
||||
|
||||
struct __std_fs_convert_result {
|
||||
int _Len;
|
||||
|
|
|
@ -161,6 +161,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|||
(controlled by IncludeInLink and IncludeInImportLib). -->
|
||||
<BuildFiles Include="
|
||||
$(CrtRoot)\github\stl\src\filesystem.cpp;
|
||||
$(CrtRoot)\github\stl\src\format.cpp;
|
||||
$(CrtRoot)\github\stl\src\locale0_implib.cpp;
|
||||
$(CrtRoot)\github\stl\src\nothrow.cpp;
|
||||
$(CrtRoot)\github\stl\src\sharedmutex.cpp;
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
// Implements a win32 API wrapper for <format>
|
||||
|
||||
// This must be as small as possible, because its contents are
|
||||
// injected into the msvcprt.lib and msvcprtd.lib import libraries.
|
||||
// Do not include or define anything else here.
|
||||
// In particular, basic_string must not be included here.
|
||||
|
||||
#include <xfilesystem_abi.h>
|
||||
#include <xlocinfo.h>
|
||||
|
||||
#include <Windows.h>
|
||||
|
||||
static_assert(__std_code_page::_Acp == __std_code_page{CP_ACP});
|
||||
|
||||
extern "C" [[nodiscard]] __std_win_error __stdcall __std_get_cvt(
    const __std_code_page _Codepage, _Cvtvec* const _Pcvt) noexcept {
    // get conversion info for an arbitrary codepage
    // On success, *_Pcvt holds the code page identifier, its maximum character size, and a bitmap of its
    // lead bytes, and __std_win_error::_Success is returned. On failure, *_Pcvt is left value-initialized
    // and the error reported by GetLastError() is returned.
    *_Pcvt = {};

    CPINFOEXW _Info{};
    const DWORD _Flags = 0; // reserved, must be zero
    if (GetCPInfoExW(static_cast<UINT>(_Codepage), _Flags, &_Info) == 0) {
        // NB: the only documented failure mode for GetCPInfoExW is ERROR_INVALID_PARAMETER,
        // so in practice it should never fail for CP_ACP.
        return __std_win_error{GetLastError()};
    }

    _Pcvt->_Page     = _Info.CodePage;
    _Pcvt->_Mbcurmax = _Info.MaxCharSize;

    // LeadByte holds (first, last) pairs terminated by a (0, 0) pair.
    for (int _Idx = 0; _Idx < MAX_LEADBYTES; _Idx += 2) {
        const unsigned int _Range_first = _Info.LeadByte[_Idx];
        const unsigned int _Range_last  = _Info.LeadByte[_Idx + 1];
        if (_Range_first == 0 && _Range_last == 0) {
            break;
        }

        // Each range includes its upper bound (e.g. 0x81-0x9F for code page 932), so the last byte must
        // be marked as well. The counter is wider than a byte so that a range ending at 0xFF terminates.
        for (unsigned int _Byte = _Range_first; _Byte <= _Range_last; ++_Byte) {
            _Pcvt->_Isleadbyte[_Byte >> 3] |= 1u << (_Byte & 0b111u);
        }
    }

    return __std_win_error::_Success;
}
|
|
@ -117,9 +117,10 @@ void test_parse_helper(const CharT* (*func)(const CharT*, const CharT*, callback
|
|||
callback_type&& callbacks = {}) {
|
||||
try {
|
||||
auto end = func(view.data(), view.data() + view.size(), std::move(callbacks));
|
||||
if (expected_end_position != std::basic_string_view<CharT>::npos) {
|
||||
assert(end == view.data() + expected_end_position);
|
||||
if (expected_end_position == std::basic_string_view<CharT>::npos) {
|
||||
expected_end_position = view.size();
|
||||
}
|
||||
assert(end == view.data() + expected_end_position);
|
||||
assert(!err_expected);
|
||||
} catch (const std::format_error&) {
|
||||
assert(err_expected);
|
||||
|
|
|
@ -975,31 +975,6 @@ void test_size() {
|
|||
test_size_helper<charT>(8, STR("{:8}"), STR("scully"));
|
||||
}
|
||||
|
||||
void test_multibyte_format_strings() {
|
||||
#ifndef MSVC_INTERNAL_TESTING // TRANSITION, the Windows version on Contest VMs doesn't always understand ".UTF-8"
|
||||
{
|
||||
assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
|
||||
// Filling with footballs ("\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL)
|
||||
assert(format("{:\xf0\x9f\x8f\x88>4}"sv, 42) == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x34\x32");
|
||||
|
||||
assert(format("{:\xf0\x9f\x8f\x88<4.2}", "1") == "\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
|
||||
assert(format("{:\xf0\x9f\x8f\x88^4.2}", "1") == "\xf0\x9f\x8f\x88\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
|
||||
assert(format("{:\xf0\x9f\x8f\x88>4.2}", "1") == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x31"sv);
|
||||
}
|
||||
|
||||
{
|
||||
assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
|
||||
try {
|
||||
(void) format("{:\x9f\x8f\x88<10}"sv, 42); // Bad fill character encoding: missing lead byte before \x9f
|
||||
assert(false);
|
||||
} catch (const format_error&) {
|
||||
}
|
||||
}
|
||||
#endif // MSVC_INTERNAL_TESTING
|
||||
|
||||
assert(setlocale(LC_ALL, "C") != nullptr);
|
||||
}
|
||||
|
||||
// The libfmt_ tests are derived from tests in
|
||||
// libfmt, Copyright (c) 2012 - present, Victor Zverovich
|
||||
// See NOTICE.txt for more information.
|
||||
|
@ -1318,8 +1293,6 @@ void test() {
|
|||
test_size<char>();
|
||||
test_size<wchar_t>();
|
||||
|
||||
test_multibyte_format_strings();
|
||||
|
||||
libfmt_formatter_test_escape<char>();
|
||||
libfmt_formatter_test_escape<wchar_t>();
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#define _FORMAT_CODEPAGE (__std_code_page{932})
|
||||
|
||||
#include <cassert>
|
||||
#include <clocale>
|
||||
#include <format>
|
||||
|
@ -11,55 +13,76 @@
|
|||
using namespace std;
|
||||
|
||||
void test_multibyte_format_strings() {
|
||||
{
|
||||
assert(setlocale(LC_ALL, ".932") != nullptr);
|
||||
const auto s =
|
||||
"\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
|
||||
assert(format(s) == s);
|
||||
const auto s = "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
|
||||
assert(format(s) == s);
|
||||
|
||||
assert(format("{:.2}", s) == "\x93\xfa"sv);
|
||||
assert(format("{:4.2}", s) == "\x93\xfa "sv);
|
||||
assert(format("{:.2}", s) == "\x93\xfa"sv);
|
||||
assert(format("{:4.2}", s) == "\x93\xfa "sv);
|
||||
|
||||
assert(format("{:<4.2}", s) == "\x93\xfa "sv);
|
||||
assert(format("{:^4.2}", s) == " \x93\xfa "sv);
|
||||
assert(format("{:>4.2}", s) == " \x93\xfa"sv);
|
||||
assert(format("{:<4.2}", s) == "\x93\xfa "sv);
|
||||
assert(format("{:^4.2}", s) == " \x93\xfa "sv);
|
||||
assert(format("{:>4.2}", s) == " \x93\xfa"sv);
|
||||
|
||||
assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv);
|
||||
assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv);
|
||||
assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv);
|
||||
assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv);
|
||||
assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv);
|
||||
assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv);
|
||||
|
||||
assert(format("{:.3}", s) == "\x93\xfa"sv);
|
||||
assert(format("{:4.3}", s) == "\x93\xfa "sv);
|
||||
assert(format("{:.3}", s) == "\x93\xfa"sv);
|
||||
assert(format("{:4.3}", s) == "\x93\xfa "sv);
|
||||
|
||||
assert(format("{:<4.3}", s) == "\x93\xfa "sv);
|
||||
assert(format("{:^4.3}", s) == " \x93\xfa "sv);
|
||||
assert(format("{:>4.3}", s) == " \x93\xfa"sv);
|
||||
assert(format("{:<4.3}", s) == "\x93\xfa "sv);
|
||||
assert(format("{:^4.3}", s) == " \x93\xfa "sv);
|
||||
assert(format("{:>4.3}", s) == " \x93\xfa"sv);
|
||||
|
||||
assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv);
|
||||
assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv);
|
||||
assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
|
||||
}
|
||||
|
||||
assert(setlocale(LC_ALL, "C") != nullptr);
|
||||
assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv);
|
||||
assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv);
|
||||
assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
|
||||
}
|
||||
|
||||
void test_parse_align() {
|
||||
auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
|
||||
const auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
|
||||
|
||||
{
|
||||
assert(setlocale(LC_ALL, ".932") != nullptr);
|
||||
test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3,
|
||||
{.expected_alignment = _Fmt_align::_Left, .expected_fill = "\x93\xfa"sv});
|
||||
test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
|
||||
{.expected_alignment = _Fmt_align::_Right, .expected_fill = "\x96\x7b"sv});
|
||||
test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3,
|
||||
{.expected_alignment = _Fmt_align::_Center, .expected_fill = "\x92\x6e"sv});
|
||||
test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3, //
|
||||
{.expected_alignment = _Fmt_align::_Left, .expected_fill = "\x93\xfa"sv});
|
||||
test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
|
||||
{.expected_alignment = _Fmt_align::_Right, .expected_fill = "\x96\x7b"sv});
|
||||
test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3,
|
||||
{.expected_alignment = _Fmt_align::_Center, .expected_fill = "\x92\x6e"sv});
|
||||
}
|
||||
|
||||
void test_width_estimation() {
|
||||
// Format strings of known width with a trailing delimiter using a precision large enough to
|
||||
// include all but the delimiter to validate the width estimation code.
|
||||
struct test_case {
|
||||
const char* str;
|
||||
int width;
|
||||
};
|
||||
constexpr test_case test_cases[] = {
|
||||
{"\x58", 1},
|
||||
{"x\x58", 2},
|
||||
|
||||
// Pick "short" and "long" codepoints (\x20 and \x96\x7b), then form all permutations of
|
||||
// 3-codepoint prefixes with the same fixed delimiter as above. This gives us coverage of
|
||||
// all adjacent pairings (short/short, short/long, long/short, long/long).
|
||||
{"\x20\x20\x20\x58", 4},
|
||||
{"\x20\x20\x96\x7b\x58", 5},
|
||||
{"\x20\x96\x7b\x20\x58", 5},
|
||||
{"\x96\x7b\x20\x20\x58", 5},
|
||||
{"\x20\x96\x7b\x96\x7b\x58", 6},
|
||||
{"\x96\x7b\x20\x96\x7b\x58", 6},
|
||||
{"\x96\x7b\x96\x7b\x20\x58", 6},
|
||||
{"\x96\x7b\x96\x7b\x96\x7b\x58", 7},
|
||||
};
|
||||
|
||||
for (const auto& test : test_cases) {
|
||||
string_view sv{test.str};
|
||||
sv = sv.substr(0, sv.size() - 1);
|
||||
assert(format("{:.{}}", test.str, test.width - 1) == sv);
|
||||
}
|
||||
|
||||
assert(setlocale(LC_ALL, "C") != nullptr);
|
||||
}
|
||||
|
||||
int main() {
|
||||
test_multibyte_format_strings();
|
||||
test_parse_align();
|
||||
test_width_estimation();
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ bool test_parse_align() {
|
|||
// \x343E (which is from CJK unified ideographs extension A) and similar characters to parse as
|
||||
// an alignment specifier.
|
||||
auto s4 = L"*\x343E"sv;
|
||||
test_parse_helper(parse_align_fn, s4, false, view_typ::npos, {.expected_fill = L"*"sv});
|
||||
test_parse_helper(parse_align_fn, s4, false, 0, {.expected_fill = L"*"sv});
|
||||
|
||||
// test multi-code-unit fill characters
|
||||
{
|
||||
|
@ -47,22 +47,6 @@ bool test_parse_align() {
|
|||
test_parse_helper(parse_align_fn, L"\U0001F3C8^X"sv, false, 3,
|
||||
{.expected_alignment = _Fmt_align::_Center, .expected_fill = L"\U0001F3C8"sv});
|
||||
}
|
||||
} else {
|
||||
// test multibyte fill characters
|
||||
#ifndef MSVC_INTERNAL_TESTING // TRANSITION, the Windows version on Contest VMs doesn't always understand ".UTF-8"
|
||||
{
|
||||
assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
|
||||
// "\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL
|
||||
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88<X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Left, .expected_fill = "\xf0\x9f\x8f\x88"sv});
|
||||
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88>X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Right, .expected_fill = "\xf0\x9f\x8f\x88"sv});
|
||||
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88^X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Center, .expected_fill = "\xf0\x9f\x8f\x88"sv});
|
||||
}
|
||||
#endif // MSVC_INTERNAL_TESTING
|
||||
|
||||
assert(setlocale(LC_ALL, "C") != nullptr);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -12,12 +12,12 @@ using namespace std;
|
|||
|
||||
void test_multibyte_format_strings() {
|
||||
{
|
||||
// Filling with footballs ("\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL)
|
||||
assert(format("{:\xf0\x9f\x8f\x88>4}"sv, 42) == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x34\x32");
|
||||
// Filling with footballs ("\U0001f3c8" is U+1F3C8 AMERICAN FOOTBALL)
|
||||
assert(format("{:\U0001f3c8>4}"sv, 42) == "\U0001f3c8\U0001f3c8\x34\x32");
|
||||
|
||||
assert(format("{:\xf0\x9f\x8f\x88<4.2}", "1") == "\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
|
||||
assert(format("{:\xf0\x9f\x8f\x88^4.2}", "1") == "\xf0\x9f\x8f\x88\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
|
||||
assert(format("{:\xf0\x9f\x8f\x88>4.2}", "1") == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x31"sv);
|
||||
assert(format("{:\U0001f3c8<4.2}", "1") == "\x31\U0001f3c8\U0001f3c8\U0001f3c8"sv);
|
||||
assert(format("{:\U0001f3c8^4.2}", "1") == "\U0001f3c8\x31\U0001f3c8\U0001f3c8"sv);
|
||||
assert(format("{:\U0001f3c8>4.2}", "1") == "\U0001f3c8\U0001f3c8\U0001f3c8\x31"sv);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -33,19 +33,115 @@ void test_parse_align() {
|
|||
auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
|
||||
|
||||
{
|
||||
// "\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL
|
||||
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88<X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Left, .expected_fill = "\xf0\x9f\x8f\x88"sv});
|
||||
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88>X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Right, .expected_fill = "\xf0\x9f\x8f\x88"sv});
|
||||
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88^X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Center, .expected_fill = "\xf0\x9f\x8f\x88"sv});
|
||||
test_parse_helper(parse_align_fn, "\U0001f3c8<X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Left, .expected_fill = "\U0001f3c8"sv});
|
||||
test_parse_helper(parse_align_fn, "\U0001f3c8>X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Right, .expected_fill = "\U0001f3c8"sv});
|
||||
test_parse_helper(parse_align_fn, "\U0001f3c8^X"sv, false, 5,
|
||||
{.expected_alignment = _Fmt_align::_Center, .expected_fill = "\U0001f3c8"sv});
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises the width estimation used when a precision is applied to a string of CharT.
// Every entry is a prefix of known estimated width followed by a one-code-unit delimiter;
// formatting with a precision of (total width - 1) must produce the prefix alone, with
// the delimiter cut off.
template <class CharT>
void test_width_estimation() {
    struct width_case {
        const CharT* text; // prefix followed by the trailing delimiter
        int columns; // expected estimated width of the whole string, delimiter included
    };

    constexpr width_case width_cases[] = {
        {TYPED_LITERAL(CharT, "\x58"), 1},
        {TYPED_LITERAL(CharT, "x\x58"), 2},

        // Probe both sides of each boundary of the intervals listed in N4885 [format.string.std]/11.
        {TYPED_LITERAL(CharT, "\u10ff\x58"), 2},
        {TYPED_LITERAL(CharT, "\u1100\x58"), 3},
        {TYPED_LITERAL(CharT, "\u115f\x58"), 3},
        {TYPED_LITERAL(CharT, "\u1160\x58"), 2},
        {TYPED_LITERAL(CharT, "\u2328\x58"), 2},
        {TYPED_LITERAL(CharT, "\u2329\x58"), 3},
        {TYPED_LITERAL(CharT, "\u232a\x58"), 3},
        {TYPED_LITERAL(CharT, "\u232b\x58"), 2},
        {TYPED_LITERAL(CharT, "\u2e7f\x58"), 2},
        {TYPED_LITERAL(CharT, "\u2e80\x58"), 3},
        {TYPED_LITERAL(CharT, "\u303e\x58"), 3},
        {TYPED_LITERAL(CharT, "\u303f\x58"), 2},
        {TYPED_LITERAL(CharT, "\u3040\x58"), 3},
        {TYPED_LITERAL(CharT, "\ua4cf\x58"), 3},
        {TYPED_LITERAL(CharT, "\ua4d0\x58"), 2},
        {TYPED_LITERAL(CharT, "\uabff\x58"), 2},
        {TYPED_LITERAL(CharT, "\uac00\x58"), 3},
        {TYPED_LITERAL(CharT, "\ud7a3\x58"), 3},
        {TYPED_LITERAL(CharT, "\ud7a4\x58"), 2},
        {TYPED_LITERAL(CharT, "\ud7ff\x58"), 2},

        // The surrogate range (\ud800-\udfff) is deliberately not probed.

        {TYPED_LITERAL(CharT, "\ue000\x58"), 2},
        {TYPED_LITERAL(CharT, "\uf8ff\x58"), 2},
        {TYPED_LITERAL(CharT, "\uf900\x58"), 3},
        {TYPED_LITERAL(CharT, "\ufaff\x58"), 3},
        {TYPED_LITERAL(CharT, "\ufb00\x58"), 2},
        {TYPED_LITERAL(CharT, "\ufe0f\x58"), 2},
        {TYPED_LITERAL(CharT, "\ufe10\x58"), 3},
        {TYPED_LITERAL(CharT, "\ufe19\x58"), 3},
        {TYPED_LITERAL(CharT, "\ufe1a\x58"), 2},
        {TYPED_LITERAL(CharT, "\ufe2f\x58"), 2},
        {TYPED_LITERAL(CharT, "\ufe30\x58"), 3},
        {TYPED_LITERAL(CharT, "\ufe6f\x58"), 3},
        {TYPED_LITERAL(CharT, "\ufe70\x58"), 2},
        {TYPED_LITERAL(CharT, "\ufeff\x58"), 2},
        {TYPED_LITERAL(CharT, "\uff00\x58"), 3},
        {TYPED_LITERAL(CharT, "\uff60\x58"), 3},
        {TYPED_LITERAL(CharT, "\uff61\x58"), 2},
        {TYPED_LITERAL(CharT, "\uffdf\x58"), 2},
        {TYPED_LITERAL(CharT, "\uffe0\x58"), 3},
        {TYPED_LITERAL(CharT, "\uffe6\x58"), 3},
        {TYPED_LITERAL(CharT, "\uffe7\x58"), 2},
        {TYPED_LITERAL(CharT, "\U0001f2ff\x58"), 2},
        {TYPED_LITERAL(CharT, "\U0001f300\x58"), 3},
        {TYPED_LITERAL(CharT, "\U0001f64f\x58"), 3},
        {TYPED_LITERAL(CharT, "\U0001f650\x58"), 2},
        {TYPED_LITERAL(CharT, "\U0001f8ff\x58"), 2},
        {TYPED_LITERAL(CharT, "\U0001f900\x58"), 3},
        {TYPED_LITERAL(CharT, "\U0001f9ff\x58"), 3},
        {TYPED_LITERAL(CharT, "\U0001fa00\x58"), 2},
        {TYPED_LITERAL(CharT, "\U0001ffff\x58"), 2},
        {TYPED_LITERAL(CharT, "\U00020000\x58"), 3},
        {TYPED_LITERAL(CharT, "\U0002fffd\x58"), 3},
        {TYPED_LITERAL(CharT, "\U0002fffe\x58"), 2},
        {TYPED_LITERAL(CharT, "\U0002ffff\x58"), 2},
        {TYPED_LITERAL(CharT, "\U00030000\x58"), 3},
        {TYPED_LITERAL(CharT, "\U0003fffd\x58"), 3},
        {TYPED_LITERAL(CharT, "\U0003fffe\x58"), 2},
        {TYPED_LITERAL(CharT, "\U0010ffff\x58"), 2},

        // Take one "short" codepoint (\u2000) and one "long" codepoint (\ufe40) and build every
        // 3-codepoint prefix from them, each terminated by the same delimiter as above. That
        // covers all adjacent pairings (short/short, short/long, long/short, long/long).
        {TYPED_LITERAL(CharT, "\u2000\u2000\u2000\x58"), 4},
        {TYPED_LITERAL(CharT, "\u2000\u2000\ufe40\x58"), 5},
        {TYPED_LITERAL(CharT, "\u2000\ufe40\u2000\x58"), 5},
        {TYPED_LITERAL(CharT, "\ufe40\u2000\u2000\x58"), 5},
        {TYPED_LITERAL(CharT, "\u2000\ufe40\ufe40\x58"), 6},
        {TYPED_LITERAL(CharT, "\ufe40\u2000\ufe40\x58"), 6},
        {TYPED_LITERAL(CharT, "\ufe40\ufe40\u2000\x58"), 6},
        {TYPED_LITERAL(CharT, "\ufe40\ufe40\ufe40\x58"), 7},
    };

    for (const auto& [text, columns] : width_cases) {
        // The expected output is the input minus its final code unit (the delimiter).
        basic_string_view<CharT> expected{text};
        expected.remove_suffix(1);

        assert(format(TYPED_LITERAL(CharT, "{:.{}}"), text, columns - 1) == expected);
    }
}
|
||||
|
||||
// Runs every test group once: the multibyte format-string checks, the alignment-parsing checks,
// and then the width-estimation checks instantiated for both char and wchar_t.
void run_tests() {
    test_multibyte_format_strings();
    test_parse_align();

    test_width_estimation<char>();
    test_width_estimation<wchar_t>();
}
|
||||
|
||||
int main() {
|
||||
|
|
Загрузка…
Ссылка в новой задаче