2018-04-09 22:02:43 +03:00
|
|
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
|
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
|
|
|
|
/* Character/text operations. */
|
|
|
|
|
|
|
|
#ifndef mozilla_TextUtils_h
|
|
|
|
#define mozilla_TextUtils_h
|
|
|
|
|
2018-04-10 21:42:54 +03:00
|
|
|
#include "mozilla/Assertions.h"
|
2018-04-09 22:02:43 +03:00
|
|
|
#include "mozilla/TypeTraits.h"
|
|
|
|
|
|
|
|
namespace mozilla {
|
|
|
|
|
|
|
|
namespace detail {
|
|
|
|
|
|
|
|
template<typename Char>
|
|
|
|
class MakeUnsignedChar
|
|
|
|
: public MakeUnsigned<Char>
|
|
|
|
{};
|
|
|
|
|
|
|
|
template<>
|
|
|
|
class MakeUnsignedChar<char16_t>
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using Type = char16_t;
|
|
|
|
};
|
|
|
|
|
|
|
|
template<>
|
|
|
|
class MakeUnsignedChar<char32_t>
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
using Type = char32_t;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace detail
|
|
|
|
|
2018-06-07 12:17:10 +03:00
|
|
|
/** Returns true iff |aChar| is ASCII, i.e. in the range [0, 0x80). */
|
|
|
|
template<typename Char>
|
|
|
|
constexpr bool
|
|
|
|
IsAscii(Char aChar)
|
|
|
|
{
|
|
|
|
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
|
|
|
auto uc = static_cast<UnsignedChar>(aChar);
|
|
|
|
return uc < 0x80;
|
|
|
|
}
|
|
|
|
|
2018-09-07 08:41:29 +03:00
|
|
|
template<typename Char>
|
|
|
|
constexpr bool
|
|
|
|
IsNonAsciiLatin1(Char aChar)
|
|
|
|
{
|
|
|
|
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
|
|
|
auto uc = static_cast<UnsignedChar>(aChar);
|
|
|
|
return uc >= 0x80 && uc <= 0xFF;
|
|
|
|
}
|
|
|
|
|
2018-08-13 23:12:10 +03:00
|
|
|
/**
|
|
|
|
* Returns true iff |aChar| matches Ascii Whitespace.
|
|
|
|
*
|
|
|
|
* This function is intended to match the Infra standard
|
|
|
|
* (https://infra.spec.whatwg.org/#ascii-whitespace)
|
|
|
|
*/
|
|
|
|
template<typename Char>
|
|
|
|
constexpr bool
|
|
|
|
IsAsciiWhitespace(Char aChar)
|
|
|
|
{
|
|
|
|
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
|
|
|
auto uc = static_cast<UnsignedChar>(aChar);
|
|
|
|
return uc == 0x9 || uc == 0xA || uc == 0xC || uc == 0xD || uc == 0x20;
|
|
|
|
}
|
|
|
|
|
2018-04-10 21:42:54 +03:00
|
|
|
/**
|
|
|
|
* Returns true iff |aChar| matches [a-z].
|
|
|
|
*
|
|
|
|
* This function is basically what you thought islower was, except its behavior
|
|
|
|
* doesn't depend on the user's current locale.
|
|
|
|
*/
|
|
|
|
template<typename Char>
|
|
|
|
constexpr bool
|
|
|
|
IsAsciiLowercaseAlpha(Char aChar)
|
|
|
|
{
|
|
|
|
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
|
|
|
auto uc = static_cast<UnsignedChar>(aChar);
|
|
|
|
return 'a' <= uc && uc <= 'z';
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns true iff |aChar| matches [A-Z].
|
|
|
|
*
|
|
|
|
* This function is basically what you thought isupper was, except its behavior
|
|
|
|
* doesn't depend on the user's current locale.
|
|
|
|
*/
|
|
|
|
template<typename Char>
|
|
|
|
constexpr bool
|
|
|
|
IsAsciiUppercaseAlpha(Char aChar)
|
|
|
|
{
|
|
|
|
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
|
|
|
auto uc = static_cast<UnsignedChar>(aChar);
|
|
|
|
return 'A' <= uc && uc <= 'Z';
|
|
|
|
}
|
|
|
|
|
2018-04-09 22:02:43 +03:00
|
|
|
/**
 * Returns true iff |aChar| is an ASCII letter, i.e. matches [a-zA-Z].
 *
 * Think of this as isalpha with the locale dependence removed: the answer
 * never varies with the user's current locale.
 */
template<typename Char>
constexpr bool
IsAsciiAlpha(Char aChar)
{
  if (IsAsciiLowercaseAlpha(aChar)) {
    return true;
  }
  return IsAsciiUppercaseAlpha(aChar);
}
|
|
|
|
|
2018-04-13 06:19:30 +03:00
|
|
|
/**
|
|
|
|
* Returns true iff |aChar| matches [0-9].
|
|
|
|
*
|
|
|
|
* This function is basically what you thought isdigit was, except its behavior
|
|
|
|
* doesn't depend on the user's current locale.
|
|
|
|
*/
|
|
|
|
template<typename Char>
|
|
|
|
constexpr bool
|
|
|
|
IsAsciiDigit(Char aChar)
|
|
|
|
{
|
|
|
|
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
|
|
|
auto uc = static_cast<UnsignedChar>(aChar);
|
|
|
|
return '0' <= uc && uc <= '9';
|
|
|
|
}
|
|
|
|
|
2018-06-21 03:21:17 +03:00
|
|
|
/**
|
|
|
|
* Returns true iff |aChar| matches [0-9a-fA-F].
|
|
|
|
*
|
|
|
|
* This function is basically isxdigit, but guaranteed to be only for ASCII.
|
|
|
|
*/
|
|
|
|
template<typename Char>
|
|
|
|
constexpr bool
|
|
|
|
IsAsciiHexDigit(Char aChar)
|
|
|
|
{
|
|
|
|
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
|
|
|
auto uc = static_cast<UnsignedChar>(aChar);
|
|
|
|
return ('0' <= uc && uc <= '9') ||
|
|
|
|
('a' <= uc && uc <= 'f') ||
|
|
|
|
('A' <= uc && uc <= 'F');
|
|
|
|
}
|
|
|
|
|
2018-04-10 21:42:54 +03:00
|
|
|
/**
 * Returns true iff |aChar| is an ASCII letter or decimal digit, i.e. matches
 * [a-zA-Z0-9].
 *
 * Think of this as isalnum with the locale dependence removed: the answer
 * never varies with the user's current locale.
 */
template<typename Char>
constexpr bool
IsAsciiAlphanumeric(Char aChar)
{
  if (IsAsciiDigit(aChar)) {
    return true;
  }
  return IsAsciiAlpha(aChar);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts an ASCII alphanumeric digit [0-9a-zA-Z] to number as if in base-36.
|
|
|
|
* (This function therefore works for decimal, hexadecimal, etc.).
|
|
|
|
*/
|
|
|
|
template<typename Char>
|
|
|
|
uint8_t
|
|
|
|
AsciiAlphanumericToNumber(Char aChar)
|
|
|
|
{
|
|
|
|
using UnsignedChar = typename detail::MakeUnsignedChar<Char>::Type;
|
|
|
|
auto uc = static_cast<UnsignedChar>(aChar);
|
|
|
|
|
|
|
|
if ('0' <= uc && uc <= '9') {
|
|
|
|
return uc - '0';
|
|
|
|
}
|
|
|
|
|
|
|
|
if ('A' <= uc && uc <= 'Z') {
|
|
|
|
return uc - 'A' + 10;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ideally this function would be constexpr, but unfortunately gcc at least as
|
|
|
|
// of 6.4 forbids non-constexpr function calls in unevaluated constexpr
|
|
|
|
// function calls. See bug 1453456. So for now, just assert and leave the
|
|
|
|
// entire function non-constexpr.
|
|
|
|
MOZ_ASSERT('a' <= uc && uc <= 'z',
|
|
|
|
"non-ASCII alphanumeric character can't be converted to number");
|
|
|
|
return uc - 'a' + 10;
|
2018-04-09 22:02:43 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace mozilla
|
|
|
|
|
|
|
|
#endif /* mozilla_TextUtils_h */
|