gecko-dev/xpcom/string/nsReadableUtils.cpp

632 строки
18 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsReadableUtils.h"
#include <algorithm>
#include "mozilla/CheckedInt.h"
#include "mozilla/Utf8.h"
#include "nscore.h"
#include "nsMemory.h"
#include "nsString.h"
#include "nsTArray.h"
#include "nsUTF8Utils.h"
using mozilla::Span;
/**
 * Allocates (without filling) a raw buffer of |CharT| units big enough to
 * hold a copy of the supplied string plus one zero terminator.
 *
 * @param aSource a string you will eventually be making a copy of; only its
 *                |Length()| is consulted here
 * @param (unnamed) a |CharT*| whose value is ignored; it only selects the
 *                character type of the returned buffer
 * @return whatever |malloc| returned for that size, cast to |CharT*|. The
 *         caller owns the buffer and must release it with |free|.
 */
template <class FromStringT, class CharT>
inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) {
  // One extra unit is reserved for the terminator the caller will write.
  const size_t unitCount = size_t(aSource.Length()) + 1;
  void* const rawBuffer = malloc(unitCount * sizeof(CharT));
  return static_cast<CharT*>(rawBuffer);
}
/**
 * Infallible flavour of |ToNewCString| for UTF-16 input: forwards to the
 * fallible overload and crashes if that overload returns null.
 */
char* ToNewCString(const nsAString& aSource) {
  char* const copy = ToNewCString(aSource, mozilla::fallible);
  if (copy == nullptr) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return copy;
}
/**
 * Builds a freshly allocated, zero-terminated |char| copy of a UTF-16 string
 * by running it through |LossyConvertUtf16toLatin1|.
 *
 * @param aSource the UTF-16 string to convert
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the new buffer (sized by |AllocateStringCopy|), or nullptr if the
 *         allocation failed
 */
char* ToNewCString(const nsAString& aSource,
                   const mozilla::fallible_t& aFallible) {
  char* buffer = AllocateStringCopy(aSource, static_cast<char*>(nullptr));
  if (buffer == nullptr) {
    return nullptr;
  }

  const auto unitCount = aSource.Length();
  LossyConvertUtf16toLatin1(aSource, Span(buffer, unitCount));
  // The allocation holds Length() + 1 units, so this slot is in bounds.
  buffer[unitCount] = 0;
  return buffer;
}
/**
 * Converts a UTF-16 string into a freshly |malloc|ed, zero-terminated UTF-8
 * buffer.
 *
 * @param aSource the UTF-16 string to convert
 * @param aUTF8Count optional out-param; when non-null it receives the number
 *                   of code units written, not counting the terminator
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the new buffer, or nullptr if the size computation overflowed or
 *         the allocation failed
 */
char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count,
                      const mozilla::fallible_t& aFallible) {
  // No attempt is made to size the allocation tightly; the worst case is
  // used instead: three output units per input unit (which is what
  // ConvertUtf16toUtf8 asks for) plus one for the terminator. The arithmetic
  // is checked in uint32_t because aUTF8Count is a uint32_t* for historical
  // reasons.
  mozilla::CheckedInt<uint32_t> capacity(aSource.Length());
  capacity *= 3;
  capacity += 1;
  if (!capacity.isValid()) {
    return nullptr;
  }

  const size_t capacityBytes = capacity.value();
  char* utf8 = static_cast<char*>(malloc(capacityBytes));
  if (utf8 == nullptr) {
    return nullptr;
  }

  const size_t unitsWritten =
      ConvertUtf16toUtf8(aSource, Span(utf8, capacityBytes));
  utf8[unitsWritten] = 0;

  if (aUTF8Count != nullptr) {
    *aUTF8Count = unitsWritten;
  }
  return utf8;
}
/**
 * Infallible flavour of |ToNewUTF8String|: forwards to the fallible overload
 * and crashes if that overload returns null.
 */
char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) {
  char* const utf8 = ToNewUTF8String(aSource, aUTF8Count, mozilla::fallible);
  if (utf8 == nullptr) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return utf8;
}
/**
 * Infallible flavour of |ToNewCString| for 8-bit input: forwards to the
 * fallible overload and crashes if that overload returns null.
 */
char* ToNewCString(const nsACString& aSource) {
  char* const copy = ToNewCString(aSource, mozilla::fallible);
  if (copy == nullptr) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return copy;
}
/**
 * Makes a freshly allocated, zero-terminated byte-for-byte copy of an 8-bit
 * string.
 *
 * @param aSource the string to copy
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the new buffer (sized by |AllocateStringCopy|), or nullptr if the
 *         allocation failed
 */
char* ToNewCString(const nsACString& aSource,
                   const mozilla::fallible_t& aFallible) {
  char* const copy = AllocateStringCopy(aSource, static_cast<char*>(nullptr));
  if (copy == nullptr) {
    return nullptr;
  }

  // Source and destination share a character type, so a plain memory copy
  // followed by the terminator is all that is needed.
  const auto unitCount = aSource.Length();
  memcpy(copy, aSource.BeginReading(), unitCount * sizeof(char));
  copy[unitCount] = 0;
  return copy;
}
/**
 * Infallible flavour of |ToNewUnicode| for UTF-16 input: forwards to the
 * fallible overload and crashes if that overload returns null.
 */
char16_t* ToNewUnicode(const nsAString& aSource) {
  char16_t* const copy = ToNewUnicode(aSource, mozilla::fallible);
  if (copy == nullptr) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return copy;
}
/**
 * Makes a freshly allocated, zero-terminated unit-for-unit copy of a UTF-16
 * string.
 *
 * @param aSource the string to copy
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the new buffer (sized by |AllocateStringCopy|), or nullptr if the
 *         allocation failed
 */
char16_t* ToNewUnicode(const nsAString& aSource,
                       const mozilla::fallible_t& aFallible) {
  char16_t* const copy =
      AllocateStringCopy(aSource, static_cast<char16_t*>(nullptr));
  if (copy == nullptr) {
    return nullptr;
  }

  // Source and destination share a character type, so a plain memory copy
  // followed by the terminator is all that is needed.
  const auto unitCount = aSource.Length();
  memcpy(copy, aSource.BeginReading(), unitCount * sizeof(char16_t));
  copy[unitCount] = 0;
  return copy;
}
/**
 * Infallible flavour of |ToNewUnicode| for 8-bit input: forwards to the
 * fallible overload and crashes if that overload returns null.
 */
char16_t* ToNewUnicode(const nsACString& aSource) {
  char16_t* const widened = ToNewUnicode(aSource, mozilla::fallible);
  if (widened == nullptr) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return widened;
}
/**
 * Builds a freshly allocated, zero-terminated UTF-16 copy of an 8-bit string
 * by running it through |ConvertLatin1toUtf16|.
 *
 * @param aSource the 8-bit string to convert
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the new buffer (sized by |AllocateStringCopy|), or nullptr if the
 *         allocation failed
 */
char16_t* ToNewUnicode(const nsACString& aSource,
                       const mozilla::fallible_t& aFallible) {
  char16_t* widened =
      AllocateStringCopy(aSource, static_cast<char16_t*>(nullptr));
  if (widened == nullptr) {
    return nullptr;
  }

  const auto unitCount = aSource.Length();
  ConvertLatin1toUtf16(aSource, Span(widened, unitCount));
  // The allocation holds Length() + 1 units, so this slot is in bounds.
  widened[unitCount] = 0;
  return widened;
}
/**
 * Converts a UTF-8 string into a freshly |malloc|ed, zero-terminated UTF-16
 * buffer.
 *
 * @param aSource the UTF-8 string to convert
 * @param aUTF16Count optional out-param; when non-null it receives the number
 *                    of code units written, not counting the terminator
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the new buffer, or nullptr if the size computation overflowed or
 *         the allocation failed
 */
char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count,
                           const mozilla::fallible_t& aFallible) {
  // ConvertUtf8toUtf16 wants a destination of source length plus one.
  uint32_t conversionCapacity = aSource.Length() + 1;  // Can't overflow

  // The allocation additionally holds the zero terminator, and is measured
  // in bytes rather than UTF-16 units.
  mozilla::CheckedInt<size_t> byteCount(conversionCapacity);
  byteCount += 1;
  byteCount *= sizeof(char16_t);
  if (!byteCount.isValid()) {
    return nullptr;
  }

  char16_t* utf16 = static_cast<char16_t*>(malloc(byteCount.value()));
  if (utf16 == nullptr) {
    return nullptr;
  }

  const size_t unitsWritten =
      ConvertUtf8toUtf16(aSource, Span(utf16, conversionCapacity));
  utf16[unitsWritten] = 0;

  if (aUTF16Count != nullptr) {
    *aUTF16Count = unitsWritten;
  }
  return utf16;
}
/**
 * Infallible flavour of |UTF8ToNewUnicode|: forwards to the fallible overload
 * and crashes if that overload returns null.
 */
char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) {
  char16_t* const utf16 =
      UTF8ToNewUnicode(aSource, aUTF16Count, mozilla::fallible);
  if (utf16 == nullptr) {
    MOZ_CRASH("Unable to allocate memory");
  }
  return utf16;
}
/**
 * Copies |aLength| UTF-16 code units out of |aSource|, starting at
 * |aSrcOffset|, into the caller-supplied buffer. No terminator is written.
 *
 * @param aSource the string to copy from
 * @param aSrcOffset index of the first code unit to copy
 * @param aDest destination buffer; must have room for |aLength| units
 * @param aLength number of code units to copy
 * @return |aDest|
 */
char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset,
                        char16_t* aDest, uint32_t aLength) {
  // Sum in 64 bits: in 32-bit arithmetic |aSrcOffset + aLength| can wrap
  // around to a small value and let an out-of-range request slip past the
  // check.
  MOZ_ASSERT(uint64_t(aSrcOffset) + uint64_t(aLength) <= aSource.Length());
  memcpy(aDest, aSource.BeginReading() + aSrcOffset,
         size_t(aLength) * sizeof(char16_t));
  return aDest;
}
/**
 * Upper-cases the ASCII letters 'a'..'z' of |aCString| in place. Every other
 * byte is left as it is.
 */
void ToUpperCase(nsACString& aCString) {
  char* cursor = aCString.BeginWriting();
  char* const stop = cursor + aCString.Length();
  for (; cursor != stop; ++cursor) {
    const char current = *cursor;
    const bool isLowerAscii = current >= 'a' && current <= 'z';
    if (isLowerAscii) {
      *cursor = current - ('a' - 'A');
    }
  }
}
/**
 * Writes into |aDest| a copy of |aSource| in which the ASCII letters
 * 'a'..'z' have been upper-cased. |aDest| is first resized to the length of
 * |aSource|; every other byte is copied through unchanged.
 */
void ToUpperCase(const nsACString& aSource, nsACString& aDest) {
  aDest.SetLength(aSource.Length());

  const char* const first = aSource.BeginReading();
  const char* const last = first + aSource.Length();
  char* const out = aDest.BeginWriting();

  std::transform(first, last, out, [](char aCh) -> char {
    return (aCh >= 'a' && aCh <= 'z') ? aCh - ('a' - 'A') : aCh;
  });
}
/**
 * Lower-cases the ASCII letters 'A'..'Z' of |aCString| in place. Every other
 * byte is left as it is.
 */
void ToLowerCase(nsACString& aCString) {
  char* cursor = aCString.BeginWriting();
  char* const stop = cursor + aCString.Length();
  for (; cursor != stop; ++cursor) {
    const char current = *cursor;
    const bool isUpperAscii = current >= 'A' && current <= 'Z';
    if (isUpperAscii) {
      *cursor = current + ('a' - 'A');
    }
  }
}
/**
 * Writes into |aDest| a copy of |aSource| in which the ASCII letters
 * 'A'..'Z' have been lower-cased. |aDest| is first resized to the length of
 * |aSource|; every other byte is copied through unchanged.
 */
void ToLowerCase(const nsACString& aSource, nsACString& aDest) {
  aDest.SetLength(aSource.Length());

  const char* const first = aSource.BeginReading();
  const char* const last = first + aSource.Length();
  char* const out = aDest.BeginWriting();

  std::transform(first, last, out, [](char aCh) -> char {
    return (aCh >= 'A' && aCh <= 'Z') ? aCh + ('a' - 'A') : aCh;
  });
}
void ParseString(const nsACString& aSource, char aDelimiter,
nsTArray<nsCString>& aArray) {
nsACString::const_iterator start, end;
aSource.BeginReading(start);
aSource.EndReading(end);
for (;;) {
nsACString::const_iterator delimiter = start;
FindCharInReadable(aDelimiter, delimiter, end);
if (delimiter != start) {
aArray.AppendElement(Substring(start, delimiter));
}
if (delimiter == end) {
break;
}
start = ++delimiter;
if (start == end) {
break;
}
}
}
/**
* Forward search for |aPattern| inside the range [aSearchStart, aSearchEnd).
*
* |aCompare| is called on one pattern character and one searched character
* at a time; a non-zero result is treated as a mismatch.
*
* @param aPattern the text to look for
* @param aSearchStart in: where to begin searching. out: the start of the
* match on success; equal to |aSearchEnd| on failure.
* @param aSearchEnd in: where to stop searching. out: one past the last
* matched character on success; left unchanged on failure.
* @param aCompare the character comparison to use
* @return true if a match was found, false otherwise
*
* NOTE(review): an empty |aPattern| is not special-cased; the first
* comparison reads through |aPatternStart| regardless. Presumably callers
* never pass an empty pattern -- confirm.
*/
template <class StringT, class IteratorT>
bool FindInReadable_Impl(
const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
nsTStringComparator<typename StringT::char_type> aCompare) {
bool found_it = false;
// only bother searching at all if we're given a non-empty range to search
if (aSearchStart != aSearchEnd) {
IteratorT aPatternStart, aPatternEnd;
aPattern.BeginReading(aPatternStart);
aPattern.EndReading(aPatternEnd);
// outer loop keeps searching till we find it or run out of string to search
while (!found_it) {
// fast inner loop (that's what it's called, not what it is) looks for a
// potential match: it skips forward while the current character differs
// from the first character of the pattern
while (aSearchStart != aSearchEnd &&
aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
++aSearchStart;
}
// if we broke out of the `fast' loop because we're out of string ...
// we're done: no match
if (aSearchStart == aSearchEnd) {
break;
}
// otherwise, we're at a potential match, let's see if we really hit one
IteratorT testPattern(aPatternStart);
IteratorT testSearch(aSearchStart);
// slow inner loop verifies the potential match (found by the `fast' loop)
// at the current position
for (;;) {
// we already compared the first character in the outer loop,
// so we'll advance before the next comparison
++testPattern;
++testSearch;
// if we verified all the way to the end of the pattern, then we found
// it!
if (testPattern == aPatternEnd) {
found_it = true;
aSearchEnd = testSearch; // return the exact found range through the
// parameters
break;
}
// if we got to the end of the string we're searching before we hit the
// end of the pattern, we'll never find what we're looking for; making
// the range empty lets the outer loop terminate on its next pass
if (testSearch == aSearchEnd) {
aSearchStart = aSearchEnd;
break;
}
// else if we mismatched ... it's time to advance to the next search
// position and get back into the `fast' loop
if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
++aSearchStart;
break;
}
}
}
}
return found_it;
}
/**
* This searches the entire string from right to left, and returns the first
* match found, if any.
*
* Candidate positions are located by comparing the last character of the
* pattern against the searched text, then verified backwards from there.
* |aCompare| is called on one character from each side at a time; a result
* of 0 is treated as a match.
*
* @param aPattern the text to look for
* @param aSearchStart in: the left edge of the range to search. out: the
* start of the match on success; equal to |aSearchEnd| on failure.
* @param aSearchEnd in: the right edge of the range to search. out: one past
* the last matched character on success; left unchanged on failure.
* @param aCompare the character comparison to use
* @return true if a match was found, false otherwise
*
* NOTE(review): |patternEnd| is decremented unconditionally, so an empty
* |aPattern| is not special-cased. Presumably callers never pass an empty
* pattern -- confirm.
*/
template <class StringT, class IteratorT>
bool RFindInReadable_Impl(
const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
nsTStringComparator<typename StringT::char_type> aCompare) {
IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
aPattern.BeginReading(patternStart);
aPattern.EndReading(patternEnd);
// Point to the last character in the pattern
--patternEnd;
// outer loop keeps searching till we run out of string to search
while (aSearchStart != searchEnd) {
// Point to the end position of the next possible match
--searchEnd;
// Check last character, if a match, explore further from here
if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) {
// We're at a potential match, let's see if we really hit one
IteratorT testPattern(patternEnd);
IteratorT testSearch(searchEnd);
// inner loop verifies the potential match at the current position,
// walking both iterators backwards
do {
// if we verified all the way back to the start of the pattern, then we
// found it!
if (testPattern == patternStart) {
aSearchStart = testSearch; // point to start of match
aSearchEnd = ++searchEnd; // point to end of match
return true;
}
// if we got to the start of the string we're searching before we hit
// the start of the pattern, we'll never find what we're looking for
if (testSearch == aSearchStart) {
aSearchStart = aSearchEnd;
return false;
}
// test previous character for a match
--testPattern;
--testSearch;
} while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
}
}
// ran out of string without finding a match: report an empty range
aSearchStart = aSearchEnd;
return false;
}
/**
 * UTF-16 entry point for the forward substring search: hands all arguments
 * to |FindInReadable_Impl| and returns its result. The iterators are passed
 * by reference, so whatever the implementation writes to them is visible to
 * the caller.
 */
bool FindInReadable(const nsAString& aPattern,
                    nsAString::const_iterator& aSearchStart,
                    nsAString::const_iterator& aSearchEnd,
                    nsStringComparator aComparator) {
  const bool matched =
      FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  return matched;
}
/**
 * 8-bit entry point for the forward substring search: hands all arguments to
 * |FindInReadable_Impl| and returns its result. The iterators are passed by
 * reference, so whatever the implementation writes to them is visible to the
 * caller.
 */
bool FindInReadable(const nsACString& aPattern,
                    nsACString::const_iterator& aSearchStart,
                    nsACString::const_iterator& aSearchEnd,
                    nsCStringComparator aComparator) {
  const bool matched =
      FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  return matched;
}
/**
 * Forward substring search over 8-bit text that always compares with
 * |nsCaseInsensitiveCStringComparator|. Hands the work to
 * |FindInReadable_Impl| and returns its result.
 */
bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
                                   nsACString::const_iterator& aSearchStart,
                                   nsACString::const_iterator& aSearchEnd) {
  const bool matched = FindInReadable_Impl(
      aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator);
  return matched;
}
/**
 * UTF-16 entry point for the right-to-left substring search: hands all
 * arguments to |RFindInReadable_Impl| and returns its result. The iterators
 * are passed by reference, so whatever the implementation writes to them is
 * visible to the caller.
 */
bool RFindInReadable(const nsAString& aPattern,
                     nsAString::const_iterator& aSearchStart,
                     nsAString::const_iterator& aSearchEnd,
                     const nsStringComparator aComparator) {
  const bool matched =
      RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  return matched;
}
/**
 * 8-bit entry point for the right-to-left substring search: hands all
 * arguments to |RFindInReadable_Impl| and returns its result. The iterators
 * are passed by reference, so whatever the implementation writes to them is
 * visible to the caller.
 */
bool RFindInReadable(const nsACString& aPattern,
                     nsACString::const_iterator& aSearchStart,
                     nsACString::const_iterator& aSearchEnd,
                     const nsCStringComparator aComparator) {
  const bool matched =
      RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  return matched;
}
/**
 * Looks for |aChar| between |aSearchStart| and |aSearchEnd| using
 * |nsCharTraits<char16_t>::find|.
 *
 * @param aChar the UTF-16 code unit to look for
 * @param aSearchStart in: where to begin. out: advanced to the position
 *                     reported by the lookup, or to |aSearchEnd| if the
 *                     lookup came back null.
 * @param aSearchEnd where to stop
 * @return true if the lookup found |aChar|, false otherwise
 */
bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
                        const nsAString::const_iterator& aSearchEnd) {
  const char16_t* const first = aSearchStart.get();
  const ptrdiff_t remaining = aSearchEnd.get() - first;
  const char16_t* const hit =
      nsCharTraits<char16_t>::find(first, remaining, aChar);

  const bool found = hit != nullptr;
  aSearchStart.advance(found ? hit - first : remaining);
  return found;
}
/**
 * Looks for |aChar| between |aSearchStart| and |aSearchEnd| using
 * |nsCharTraits<char>::find|.
 *
 * @param aChar the byte to look for
 * @param aSearchStart in: where to begin. out: advanced to the position
 *                     reported by the lookup, or to |aSearchEnd| if the
 *                     lookup came back null.
 * @param aSearchEnd where to stop
 * @return true if the lookup found |aChar|, false otherwise
 */
bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
                        const nsACString::const_iterator& aSearchEnd) {
  const char* const first = aSearchStart.get();
  const ptrdiff_t remaining = aSearchEnd.get() - first;
  const char* const hit = nsCharTraits<char>::find(first, remaining, aChar);

  const bool found = hit != nullptr;
  aSearchStart.advance(found ? hit - first : remaining);
  return found;
}
bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) {
nsAString::size_type src_len = aSource.Length(),
sub_len = aSubstring.Length();
if (sub_len > src_len) {
return false;
}
return Substring(aSource, 0, sub_len).Equals(aSubstring);
}
bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
nsStringComparator aComparator) {
nsAString::size_type src_len = aSource.Length(),
sub_len = aSubstring.Length();
if (sub_len > src_len) {
return false;
}
return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
}
bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) {
nsACString::size_type src_len = aSource.Length(),
sub_len = aSubstring.Length();
if (sub_len > src_len) {
return false;
}
return Substring(aSource, 0, sub_len).Equals(aSubstring);
}
bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
nsCStringComparator aComparator) {
nsACString::size_type src_len = aSource.Length(),
sub_len = aSubstring.Length();
if (sub_len > src_len) {
return false;
}
return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
}
bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) {
nsAString::size_type src_len = aSource.Length(),
sub_len = aSubstring.Length();
if (sub_len > src_len) {
return false;
}
return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
}
bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
nsStringComparator aComparator) {
nsAString::size_type src_len = aSource.Length(),
sub_len = aSubstring.Length();
if (sub_len > src_len) {
return false;
}
return Substring(aSource, src_len - sub_len, sub_len)
.Equals(aSubstring, aComparator);
}
bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) {
nsACString::size_type src_len = aSource.Length(),
sub_len = aSubstring.Length();
if (sub_len > src_len) {
return false;
}
return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
}
bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
nsCStringComparator aComparator) {
nsACString::size_type src_len = aSource.Length(),
sub_len = aSubstring.Length();
if (sub_len > src_len) {
return false;
}
return Substring(aSource, src_len - sub_len, sub_len)
.Equals(aSubstring, aComparator);
}
// A single zero code unit. It backs the UTF-16 empty string below and is
// also referenced elsewhere in this file.
static const char16_t empty_buffer[1] = {u'\0'};

/**
 * Returns a reference to a function-local static dependent string that wraps
 * |empty_buffer|. Every call yields the same object.
 */
const nsString& EmptyString() {
  static const nsDependentString sEmptyString(empty_buffer);
  return sEmptyString;
}
/**
 * Returns a reference to a function-local static dependent string that views
 * |empty_buffer| as 8-bit characters. Every call yields the same object.
 */
const nsCString& EmptyCString() {
  static const nsDependentCString sEmptyCString(
      reinterpret_cast<const char*>(empty_buffer));
  return sEmptyCString;
}
/**
 * Returns a reference to a function-local static |nsString| constructed with
 * the |VOIDED| data flag. Every call yields the same object.
 */
const nsString& VoidString() {
  using mozilla::detail::StringDataFlags;
  static const nsString sVoided(StringDataFlags::VOIDED);
  return sVoided;
}
/**
 * Returns a reference to a function-local static |nsCString| constructed
 * with the |VOIDED| data flag. Every call yields the same object.
 */
const nsCString& VoidCString() {
  using mozilla::detail::StringDataFlags;
  static const nsCString sVoided(StringDataFlags::VOIDED);
  return sVoided;
}
/**
 * Three-way comparison of a UTF-8 string against a UTF-16 string, one
 * decoded value at a time.
 *
 * @param aUTF8String the UTF-8 side of the comparison
 * @param aUTF16String the UTF-16 side of the comparison
 * @param aErr forwarded unchanged to both |NextChar| decoders
 * @return 0 if both strings run out together without a difference; -1 if the
 *         UTF-8 side runs out first or has the smaller value at the first
 *         difference; 1 otherwise
 */
int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
                           const nsAString& aUTF16String, bool* aErr) {
  const char* u8;
  const char* u8end;
  aUTF8String.BeginReading(u8);
  aUTF8String.EndReading(u8end);

  const char16_t* u16;
  const char16_t* u16end;
  aUTF16String.BeginReading(u16);
  aUTF16String.EndReading(u16end);

  // Walk both strings in lock step for as long as each has input left.
  while (u8 != u8end && u16 != u16end) {
    // No need for ASCII optimization, since both NextChar()
    // calls get inlined.
    const uint32_t scalar8 = UTF8CharEnumerator::NextChar(&u8, u8end, aErr);
    const uint32_t scalar16 = UTF16CharEnumerator::NextChar(&u16, u16end, aErr);
    if (scalar8 != scalar16) {
      return scalar8 < scalar16 ? -1 : 1;
    }
  }

  // At least one side is exhausted; the shorter string orders first.
  if (u8 == u8end) {
    return u16 == u16end ? 0 : -1;
  }
  return 1;
}
/**
 * Appends |aSource| to |aDest|: as a single |char16_t| when |IS_IN_BMP|
 * accepts it, otherwise as the |H_SURROGATE| / |L_SURROGATE| pair, in that
 * order. Asserts that |aSource| satisfies |IS_VALID_CHAR|.
 */
void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) {
  NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");

  if (!IS_IN_BMP(aSource)) {
    aDest.Append(H_SURROGATE(aSource));
    aDest.Append(L_SURROGATE(aSource));
    return;
  }

  aDest.Append(char16_t(aSource));
}