gecko-dev/xpcom/string/nsReadableUtils.cpp

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsReadableUtils.h"

#include <algorithm>

#include "mozilla/CheckedInt.h"
#include "mozilla/Utf8.h"

#include "nscore.h"
#include "nsString.h"
#include "nsTArray.h"
#include "nsUTF8Utils.h"

using mozilla::Span;

/**
 * Allocates (but does not fill) a malloc'd buffer of |CharT| units that is
 * large enough to hold a copy of the supplied string plus a zero terminator.
 *
 * @param aSource the string you will eventually be making a copy of; only
 *                its Length() is consulted here
 * @param (unnamed) unused pointer argument whose type selects |CharT|
 * @return a new, uninitialized buffer which you must release with |free|,
 *         or nullptr when |malloc| fails.
 */
template <class FromStringT, class CharT>
inline CharT* AllocateStringCopy(const FromStringT& aSource, CharT*) {
  // Widen to size_t first so the "+ 1" is carried out in size_t arithmetic.
  const size_t unitCount = size_t(aSource.Length()) + 1;
  void* const storage = malloc(unitCount * sizeof(CharT));
  return static_cast<CharT*>(storage);
}

/**
 * Infallible variant: forwards to the fallible overload and crashes if that
 * overload returns null, so callers always receive a non-null buffer.
 */
char* ToNewCString(const nsAString& aSource) {
  if (char* copy = ToNewCString(aSource, mozilla::fallible)) {
    return copy;
  }
  MOZ_CRASH("Unable to allocate memory");
}

/**
 * Produces a newly allocated, zero-terminated narrow copy of a UTF-16 string
 * by running it through LossyConvertUtf16toLatin1.
 *
 * @param aSource the string to convert
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the buffer obtained from AllocateStringCopy, or nullptr if that
 *         allocation failed.
 */
char* ToNewCString(const nsAString& aSource,
                   const mozilla::fallible_t& aFallible) {
  const auto unitCount = aSource.Length();
  char* buffer = AllocateStringCopy(aSource, static_cast<char*>(nullptr));
  if (buffer) {
    LossyConvertUtf16toLatin1(aSource, Span(buffer, unitCount));
    // The allocation holds one extra unit for this terminator.
    buffer[unitCount] = 0;
  }
  return buffer;
}

/**
 * Produces a newly malloc'd, zero-terminated UTF-8 conversion of a UTF-16
 * string.
 *
 * @param aSource the string to convert
 * @param aUTF8Count if non-null, receives the number of bytes written
 *                   (terminator excluded)
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the buffer, or nullptr if the size computation overflowed or the
 *         allocation failed.
 */
char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count,
                      const mozilla::fallible_t& aFallible) {
  // Callers of this function look temporary enough that a precise size is
  // not worth computing, so allocate for the worst case: three bytes per
  // UTF-16 unit (what ConvertUtf16toUtf8 asks for) plus one for the
  // terminator. The math is checked in uint32_t because the count is
  // reported through a uint32_t* for historical reasons.
  const mozilla::CheckedInt<uint32_t> capacity =
      mozilla::CheckedInt<uint32_t>(aSource.Length()) * 3 + 1;
  if (!capacity.isValid()) {
    return nullptr;
  }

  const size_t capacityBytes = capacity.value();
  char* buffer = static_cast<char*>(malloc(capacityBytes));
  if (!buffer) {
    return nullptr;
  }

  const size_t bytesWritten =
      ConvertUtf16toUtf8(aSource, Span(buffer, capacityBytes));
  buffer[bytesWritten] = 0;

  if (aUTF8Count) {
    *aUTF8Count = bytesWritten;
  }
  return buffer;
}

/**
 * Infallible variant: forwards to the fallible overload and crashes if that
 * overload returns null.
 */
char* ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) {
  if (char* utf8 = ToNewUTF8String(aSource, aUTF8Count, mozilla::fallible)) {
    return utf8;
  }
  MOZ_CRASH("Unable to allocate memory");
}

/**
 * Infallible variant: forwards to the fallible overload and crashes if that
 * overload returns null.
 */
char* ToNewCString(const nsACString& aSource) {
  if (char* copy = ToNewCString(aSource, mozilla::fallible)) {
    return copy;
  }
  MOZ_CRASH("Unable to allocate memory");
}

/**
 * Produces a newly allocated, zero-terminated byte-for-byte copy of a narrow
 * string.
 *
 * @param aSource the string to copy
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the buffer obtained from AllocateStringCopy, or nullptr if that
 *         allocation failed.
 */
char* ToNewCString(const nsACString& aSource,
                   const mozilla::fallible_t& aFallible) {
  // Source and destination share a character type, so this is a plain copy
  // into a buffer of the right size with no conversion step.
  const auto byteCount = aSource.Length();
  char* buffer = AllocateStringCopy(aSource, static_cast<char*>(nullptr));
  if (buffer) {
    memcpy(buffer, aSource.BeginReading(), byteCount * sizeof(char));
    buffer[byteCount] = 0;
  }
  return buffer;
}

/**
 * Infallible variant: forwards to the fallible overload and crashes if that
 * overload returns null.
 */
char16_t* ToNewUnicode(const nsAString& aSource) {
  if (char16_t* copy = ToNewUnicode(aSource, mozilla::fallible)) {
    return copy;
  }
  MOZ_CRASH("Unable to allocate memory");
}

/**
 * Produces a newly allocated, zero-terminated unit-for-unit copy of a UTF-16
 * string.
 *
 * @param aSource the string to copy
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the buffer obtained from AllocateStringCopy, or nullptr if that
 *         allocation failed.
 */
char16_t* ToNewUnicode(const nsAString& aSource,
                       const mozilla::fallible_t& aFallible) {
  // Source and destination share a character type, so this is a plain copy
  // into a buffer of the right size with no conversion step.
  const auto unitCount = aSource.Length();
  char16_t* buffer =
      AllocateStringCopy(aSource, static_cast<char16_t*>(nullptr));
  if (buffer) {
    memcpy(buffer, aSource.BeginReading(), unitCount * sizeof(char16_t));
    buffer[unitCount] = 0;
  }
  return buffer;
}

/**
 * Infallible variant: forwards to the fallible overload and crashes if that
 * overload returns null.
 */
char16_t* ToNewUnicode(const nsACString& aSource) {
  if (char16_t* widened = ToNewUnicode(aSource, mozilla::fallible)) {
    return widened;
  }
  MOZ_CRASH("Unable to allocate memory");
}

/**
 * Produces a newly allocated, zero-terminated UTF-16 string by running a
 * narrow string through ConvertLatin1toUtf16.
 *
 * @param aSource the string to convert
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the buffer obtained from AllocateStringCopy, or nullptr if that
 *         allocation failed.
 */
char16_t* ToNewUnicode(const nsACString& aSource,
                       const mozilla::fallible_t& aFallible) {
  const auto unitCount = aSource.Length();
  char16_t* buffer =
      AllocateStringCopy(aSource, static_cast<char16_t*>(nullptr));
  if (buffer) {
    ConvertLatin1toUtf16(aSource, Span(buffer, unitCount));
    // The allocation holds one extra unit for this terminator.
    buffer[unitCount] = 0;
  }
  return buffer;
}

/**
 * Produces a newly malloc'd, zero-terminated UTF-16 conversion of a UTF-8
 * string.
 *
 * @param aSource the string to convert
 * @param aUTF16Count if non-null, receives the number of UTF-16 units
 *                    written (terminator excluded)
 * @param aFallible tag selecting this overload; not otherwise used
 * @return the buffer, or nullptr if the size computation overflowed or the
 *         allocation failed.
 */
char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count,
                           const mozilla::fallible_t& aFallible) {
  // ConvertUtf8toUtf16 wants a destination of source length plus one.
  const uint32_t unitCapacity = aSource.Length() + 1;  // Can't overflow

  // One further unit for the zero terminator, then scale to bytes.
  const mozilla::CheckedInt<size_t> byteCount =
      (mozilla::CheckedInt<size_t>(unitCapacity) + 1) * sizeof(char16_t);
  if (!byteCount.isValid()) {
    return nullptr;
  }

  char16_t* buffer = static_cast<char16_t*>(malloc(byteCount.value()));
  if (!buffer) {
    return nullptr;
  }

  const size_t unitsWritten =
      ConvertUtf8toUtf16(aSource, Span(buffer, unitCapacity));
  buffer[unitsWritten] = 0;

  if (aUTF16Count) {
    *aUTF16Count = unitsWritten;
  }
  return buffer;
}

/**
 * Infallible variant: forwards to the fallible overload and crashes if that
 * overload returns null.
 */
char16_t* UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) {
  if (char16_t* utf16 =
          UTF8ToNewUnicode(aSource, aUTF16Count, mozilla::fallible)) {
    return utf16;
  }
  MOZ_CRASH("Unable to allocate memory");
}

/**
 * Copies |aLength| UTF-16 units, starting |aSrcOffset| units into |aSource|,
 * to |aDest| with memcpy. No terminator is written.
 *
 * @param aSource the string to copy from
 * @param aSrcOffset index of the first unit to copy
 * @param aDest destination buffer; the code writes |aLength| units to it
 * @param aLength number of units to copy
 * @return |aDest|
 */
char16_t* CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset,
                        char16_t* aDest, uint32_t aLength) {
  // Sum in 64 bits: a uint32_t addition could wrap around and make an
  // out-of-range request look valid to the assertion.
  MOZ_ASSERT(uint64_t(aSrcOffset) + uint64_t(aLength) <= aSource.Length());
  memcpy(aDest, aSource.BeginReading() + aSrcOffset,
         size_t(aLength) * sizeof(char16_t));
  return aDest;
}

/**
 * Upper-cases the ASCII letters 'a'..'z' of |aCString| in place; every other
 * byte is left as it is.
 */
void ToUpperCase(nsACString& aCString) {
  char* const first = aCString.BeginWriting();
  char* const limit = first + aCString.Length();
  for (char* cursor = first; cursor != limit; ++cursor) {
    const char current = *cursor;
    if ('a' <= current && current <= 'z') {
      *cursor = current - ('a' - 'A');
    }
  }
}

/**
 * Writes into |aDest| a copy of |aSource| in which the ASCII letters
 * 'a'..'z' are upper-cased; every other byte is copied unchanged. |aDest| is
 * resized to the length of |aSource| first.
 */
void ToUpperCase(const nsACString& aSource, nsACString& aDest) {
  aDest.SetLength(aSource.Length());
  const char* in = aSource.BeginReading();
  const char* const stop = in + aSource.Length();
  char* out = aDest.BeginWriting();
  for (; in != stop; ++in, ++out) {
    const char current = *in;
    const bool isLower = current >= 'a' && current <= 'z';
    *out = isLower ? static_cast<char>(current - ('a' - 'A')) : current;
  }
}

/**
 * Lower-cases the ASCII letters 'A'..'Z' of |aCString| in place; every other
 * byte is left as it is.
 */
void ToLowerCase(nsACString& aCString) {
  char* const first = aCString.BeginWriting();
  char* const limit = first + aCString.Length();
  for (char* cursor = first; cursor != limit; ++cursor) {
    const char current = *cursor;
    if ('A' <= current && current <= 'Z') {
      *cursor = current + ('a' - 'A');
    }
  }
}

/**
 * Writes into |aDest| a copy of |aSource| in which the ASCII letters
 * 'A'..'Z' are lower-cased; every other byte is copied unchanged. |aDest| is
 * resized to the length of |aSource| first.
 */
void ToLowerCase(const nsACString& aSource, nsACString& aDest) {
  aDest.SetLength(aSource.Length());
  const char* in = aSource.BeginReading();
  const char* const stop = in + aSource.Length();
  char* out = aDest.BeginWriting();
  for (; in != stop; ++in, ++out) {
    const char current = *in;
    const bool isUpper = current >= 'A' && current <= 'Z';
    *out = isUpper ? static_cast<char>(current + ('a' - 'A')) : current;
  }
}

void ParseString(const nsACString& aSource, char aDelimiter,
                 nsTArray<nsCString>& aArray) {
  nsACString::const_iterator start, end;
  aSource.BeginReading(start);
  aSource.EndReading(end);

  for (;;) {
    nsACString::const_iterator delimiter = start;
    FindCharInReadable(aDelimiter, delimiter, end);

    if (delimiter != start) {
      aArray.AppendElement(Substring(start, delimiter));
    }

    if (delimiter == end) {
      break;
    }
    start = ++delimiter;
    if (start == end) {
      break;
    }
  }
}

/**
 * Forward search for |aPattern| inside the range [aSearchStart, aSearchEnd).
 *
 * @param aPattern the string to look for
 * @param aSearchStart in: start of the range to search; out: start of the
 *                     match on success, equal to |aSearchEnd| on failure
 * @param aSearchEnd in: end of the range to search; out: one past the last
 *                   character of the match on success, unchanged on failure
 * @param aCompare comparator invoked on single characters; a non-zero result
 *                 is treated as a mismatch
 * @return true if a match was found.
 *
 * NOTE(review): the first pattern character is read without checking that
 * the pattern is non-empty -- callers presumably never pass an empty
 * pattern; confirm.
 */
template <class StringT, class IteratorT>
bool FindInReadable_Impl(
    const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
    nsTStringComparator<typename StringT::char_type> aCompare) {
  bool found_it = false;

  // only bother searching at all if we're given a non-empty range to search
  if (aSearchStart != aSearchEnd) {
    IteratorT aPatternStart, aPatternEnd;
    aPattern.BeginReading(aPatternStart);
    aPattern.EndReading(aPatternEnd);

    // outer loop keeps searching till we find it or run out of string to search
    while (!found_it) {
      // fast inner loop (that's what it's called, not what it is) looks for a
      // potential match: it skips forward until the current character equals
      // the first character of the pattern
      while (aSearchStart != aSearchEnd &&
             aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) {
        ++aSearchStart;
      }

      // if we broke out of the `fast' loop because we're out of string ...
      // we're done: no match
      if (aSearchStart == aSearchEnd) {
        break;
      }

      // otherwise, we're at a potential match, let's see if we really hit one;
      // the test iterators are copies so aSearchStart keeps marking the
      // candidate position
      IteratorT testPattern(aPatternStart);
      IteratorT testSearch(aSearchStart);

      // slow inner loop verifies the potential match (found by the `fast' loop)
      // at the current position
      for (;;) {
        // we already compared the first character in the outer loop,
        //  so we'll advance before the next comparison
        ++testPattern;
        ++testSearch;

        // if we verified all the way to the end of the pattern, then we found
        // it!
        if (testPattern == aPatternEnd) {
          found_it = true;
          aSearchEnd = testSearch;  // return the exact found range through the
                                    // parameters
          break;
        }

        // if we got to the end of the string we're searching before we hit
        // the end of the pattern, we'll never find what we're looking for;
        // collapsing the range makes the outer loop exit with no match
        if (testSearch == aSearchEnd) {
          aSearchStart = aSearchEnd;
          break;
        }

        // else if we mismatched ... it's time to advance to the next search
        // position and get back into the `fast' loop
        if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) {
          ++aSearchStart;
          break;
        }
      }
    }
  }

  return found_it;
}

/**
 * This searches the range [aSearchStart, aSearchEnd) from right to left, and
 * returns the first match found that way (i.e. the one ending furthest to
 * the right), if any.
 *
 * @param aPattern the string to look for
 * @param aSearchStart in: start of the range to search; out: start of the
 *                     match on success, equal to |aSearchEnd| on failure
 * @param aSearchEnd in: end of the range to search; out: one past the last
 *                   character of the match on success, unchanged on failure
 * @param aCompare comparator invoked on single characters; a result of zero
 *                 is treated as a match
 * @return true if a match was found.
 *
 * NOTE(review): the end-of-pattern iterator is decremented without checking
 * that the pattern is non-empty -- callers presumably never pass an empty
 * pattern; confirm.
 */
template <class StringT, class IteratorT>
bool RFindInReadable_Impl(
    const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd,
    nsTStringComparator<typename StringT::char_type> aCompare) {
  // searchEnd is a working copy that walks leftwards; aSearchEnd itself is
  // only written when a match is reported
  IteratorT patternStart, patternEnd, searchEnd = aSearchEnd;
  aPattern.BeginReading(patternStart);
  aPattern.EndReading(patternEnd);

  // Point to the last character in the pattern
  --patternEnd;
  // outer loop keeps searching till we run out of string to search
  while (aSearchStart != searchEnd) {
    // Point to the end position of the next possible match
    --searchEnd;

    // Check last character, if a match, explore further from here
    if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) {
      // We're at a potential match, let's see if we really hit one
      IteratorT testPattern(patternEnd);
      IteratorT testSearch(searchEnd);

      // inner loop verifies the potential match at the current position,
      // walking both iterators backwards one character at a time
      do {
        // if we verified all the way to the start of the pattern, then we
        // found it!
        if (testPattern == patternStart) {
          aSearchStart = testSearch;  // point to start of match
          aSearchEnd = ++searchEnd;   // point to end of match
          return true;
        }

        // if we got to the start of the string we're searching before we hit
        // the start of the pattern, we'll never find what we're looking for
        if (testSearch == aSearchStart) {
          aSearchStart = aSearchEnd;
          return false;
        }

        // test previous character for a match
        --testPattern;
        --testSearch;
      } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0);
    }
  }

  // ran out of candidate positions: report failure with a collapsed range
  aSearchStart = aSearchEnd;
  return false;
}

/**
 * Forward search for |aPattern| over a UTF-16 iterator range; delegates to
 * FindInReadable_Impl, which reports the match through the two iterators.
 */
bool FindInReadable(const nsAString& aPattern,
                    nsAString::const_iterator& aSearchStart,
                    nsAString::const_iterator& aSearchEnd,
                    nsStringComparator aComparator) {
  const bool matched =
      FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  return matched;
}

/**
 * Forward search for |aPattern| over a narrow-string iterator range;
 * delegates to FindInReadable_Impl, which reports the match through the two
 * iterators.
 */
bool FindInReadable(const nsACString& aPattern,
                    nsACString::const_iterator& aSearchStart,
                    nsACString::const_iterator& aSearchEnd,
                    nsCStringComparator aComparator) {
  const bool matched =
      FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  return matched;
}

/**
 * Forward search for |aPattern| over a narrow-string iterator range using
 * nsCaseInsensitiveCStringComparator; delegates to FindInReadable_Impl.
 */
bool CaseInsensitiveFindInReadable(const nsACString& aPattern,
                                   nsACString::const_iterator& aSearchStart,
                                   nsACString::const_iterator& aSearchEnd) {
  const bool matched = FindInReadable_Impl(
      aPattern, aSearchStart, aSearchEnd, nsCaseInsensitiveCStringComparator);
  return matched;
}

/**
 * Right-to-left search for |aPattern| over a UTF-16 iterator range;
 * delegates to RFindInReadable_Impl, which reports the match through the two
 * iterators.
 */
bool RFindInReadable(const nsAString& aPattern,
                     nsAString::const_iterator& aSearchStart,
                     nsAString::const_iterator& aSearchEnd,
                     const nsStringComparator aComparator) {
  const bool matched =
      RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  return matched;
}

/**
 * Right-to-left search for |aPattern| over a narrow-string iterator range;
 * delegates to RFindInReadable_Impl, which reports the match through the two
 * iterators.
 */
bool RFindInReadable(const nsACString& aPattern,
                     nsACString::const_iterator& aSearchStart,
                     nsACString::const_iterator& aSearchEnd,
                     const nsCStringComparator aComparator) {
  const bool matched =
      RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
  return matched;
}

/**
 * Looks for |aChar| in [aSearchStart, aSearchEnd).
 *
 * @param aChar the UTF-16 unit to look for
 * @param aSearchStart in: where to start looking; out: advanced to the found
 *                     unit, or to |aSearchEnd| when there is none
 * @param aSearchEnd end of the range to search
 * @return true if |aChar| was found.
 */
bool FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
                        const nsAString::const_iterator& aSearchEnd) {
  const char16_t* const rangeBegin = aSearchStart.get();
  const ptrdiff_t remaining = aSearchEnd.get() - rangeBegin;

  const char16_t* const hit =
      nsCharTraits<char16_t>::find(rangeBegin, remaining, aChar);

  // Move to the hit when there is one, otherwise to the end of the range.
  aSearchStart.advance(hit ? hit - rangeBegin : remaining);
  return hit != nullptr;
}

/**
 * Looks for |aChar| in [aSearchStart, aSearchEnd).
 *
 * @param aChar the character to look for
 * @param aSearchStart in: where to start looking; out: advanced to the found
 *                     character, or to |aSearchEnd| when there is none
 * @param aSearchEnd end of the range to search
 * @return true if |aChar| was found.
 */
bool FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
                        const nsACString::const_iterator& aSearchEnd) {
  const char* const rangeBegin = aSearchStart.get();
  const ptrdiff_t remaining = aSearchEnd.get() - rangeBegin;

  const char* const hit =
      nsCharTraits<char>::find(rangeBegin, remaining, aChar);

  // Move to the hit when there is one, otherwise to the end of the range.
  aSearchStart.advance(hit ? hit - rangeBegin : remaining);
  return hit != nullptr;
}

bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) {
  nsAString::size_type src_len = aSource.Length(),
                       sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, 0, sub_len).Equals(aSubstring);
}

bool StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
                      nsStringComparator aComparator) {
  nsAString::size_type src_len = aSource.Length(),
                       sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
}

bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) {
  nsACString::size_type src_len = aSource.Length(),
                        sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, 0, sub_len).Equals(aSubstring);
}

bool StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
                      nsCStringComparator aComparator) {
  nsACString::size_type src_len = aSource.Length(),
                        sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
}

bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) {
  nsAString::size_type src_len = aSource.Length(),
                       sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
}

bool StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
                    nsStringComparator aComparator) {
  nsAString::size_type src_len = aSource.Length(),
                       sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, src_len - sub_len, sub_len)
      .Equals(aSubstring, aComparator);
}

bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) {
  nsACString::size_type src_len = aSource.Length(),
                        sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
}

bool StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
                    nsCStringComparator aComparator) {
  nsACString::size_type src_len = aSource.Length(),
                        sub_len = aSubstring.Length();
  if (sub_len > src_len) {
    return false;
  }
  return Substring(aSource, src_len - sub_len, sub_len)
      .Equals(aSubstring, aComparator);
}

// A single zero code unit. EmptyString() wraps it directly and
// EmptyCString() wraps it reinterpreted as |const char*|.
static const char16_t empty_buffer[1] = {'\0'};

/**
 * Returns a reference to a function-local static dependent string that
 * wraps |empty_buffer|.
 */
const nsString& EmptyString() {
  static const nsDependentString sEmptyString(empty_buffer);
  return sEmptyString;
}

/**
 * Returns a reference to a function-local static dependent narrow string
 * that wraps |empty_buffer| viewed as |const char*|.
 */
const nsCString& EmptyCString() {
  static const nsDependentCString sEmptyCString(
      reinterpret_cast<const char*>(empty_buffer));
  return sEmptyCString;
}

/**
 * Returns a reference to a function-local static nsString constructed with
 * StringDataFlags::VOIDED.
 */
const nsString& VoidString() {
  static const nsString sVoidString(mozilla::detail::StringDataFlags::VOIDED);
  return sVoidString;
}

/**
 * Returns a reference to a function-local static nsCString constructed with
 * StringDataFlags::VOIDED.
 */
const nsCString& VoidCString() {
  static const nsCString sVoidCString(
      mozilla::detail::StringDataFlags::VOIDED);
  return sVoidCString;
}

/**
 * Compares a UTF-8 string with a UTF-16 string, one decoded value at a time.
 *
 * @param aUTF8String left-hand operand
 * @param aUTF16String right-hand operand
 * @param aErr forwarded to both NextChar() decoders
 *             (NOTE(review): presumably set when malformed input is seen --
 *             confirm against nsUTF8Utils.h)
 * @return 0 when both strings run out together without a difference, -1
 *         when the UTF-8 string runs out first or has the smaller value at
 *         the first difference, 1 otherwise.
 */
int32_t CompareUTF8toUTF16(const nsACString& aUTF8String,
                           const nsAString& aUTF16String, bool* aErr) {
  const char* u8Cursor;
  const char* u8Limit;
  aUTF8String.BeginReading(u8Cursor);
  aUTF8String.EndReading(u8Limit);

  const char16_t* u16Cursor;
  const char16_t* u16Limit;
  aUTF16String.BeginReading(u16Cursor);
  aUTF16String.EndReading(u16Limit);

  while (u8Cursor != u8Limit && u16Cursor != u16Limit) {
    // No need for an ASCII fast path, since both NextChar() calls get
    // inlined.
    const uint32_t fromUtf8 =
        UTF8CharEnumerator::NextChar(&u8Cursor, u8Limit, aErr);
    const uint32_t fromUtf16 =
        UTF16CharEnumerator::NextChar(&u16Cursor, u16Limit, aErr);
    if (fromUtf8 != fromUtf16) {
      return fromUtf8 < fromUtf16 ? -1 : 1;
    }
  }

  // At least one side is exhausted; the side with input left is the greater.
  if (u8Cursor == u8Limit) {
    return u16Cursor == u16Limit ? 0 : -1;
  }
  return 1;
}

/**
 * Appends the UTF-16 form of |aSource| to |aDest|: a single unit when
 * IS_IN_BMP(aSource) holds, otherwise H_SURROGATE(aSource) followed by
 * L_SURROGATE(aSource). Asserts IS_VALID_CHAR(aSource).
 */
void AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) {
  NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
  if (!IS_IN_BMP(aSource)) {
    aDest.Append(H_SURROGATE(aSource));
    aDest.Append(L_SURROGATE(aSource));
    return;
  }
  aDest.Append(char16_t(aSource));
}