Backed out changeset 2c6bb36b0667 (bug 1170668) for causing build bustages on TestShortRead.cpp CLOSED TREE

2022-08-11 00:01:03 +03:00 · 2022-08-11 00:01:03 +03:00 · 948922a968
--- a/intl/uconv/nsConverterInputStream.cpp
+++ b/intl/uconv/nsConverterInputStream.cpp
@@ -105,38 +105,39 @@ nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
                                     void* aClosure, uint32_t aCount,
                                     uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
-  uint32_t codeUnitsToWrite = mUnicharDataLength - mUnicharDataOffset;
-  if (0 == codeUnitsToWrite) {
+  uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
+  nsresult rv;
+  if (0 == bytesToWrite) {
    // Fill the unichar buffer
-    codeUnitsToWrite = Fill(&mLastErrorCode);
-    if (codeUnitsToWrite == 0) {
+    bytesToWrite = Fill(&rv);
+    if (bytesToWrite <= 0) {
      *aReadCount = 0;
-      return mLastErrorCode;
+      return rv;
+    }
+    if (NS_FAILED(rv)) {
+      return rv;
    }
  }

-  if (codeUnitsToWrite > aCount) {
-    codeUnitsToWrite = aCount;
-  }
+  if (bytesToWrite > aCount) bytesToWrite = aCount;

-  uint32_t codeUnitsWritten;
-  uint32_t totalCodeUnitsWritten = 0;
+  uint32_t bytesWritten;
+  uint32_t totalBytesWritten = 0;

-  while (codeUnitsToWrite) {
-    nsresult rv =
-        aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
-                totalCodeUnitsWritten, codeUnitsToWrite, &codeUnitsWritten);
+  while (bytesToWrite) {
+    rv = aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
+                 totalBytesWritten, bytesToWrite, &bytesWritten);
    if (NS_FAILED(rv)) {
      // don't propagate errors to the caller
      break;
    }

-    codeUnitsToWrite -= codeUnitsWritten;
-    totalCodeUnitsWritten += codeUnitsWritten;
-    mUnicharDataOffset += codeUnitsWritten;
+    bytesToWrite -= bytesWritten;
+    totalBytesWritten += bytesWritten;
+    mUnicharDataOffset += bytesWritten;
  }

-  *aReadCount = totalCodeUnitsWritten;
+  *aReadCount = totalBytesWritten;

  return NS_OK;
 }
@@ -165,7 +166,7 @@ nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
 }

 uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
-  if (!mInput) {
+  if (nullptr == mInput) {
    // We already closed the stream!
    *aErrorCode = NS_BASE_STREAM_CLOSED;
    return 0;
@@ -178,72 +179,54 @@ uint32_t nsConverterInputStream::Fill(nsresult* aErrorCode) {
    return 0;
  }

+  // We assume a many to one conversion and are using equal sizes for
+  // the two buffers.  However if an error happens at the very start
+  // of a byte buffer we may end up in a situation where n bytes lead
+  // to n+1 unicode chars.  Thus we need to keep track of the leftover
+  // bytes as we convert.
+
+  uint32_t nb;
+  *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
+  if (nb == 0 && mLeftOverBytes == 0) {
+    // No more data
+    *aErrorCode = NS_OK;
+    return 0;
+  }
+
+  NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
+               "mByteData is lying to us somewhere");
+
+  // Now convert as much of the byte buffer to unicode as possible
+  auto src = AsBytes(Span(mByteData));
+  auto dst = Span(mUnicharData);
  // mUnicharData.Length() is the buffer length, not the fill status.
  // mUnicharDataLength reflects the current fill status.
  mUnicharDataLength = 0;
  // Whenever we convert, mUnicharData is logically empty.
  mUnicharDataOffset = 0;
-
-  // Continue trying to read from the source stream until we successfully decode
-  // a character or encounter an error, as returning `0` here implies that the
-  // stream is complete.
-  //
-  // If the converter has been cleared, we've fully consumed the stream, and
-  // want to report EOF.
-  while (mUnicharDataLength == 0 && mConverter) {
-    // We assume a many to one conversion and are using equal sizes for
-    // the two buffers.  However if an error happens at the very start
-    // of a byte buffer we may end up in a situation where n bytes lead
-    // to n+1 unicode chars.  Thus we need to keep track of the leftover
-    // bytes as we convert.
-
-    uint32_t nb;
-    *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
-    if (NS_FAILED(*aErrorCode)) {
-      return 0;
-    }
-
-    NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
-                 "mByteData is lying to us somewhere");
-
-    // If `NS_FillArray` failed to read any new bytes, this is the last read,
-    // and we're at the end of the stream.
-    bool last = (nb == 0);
-
-    // Now convert as much of the byte buffer to unicode as possible
-    auto src = AsBytes(Span(mByteData));
-    auto dst = Span(mUnicharData);
-
-    // Truncation from size_t to uint32_t below is OK, because the sizes
-    // are bounded by the lengths of mByteData and mUnicharData.
-    uint32_t result;
-    size_t read;
-    size_t written;
-    if (mErrorsAreFatal) {
-      std::tie(result, read, written) =
-          mConverter->DecodeToUTF16WithoutReplacement(src, dst, last);
-    } else {
-      std::tie(result, read, written, std::ignore) =
-          mConverter->DecodeToUTF16(src, dst, last);
-    }
-    mLeftOverBytes = mByteData.Length() - read;
-    mUnicharDataLength = written;
-    // Clear `mConverter` if we reached the end of the stream, as we can't
-    // call methods on it anymore. This will also signal EOF to the caller
-    // through the loop condition.
-    if (last) {
-      MOZ_ASSERT(mLeftOverBytes == 0,
-                 "Failed to read all bytes on the last pass?");
-      mConverter = nullptr;
-    }
-    // If we got a decode error, we're done.
-    if (result != kInputEmpty && result != kOutputFull) {
-      MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
-      *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
-      return 0;
-    }
+  // Truncation from size_t to uint32_t below is OK, because the sizes
+  // are bounded by the lengths of mByteData and mUnicharData.
+  uint32_t result;
+  size_t read;
+  size_t written;
+  // The design of this class is fundamentally bogus in that trailing
+  // errors are ignored. Always passing false as the last argument to
+  // Decode* calls below.
+  if (mErrorsAreFatal) {
+    std::tie(result, read, written) =
+        mConverter->DecodeToUTF16WithoutReplacement(src, dst, false);
+  } else {
+    std::tie(result, read, written, std::ignore) =
+        mConverter->DecodeToUTF16(src, dst, false);
+  }
+  mLeftOverBytes = mByteData.Length() - read;
+  mUnicharDataLength = written;
+  if (result == kInputEmpty || result == kOutputFull) {
+    *aErrorCode = NS_OK;
+  } else {
+    MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
+    *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
  }
-  *aErrorCode = NS_OK;
  return mUnicharDataLength;
 }

--- a/intl/uconv/tests/gtest/TestShortRead.cpp
+++ b/intl/uconv/tests/gtest/TestShortRead.cpp
@@ -1,106 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-/* vim: set ts=8 sts=2 et sw=2 tw=80: */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this file,
- * You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "gtest/gtest.h"
-#include "mozilla/ErrorNames.h"
-#include "nsCOMPtr.h"
-#include "nsConverterInputStream.h"
-#include "nsIInputStream.h"
-#include "nsISupports.h"
-#include "nsStringStream.h"
-
-namespace {
-
-class ShortReadWrapper final : public nsIInputStream {
- public:
-  NS_DECL_THREADSAFE_ISUPPORTS
-  NS_DECL_NSIINPUTSTREAM
-
-  template <size_t N>
-  ShortReadWrapper(const uint32_t (&aShortReads)[N],
-                   nsIInputStream* aBaseStream)
-      : mShortReadIter(std::begin(aShortReads)),
-        mShortReadEnd(std::end(aShortReads)),
-        mBaseStream(aBaseStream) {}
-
-  ShortReadWrapper(const ShortReadWrapper&) = delete;
-  ShortReadWrapper& operator=(const ShortReadWrapper&) = delete;
-
- private:
-  ~ShortReadWrapper() = default;
-
-  const uint32_t* mShortReadIter;
-  const uint32_t* mShortReadEnd;
-  nsCOMPtr<nsIInputStream> mBaseStream;
-};
-
-NS_IMPL_ISUPPORTS(ShortReadWrapper, nsIInputStream)
-
-NS_IMETHODIMP
-ShortReadWrapper::Close() { return mBaseStream->Close(); }
-
-NS_IMETHODIMP
-ShortReadWrapper::Available(uint64_t* aAvailable) {
-  nsresult rv = mBaseStream->Available(aAvailable);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  if (mShortReadIter != mShortReadEnd) {
-    *aAvailable = std::min(uint64_t(*mShortReadIter), *aAvailable);
-  }
-  return NS_OK;
-}
-
-NS_IMETHODIMP
-ShortReadWrapper::Read(char* aBuf, uint32_t aCount, uint32_t* _retval) {
-  if (mShortReadIter != mShortReadEnd) {
-    aCount = std::min(*mShortReadIter, aCount);
-  }
-
-  nsresult rv = mBaseStream->Read(aBuf, aCount, _retval);
-  if (NS_SUCCEEDED(rv) && mShortReadIter != mShortReadEnd) {
-    ++mShortReadIter;
-  }
-  return rv;
-}
-
-NS_IMETHODIMP
-ShortReadWrapper::ReadSegments(nsWriteSegmentFun aWriter, void* aClosure,
-                               uint32_t aCount, uint32_t* _retval) {
-  return NS_ERROR_NOT_IMPLEMENTED;
-}
-
-NS_IMETHODIMP
-ShortReadWrapper::IsNonBlocking(bool* _retval) {
-  return mBaseStream->IsNonBlocking(_retval);
-}
-
-}  // namespace
-
-TEST(ConverterStreamShortRead, ShortRead)
-{
-  uint8_t bytes[] = {0xd8, 0x35, 0xdc, 0x20};
-  nsCOMPtr<nsIInputStream> baseStream;
-  ASSERT_TRUE(NS_SUCCEEDED(NS_NewByteInputStream(getter_AddRefs(baseStream),
-                                                 AsChars(mozilla::Span(bytes)),
-                                                 NS_ASSIGNMENT_COPY)));
-
-  static const uint32_t kShortReads[] = {1, 2, 1};
-  nsCOMPtr<nsIInputStream> shortStream =
-      new ShortReadWrapper(kShortReads, baseStream);
-
-  RefPtr<nsConverterInputStream> unicharStream = new nsConverterInputStream();
-  ASSERT_TRUE(NS_SUCCEEDED(
-      unicharStream->Init(shortStream, "UTF-16BE", 4096,
-                          nsIConverterInputStream::ERRORS_ARE_FATAL)));
-
-  uint32_t read;
-  nsAutoString result;
-  ASSERT_TRUE(
-      NS_SUCCEEDED(unicharStream->ReadString(UINT32_MAX, result, &read)));
-
-  ASSERT_EQ(read, 2u);
-  ASSERT_TRUE(result == u"\u{1d420}");
-}
--- a/intl/uconv/tests/gtest/moz.build
+++ b/intl/uconv/tests/gtest/moz.build
@@ -1,11 +0,0 @@
-# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
-# vim: set filetype=python:
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-UNIFIED_SOURCES += [
-    "TestShortRead.cpp",
-]
-
-FINAL_LIBRARY = "xul-gtest"
--- a/intl/uconv/tests/moz.build
+++ b/intl/uconv/tests/moz.build
@@ -4,10 +4,6 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.

-TEST_DIRS += [
-    "gtest",
-]
-
 XPCSHELL_TESTS_MANIFESTS += ["unit/xpcshell.ini"]

 MOCHITEST_MANIFESTS += ["mochitest.ini"]
--- a/intl/uconv/tests/unit/test_bug317216.js
+++ b/intl/uconv/tests/unit/test_bug317216.js
@@ -75,14 +75,14 @@ const test = [
  // 10: Lone high surrogate at the end of the input
  [
    "%D8%35%",
-    //    expected: one replacement char
-    "\uFFFD",
+    //    expected: nothing
+    "",
  ],
  // 11: Half code unit at the end of the input
  [
    "%D8",
-    //    expected: one replacement char
-    "\uFFFD",
+    //    expected: nothing
+    "",
  ],
 ];

--- a/xpcom/io/nsIUnicharInputStream.idl
+++ b/xpcom/io/nsIUnicharInputStream.idl
@@ -16,10 +16,9 @@ interface nsIInputStream;
 * @param aInStream stream being read
 * @param aClosure opaque parameter passed to ReadSegments
 * @param aFromSegment pointer to memory owned by the input stream
- * @param aToOffset number of UTF-16 code units already read
- *                  (since ReadSegments was called)
+ * @param aToOffset amount already read (since ReadSegments was called)
 * @param aCount length of fromSegment
- * @param aWriteCount number of UTF-16 code units read
+ * @param aWriteCount number of bytes read
 *
 * Implementers should return the following:
 *
@@ -39,19 +38,19 @@ typedef nsresult (*nsWriteUnicharSegmentFun)(nsIUnicharInputStream *aInStream,
 native nsWriteUnicharSegmentFun(nsWriteUnicharSegmentFun);

 /**
- * Abstract UTF-16 input stream
+ * Abstract unicode character input stream
 * @see nsIInputStream
 */
 [scriptable, uuid(d5e3bd80-6723-4b92-b0c9-22f6162fd94f)]
 interface nsIUnicharInputStream : nsISupports {
  /**
-   * Reads into a caller-provided array.
+   * Reads into a caller-provided character array.
   *
-   * @return The number of utf-16 code units that were successfully read.
-   *         May be less than aCount, even if there is more data in the input
-   *         stream. A return value of 0 means EOF.
+   * @return The number of characters that were successfully read. May be less
+   *         than aCount, even if there is more data in the input stream.
+   *         A return value of 0 means EOF.
   *
-   * @note To read more than 2^32 code units, call this method multiple times.
+   * @note To read more than 2^32 characters, call this method multiple times.
   */
  [noscript] unsigned long read([array, size_is(aCount)] in char16_t aBuf,
                                in unsigned long aCount);
@@ -61,13 +60,13 @@ interface nsIUnicharInputStream : nsISupports {
   * The writer function may be called multiple times for segmented buffers.
   * ReadSegments is expected to keep calling the writer until either there is
   * nothing left to read or the writer returns an error.  ReadSegments should
-   * not call the writer with zero UTF-16 code units to consume.
+   * not call the writer with zero characters to consume.
   *
   * @param aWriter the "consumer" of the data to be read
   * @param aClosure opaque parameter passed to writer
-   * @param aCount the maximum number of UTF-16 code units to be read
+   * @param aCount the maximum number of characters to be read
   *
-   * @return number of UTF-16 code units read (may be less than aCount)
+   * @return number of characters read (may be less than aCount)
   * @return 0 if reached end of file (or if aWriter refused to consume data)
   *
   * @throws NS_BASE_STREAM_WOULD_BLOCK if reading from the input stream would
@@ -83,15 +82,14 @@ interface nsIUnicharInputStream : nsISupports {

  /**
   * Read into a string object.
-   *
-   * @param aCount The number of UTF-16 code units that should be read
-   * @return The number of UTF-16 code units that were read.
+   * @param aCount The number of characters that should be read
+   * @return The number of characters that were read.
   */
  unsigned long readString(in unsigned long aCount, out AString aString);

-  /**
-   * Close the stream and free associated resources. This also closes the
-   * underlying stream, if any.
-   */
+ /**
+  * Close the stream and free associated resources. This also closes the
+  * underlying stream, if any.
+  */
  void close();
 };