/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * SourceText encapsulates a count of char16_t (UTF-16) or Utf8Unit (UTF-8) * code units (note: code *units*, not bytes or code points) and those code * units ("source units"). (Latin-1 is not supported: all places where Latin-1 * must be compiled first convert to a supported encoding.) * * A SourceText either observes without owning, or takes ownership of, source * units passed to |SourceText::init|. Thus SourceText can be used to * efficiently avoid copying. * * Rules for use: * * 1) The passed-in source units must be allocated with js_malloc(), * js_calloc(), or js_realloc() if |SourceText::init| is instructed to take * ownership of the source units. * 2) If |SourceText::init| merely borrows the source units, the user must * keep them alive until associated JS compilation is complete. * 3) Code that calls |SourceText::take{Chars,Units}()| must keep the source * units alive until JS compilation completes. Normally only the JS engine * should call |SourceText::take{Chars,Units}()|. * 4) Use the appropriate SourceText parameterization depending on the source * units encoding. * * Example use: * * size_t length = 512; * char16_t* chars = js_pod_malloc(length); * if (!chars) { * JS_ReportOutOfMemory(cx); * return false; * } * JS::SourceText srcBuf; * if (!srcBuf.init(cx, chars, length, JS::SourceOwnership::TakeOwnership)) { * return false; * } * JS::Rooted script(cx); * if (!JS::Compile(cx, options, srcBuf, &script)) { * return false; * } */ #ifndef js_SourceText_h #define js_SourceText_h #include "mozilla/Assertions.h" // MOZ_ASSERT #include "mozilla/Attributes.h" // MOZ_COLD, MOZ_IS_CLASS_INIT, MOZ_MUST_USE #include "mozilla/Likely.h" // MOZ_UNLIKELY #include "mozilla/Utf8.h" // mozilla::Utf8Unit #include // size_t #include // UINT32_MAX #include // std::conditional_t, std::is_same_v #include "js/UniquePtr.h" // js::UniquePtr #include "js/Utility.h" // JS::FreePolicy namespace JS { namespace detail { MOZ_COLD extern JS_PUBLIC_API void ReportSourceTooLong(JSContext* cx); } // namespace detail enum class SourceOwnership { Borrowed, TakeOwnership, }; template class SourceText final { private: static_assert(std::is_same_v || std::is_same_v, "Unit must be either char16_t or Utf8Unit for " "SourceText"); /** |char16_t| or |Utf8Unit| source units of uncertain validity. */ const Unit* units_ = nullptr; /** The length in code units of |units_|. */ uint32_t length_ = 0; /** * Whether this owns |units_| or merely observes source units owned by some * other object. */ bool ownsUnits_ = false; public: // A C++ character type that can represent the source units -- suitable for // passing to C++ string functions. using CharT = std::conditional_t, char16_t, char>; public: /** * Construct a SourceText. It must be initialized using |init()| before it * can be used as compilation source text. */ SourceText() = default; /** * Construct a SourceText from contents extracted from |other|. This * SourceText will then act exactly as |other| would have acted, had it * not been passed to this function. |other| will return to its default- * constructed state and must have |init()| called on it to use it. */ SourceText(SourceText&& other) : units_(other.units_), length_(other.length_), ownsUnits_(other.ownsUnits_) { other.units_ = nullptr; other.length_ = 0; other.ownsUnits_ = false; } ~SourceText() { if (ownsUnits_) { js_free(const_cast(units_)); } } /** * Initialize this with source unit data: |char16_t| for UTF-16 source * units, or |Utf8Unit| for UTF-8 source units. * * If |ownership == TakeOwnership|, *this function* takes ownership of * |units|, *even if* this function fails, and you MUST NOT free |units| * yourself. This single-owner-friendly approach reduces risk of leaks on * failure. * * |units| may be null if |unitsLength == 0|; if so, this will silently be * initialized using non-null, unowned units. */ MOZ_IS_CLASS_INIT MOZ_MUST_USE bool init(JSContext* cx, const Unit* units, size_t unitsLength, SourceOwnership ownership) { MOZ_ASSERT_IF(units == nullptr, unitsLength == 0); // Ideally we'd use |Unit| and not cast below, but the risk of a static // initializer is too great. static const CharT emptyString[] = {'\0'}; // Initialize all fields *before* checking length. This ensures that // if |ownership == SourceOwnership::TakeOwnership|, |units| will be // freed when |this|'s destructor is called. if (units) { units_ = units; length_ = static_cast(unitsLength); ownsUnits_ = ownership == SourceOwnership::TakeOwnership; } else { units_ = reinterpret_cast(emptyString); length_ = 0; ownsUnits_ = false; } // IMPLEMENTATION DETAIL, DO NOT RELY ON: This limit is used so we can // store offsets in |JSScript|s as |uint32_t|. It could be lifted // fairly easily if desired, as the compiler uses |size_t| internally. if (MOZ_UNLIKELY(unitsLength > UINT32_MAX)) { detail::ReportSourceTooLong(cx); return false; } return true; } /** * Exactly identical to the |init()| overload above that accepts * |const Unit*|, but instead takes character data: |const CharT*|. * * (We can't just write this to accept |const CharT*|, because then in the * UTF-16 case this overload and the one above would be identical. So we * use SFINAE to expose the |CharT| overload only if it's different.) */ template && !std::is_same_v>> MOZ_IS_CLASS_INIT MOZ_MUST_USE bool init(JSContext* cx, const Char* chars, size_t charsLength, SourceOwnership ownership) { return init(cx, reinterpret_cast(chars), charsLength, ownership); } /** * Initialize this using source units transferred out of |data|. */ MOZ_MUST_USE bool init(JSContext* cx, js::UniquePtr data, size_t dataLength) { return init(cx, data.release(), dataLength, SourceOwnership::TakeOwnership); } /** * Exactly identical to the |init()| overload above that accepts * |UniquePtr|, but instead takes character data: * |UniquePtr|. * * (We can't just duplicate the signature above with s/Unit/CharT/, because * then in the UTF-16 case this overload and the one above would be identical. * So we use SFINAE to expose the |CharT| overload only if it's different.) */ template && !std::is_same_v>> MOZ_MUST_USE bool init(JSContext* cx, js::UniquePtr data, size_t dataLength) { return init(cx, data.release(), dataLength, SourceOwnership::TakeOwnership); } /** * Access the encapsulated data using a code unit type. * * This function is useful for code that wants to interact with source text * as *code units*, not as string data. This doesn't matter for UTF-16, * but it's a crucial distinction for UTF-8. When UTF-8 source text is * encapsulated, |Unit| being |mozilla::Utf8Unit| unambiguously indicates * that the code units are UTF-8. In contrast |const char*| returned by * |get()| below could hold UTF-8 (or its ASCII subset) or Latin-1 or (in * particularly cursed embeddings) EBCDIC or some other legacy character * set. Prefer this function to |get()| wherever possible. */ const Unit* units() const { return units_; } /** * Access the encapsulated data using a character type. * * This function is useful for interactions with character-centric actions * like interacting with UniqueChars/UniqueTwoByteChars or printing out * text in a debugger, that only work with |CharT|. But as |CharT| loses * encoding specificity when UTF-8 source text is encapsulated, prefer * |units()| to this function. */ const CharT* get() const { return reinterpret_cast(units_); } /** * Returns true if this owns the source units and will free them on * destruction. If true, it is legal to call |take{Chars,Units}()|. */ bool ownsUnits() const { return ownsUnits_; } /** * Count of the underlying source units -- code units, not bytes or code * points -- in this. */ uint32_t length() const { return length_; } /** * Retrieve and take ownership of the underlying source units. The caller * is now responsible for calling js_free() on the returned value, *but * only after JS script compilation has completed*. * * After underlying source units have been taken, this will continue to * refer to the same data -- it just won't own the data. get() and * length() will return the same values, but ownsUnits() will be false. * The taken source units must be kept alive until after JS script * compilation completes, as noted above, for this to be safe. * * The caller must check ownsUnits() before calling takeUnits(). Taking * and then free'ing an unowned buffer will have dire consequences. */ Unit* takeUnits() { MOZ_ASSERT(ownsUnits_); ownsUnits_ = false; return const_cast(units_); } /** * Akin to |takeUnits()| in all respects, but returns characters rather * than units. */ CharT* takeChars() { return reinterpret_cast(takeUnits()); } private: SourceText(const SourceText&) = delete; void operator=(const SourceText&) = delete; }; } // namespace JS #endif /* js_SourceText_h */