Bug 1364974: Part 5 - Perform off-thread decode operations in chunks, rather than singly. r=shu

MozReview-Commit-ID: DapDuQ8rdTI --HG-- extra : rebase_source : 5273199757af7e72c1396d1bc62a564c0d5adb0f
2017-05-16 19:42:12 -07:00 · 2017-05-16 19:42:12 -07:00 · 9c9dbaaaa3
--- a/js/xpconnect/loader/ScriptPreloader.cpp
+++ b/js/xpconnect/loader/ScriptPreloader.cpp
@ -269,19 +269,16 @@ ScriptPreloader::Cleanup()
 void
 ScriptPreloader::InvalidateCache()
 {
+    mMonitor.AssertNotCurrentThreadOwns();
    MonitorAutoLock mal(mMonitor);

    mCacheInvalidated = true;

-    for (auto& script : IterHash(mScripts)) {
-        // We can only purge finished scripts here. Async scripts that are
-        // still being parsed off-thread have a non-refcounted reference to
-        // this script, which needs to stay alive until they finish parsing.
-        if (script->mReadyToExecute) {
-            script->Cancel();
-            script.Remove();
-        }
-    }
+    mParsingScripts.clearAndFree();
+    while (auto script = mPendingScripts.getFirst())
+        script->remove();
+    for (auto& script : IterHash(mScripts))
+        script.Remove();

    // If we've already finished saving the cache at this point, start a new
    // delayed save operation. This will write out an empty cache file in place
@ -437,13 +434,13 @@ ScriptPreloader::InitCacheInternal()
        return Err(NS_ERROR_UNEXPECTED);
    }

-    AutoTArray<CachedScript*, 256> scripts;
-
    {
        auto cleanup = MakeScopeExit([&] () {
            mScripts.Clear();
        });

+        LinkedList<CachedScript> scripts;
+
        Range<uint8_t> header(data, data + headerSize);
        data += headerSize;

@ -468,7 +465,14 @@ ScriptPreloader::InitCacheInternal()

            script->mXDRRange.emplace(scriptData, scriptData + script->mSize);

-            scripts.AppendElement(script.get());
+            // Don't pre-decode the script unless it was used in this process type during the
+            // previous session.
+            if (script->mOriginalProcessTypes.contains(CurrentProcessType())) {
+                scripts.insertBack(script.get());
+            } else {
+                script->mReadyToExecute = true;
+            }
+
            mScripts.Put(script->mCachePath, script.get());
            Unused << script.release();
        }
@ -477,32 +481,11 @@ ScriptPreloader::InitCacheInternal()
            return Err(NS_ERROR_UNEXPECTED);
        }

+        mPendingScripts = Move(scripts);
        cleanup.release();
    }

-    AutoJSAPI jsapi;
-    MOZ_RELEASE_ASSERT(jsapi.Init(xpc::CompilationScope()));
-    JSContext* cx = jsapi.cx();
-
-    auto start = TimeStamp::Now();
-    LOG(Info, "Off-thread decoding scripts...\n");
-
-    JS::CompileOptions options(cx, JSVERSION_LATEST);
-
-    for (auto& script : scripts) {
-        // Only async decode scripts which have been used in this process type.
-        if (script->mProcessTypes.contains(CurrentProcessType()) &&
-            script->AsyncDecodable() &&
-            JS::CanCompileOffThread(cx, options, script->mSize)) {
-            DecodeScriptOffThread(cx, script);
-        } else {
-            script->mReadyToExecute = true;
-        }
-    }
-
-    LOG(Info, "Initialized decoding in %fms\n",
-        (TimeStamp::Now() - start).ToMilliseconds());
-
+    DecodeNextBatch(OFF_THREAD_FIRST_CHUNK_SIZE);
    return Ok();
 }

@ -760,12 +743,25 @@ ScriptPreloader::GetCachedScript(JSContext* cx, const nsCString& path)
 JSScript*
 ScriptPreloader::WaitForCachedScript(JSContext* cx, CachedScript* script)
 {
+    // Check for finished operations before locking so that we can move on to
+    // decoding the next batch as soon as possible after the pending batch is
+    // ready. If we wait until we hit an unfinished script, we wind up having at
+    // most one batch of buffered scripts, and occasionally under-running that
+    // buffer.
+    FinishOffThreadDecode();
+
    if (!script->mReadyToExecute) {
        LOG(Info, "Must wait for async script load: %s\n", script->mURL.get());
        auto start = TimeStamp::Now();

+        mMonitor.AssertNotCurrentThreadOwns();
        MonitorAutoLock mal(mMonitor);

+        // Check for finished operations again *after* locking, or we may race
+        // against mToken being set between our last check and the time we
+        // entered the mutex.
+        FinishOffThreadDecode();
+
        if (!script->mReadyToExecute && script->mSize < MAX_MAINTHREAD_DECODE_SIZE) {
            LOG(Info, "Script is small enough to recompile on main thread\n");

@ -773,63 +769,168 @@ ScriptPreloader::WaitForCachedScript(JSContext* cx, CachedScript* script)
        } else {
            while (!script->mReadyToExecute) {
                mal.Wait();
+
+                MonitorAutoUnlock mau(mMonitor);
+                FinishOffThreadDecode();
            }
        }

-        LOG(Info, "Waited %fms\n", (TimeStamp::Now() - start).ToMilliseconds());
+        LOG(Debug, "Waited %fms\n", (TimeStamp::Now() - start).ToMilliseconds());
    }

    return script->GetJSScript(cx);
 }


-void
-ScriptPreloader::DecodeScriptOffThread(JSContext* cx, CachedScript* script)
+
+/* static */ void
+ScriptPreloader::OffThreadDecodeCallback(void* token, void* context)
 {
-    JS::CompileOptions options(cx, JSVERSION_LATEST);
+    auto cache = static_cast<ScriptPreloader*>(context);

-    options.setNoScriptRval(true)
-           .setFileAndLine(script->mURL.get(), 1);
+    cache->mMonitor.AssertNotCurrentThreadOwns();
+    MonitorAutoLock mal(cache->mMonitor);

-    if (!JS::DecodeOffThreadScript(cx, options, script->Range(),
-                                   OffThreadDecodeCallback,
-                                   static_cast<void*>(script))) {
+    // First notify any tasks that are already waiting on scripts, since they'll
+    // be blocking the main thread and preventing any runnables from executing.
+    cache->mToken = token;
+    mal.NotifyAll();
+
+    // If nothing processed the token, and we don't already have a pending
+    // runnable, then dispatch a new one to finish the processing on the main
+    // thread as soon as possible.
+    if (cache->mToken && !cache->mFinishDecodeRunnablePending) {
+        cache->mFinishDecodeRunnablePending = true;
+        NS_DispatchToMainThread(
+            NewRunnableMethod(cache, &ScriptPreloader::DoFinishOffThreadDecode));
+    }
+}
+
+void
+ScriptPreloader::DoFinishOffThreadDecode()
+{
+    mFinishDecodeRunnablePending = false;
+    FinishOffThreadDecode();
+}
+
+void
+ScriptPreloader::FinishOffThreadDecode()
+{
+    if (!mToken) {
+        return;
+    }
+
+    auto cleanup = MakeScopeExit([&] () {
+        mToken = nullptr;
+        mParsingSources.clear();
+        mParsingScripts.clear();
+
+        DecodeNextBatch(OFF_THREAD_CHUNK_SIZE);
+    });
+
+    AutoJSAPI jsapi;
+    MOZ_RELEASE_ASSERT(jsapi.Init(xpc::CompilationScope()));
+
+    JSContext* cx = jsapi.cx();
+    JS::Rooted<JS::ScriptVector> jsScripts(cx, JS::ScriptVector(cx));
+
+    // If this fails, we still need to mark the scripts as finished. Any that
+    // weren't successfully compiled in this operation (which should never
+    // happen under ordinary circumstances) will be re-decoded on the main
+    // thread, and raise the appropriate errors when they're executed.
+    //
+    // The exception from the off-thread decode operation will be reported when
+    // we pop the AutoJSAPI off the stack.
+    Unused << JS::FinishMultiOffThreadScriptsDecoder(cx, mToken, &jsScripts);
+
+    unsigned i = 0;
+    for (auto script : mParsingScripts) {
+        LOG(Debug, "Finished off-thread decode of %s\n", script->mURL.get());
+        if (i < jsScripts.length())
+            script->mScript = jsScripts[i++];
        script->mReadyToExecute = true;
    }
 }

 void
-ScriptPreloader::CancelOffThreadParse(void* token)
+ScriptPreloader::DecodeNextBatch(size_t chunkSize)
 {
-    AutoSafeJSAPI jsapi;
-    JS::CancelOffThreadScriptDecoder(jsapi.cx(), token);
-}
+    MOZ_ASSERT(mParsingSources.length() == 0);
+    MOZ_ASSERT(mParsingScripts.length() == 0);

-/* static */ void
-ScriptPreloader::OffThreadDecodeCallback(void* token, void* context)
-{
-    auto script = static_cast<CachedScript*>(context);
+    auto cleanup = MakeScopeExit([&] () {
+        mParsingScripts.clearAndFree();
+        mParsingSources.clearAndFree();
+    });

-    MonitorAutoLock mal(script->mCache.mMonitor);
+    auto start = TimeStamp::Now();
+    LOG(Debug, "Off-thread decoding scripts...\n");

-    if (script->mReadyToExecute) {
-        // We've already executed this script on the main thread, and opted to
-        // main thread decode it rather waiting for off-thread decoding to
-        // finish. So just cancel the off-thread parse rather than completing
-        // it.
-        NS_DispatchToMainThread(
-            NewRunnableMethod<void*>(&script->mCache,
-                                     &ScriptPreloader::CancelOffThreadParse,
-                                     token));
+    size_t size = 0;
+    for (CachedScript* next = mPendingScripts.getFirst(); next;) {
+        auto script = next;
+        next = script->getNext();
+
+        // Skip any scripts that we decoded on the main thread rather than
+        // waiting for an off-thread operation to complete.
+        if (script->mReadyToExecute) {
+            script->remove();
+            continue;
+        }
+        // If we have enough data for one chunk and this script would put us
+        // over our chunk size limit, we're done.
+        if (size > SMALL_SCRIPT_CHUNK_THRESHOLD &&
+            size + script->mSize > chunkSize) {
+            break;
+        }
+        if (!mParsingScripts.append(script) ||
+            !mParsingSources.emplaceBack(script->Range(), script->mURL.get(), 0)) {
+            break;
+        }
+
+        LOG(Debug, "Beginning off-thread decode of script %s (%u bytes)\n",
+            script->mURL.get(), script->mSize);
+
+        script->remove();
+        size += script->mSize;
+    }
+
+    if (size == 0 && mPendingScripts.isEmpty()) {
        return;
    }

-    script->mToken = token;
-    script->mReadyToExecute = true;
+    AutoJSAPI jsapi;
+    MOZ_RELEASE_ASSERT(jsapi.Init(xpc::CompilationScope()));
+    JSContext* cx = jsapi.cx();

-    mal.NotifyAll();
+    JS::CompileOptions options(cx, JSVERSION_LATEST);
+    options.setNoScriptRval(true);
+
+    if (!JS::CanCompileOffThread(cx, options, size) ||
+        !JS::DecodeMultiOffThreadScripts(cx, options, mParsingSources,
+                                         OffThreadDecodeCallback,
+                                         static_cast<void*>(this))) {
+        // If we fail here, we don't move on to process the next batch, so make
+        // sure we don't have any other scripts left to process.
+        MOZ_ASSERT(mPendingScripts.isEmpty());
+        for (auto script : mPendingScripts) {
+            script->mReadyToExecute = true;
+        }
+
+        LOG(Info, "Can't decode %lu bytes of scripts off-thread", (unsigned long)size);
+        for (auto script : mParsingScripts) {
+            script->mReadyToExecute = true;
+        }
+        return;
+    }
+
+    cleanup.release();
+
+    LOG(Debug, "Initialized decoding of %u scripts (%u bytes) in %fms\n",
+        (unsigned)mParsingSources.length(), (unsigned)size, (TimeStamp::Now() - start).ToMilliseconds());
 }

+
 ScriptPreloader::CachedScript::CachedScript(ScriptPreloader& cache, InputBuffer& buf)
    : mCache(cache)
 {
@ -859,20 +960,6 @@ ScriptPreloader::CachedScript::XDREncode(JSContext* cx)
    return false;
 }

-void
-ScriptPreloader::CachedScript::Cancel()
-{
-    if (mToken) {
-        mCache.mMonitor.AssertCurrentThreadOwns();
-
-        AutoSafeJSAPI jsapi;
-        JS::CancelOffThreadScriptDecoder(jsapi.cx(), mToken);
-
-        mReadyToExecute = true;
-        mToken = nullptr;
-    }
-}
-
 JSScript*
 ScriptPreloader::CachedScript::GetJSScript(JSContext* cx)
 {
@ -881,35 +968,27 @@ ScriptPreloader::CachedScript::GetJSScript(JSContext* cx)
        return mScript;
    }

-    // If we have no token at this point, the script was too small to decode
+    // If we have no script at this point, the script was too small to decode
    // off-thread, or it was needed before the off-thread compilation was
    // finished, and is small enough to decode on the main thread rather than
    // wait for the off-thread decoding to finish. In either case, we decode
    // it synchronously the first time it's needed.
-    if (!mToken) {
-        MOZ_ASSERT(HasRange());
+    MOZ_ASSERT(HasRange());

-        JS::RootedScript script(cx);
-        if (JS::DecodeScript(cx, Range(), &script)) {
-            mScript = script;
+    auto start = TimeStamp::Now();
+    LOG(Info, "Decoding script %s on main thread...\n", mURL.get());

-            if (mCache.mSaveComplete) {
-                FreeData();
-            }
+    JS::RootedScript script(cx);
+    if (JS::DecodeScript(cx, Range(), &script)) {
+        mScript = script;
+
+        if (mCache.mSaveComplete) {
+            FreeData();
        }
-
-        return mScript;
    }

-    Maybe<JSAutoCompartment> ac;
-    if (JS::CompartmentCreationOptionsRef(cx).addonIdOrNull()) {
-        // Make sure we never try to finish the parse in a compartment with an
-        // add-on ID, it wasn't started in one.
-        ac.emplace(cx, xpc::CompilationScope());
-    }
+    LOG(Debug, "Finished decoding in %fms", (TimeStamp::Now() - start).ToMilliseconds());

-    mScript = JS::FinishOffThreadScriptDecoder(cx, mToken);
-    mToken = nullptr;
    return mScript;
 }

--- a/js/xpconnect/loader/ScriptPreloader.h
+++ b/js/xpconnect/loader/ScriptPreloader.h
@ -137,7 +137,7 @@ private:
    // the next session's cache file. If it was compiled in this session, its
    // mXDRRange will initially be empty, and its mXDRData buffer will be
    // populated just before it is written to the cache file.
-    class CachedScript
+    class CachedScript : public LinkedListElement<CachedScript>
    {
    public:
        CachedScript(CachedScript&&) = default;
@ -161,27 +161,18 @@ private:

        // For use with nsTArray::Sort.
        //
-        // Orders scripts by:
-        //
-        // 1) Async-decoded scripts before sync-decoded scripts, since the
-        //    former are needed immediately at startup, and should be stored
-        //    contiguously.
-        // 2) Script load time, so that scripts which are needed earlier are
-        //    stored earlier, and scripts needed at approximately the same
-        //    time are stored approximately contiguously.
+        // Orders scripts by script load time, so that scripts which are needed
+        // earlier are stored earlier, and scripts needed at approximately the
+        // same time are stored approximately contiguously.
        struct Comparator
        {
            bool Equals(const CachedScript* a, const CachedScript* b) const
            {
-              return (a->AsyncDecodable() == b->AsyncDecodable() &&
-                      a->mLoadTime == b->mLoadTime);
+              return a->mLoadTime == b->mLoadTime;
            }

            bool LessThan(const CachedScript* a, const CachedScript* b) const
            {
-              if (a->AsyncDecodable() != b->AsyncDecodable()) {
-                return a->AsyncDecodable();
-              }
              return a->mLoadTime < b->mLoadTime;
            }
        };
@ -198,8 +189,6 @@ private:
            const ScriptStatus mStatus;
        };

-        void Cancel();
-
        void FreeData()
        {
            // If the script data isn't mmapped, we need to release both it
@ -217,8 +206,6 @@ private:
          }
        }

-        bool AsyncDecodable() const { return mSize > MIN_OFFTHREAD_SIZE; }
-
        // Encodes this script into XDR data, and stores the result in mXDRData.
        // Returns true on success, false on failure.
        bool XDREncode(JSContext* cx);
@ -306,10 +293,6 @@ private:
        // whenever it is first executed.
        bool mReadyToExecute = false;

-        // The off-thread decode token for a completed off-thread decode, which
-        // has not yet been finalized on the main thread.
-        void* mToken = nullptr;
-
        // The set of processes in which this script has been used.
        EnumSet<ProcessType> mProcessTypes{};

@ -334,15 +317,31 @@ private:
        return &matcher;
    }

-    // There's a trade-off between the time it takes to setup an off-thread
-    // decode and the time we save by doing the decode off-thread. At this
-    // point, the setup is quite expensive, and 20K is about where we start to
-    // see an improvement rather than a regression.
+    // There's a significant setup cost for each off-thread decode operation,
+    // so scripts are decoded in chunks to minimize the overhead. There's a
+    // careful balancing act in choosing the size of chunks, to minimize the
+    // number of decode operations, while also minimizing the number of buffer
+    // underruns that require the main thread to wait for a script to finish
+    // decoding.
    //
-    // This also means that we get much better performance loading one big
-    // script than several small scripts, since the setup is per-script, and the
-    // OMT compile is almost always complete by the time we need a given script.
-    static constexpr int MIN_OFFTHREAD_SIZE = 20 * 1024;
+    // For the first chunk, we don't have much time between the start of the
+    // decode operation and the time the first script is needed, so that chunk
+    // needs to be fairly small. After the first chunk is finished, we have
+    // some buffered scripts to fall back on, and a lot more breathing room,
+    // so the chunks can be a bit bigger, but still not too big.
+    static constexpr int OFF_THREAD_FIRST_CHUNK_SIZE = 128 * 1024;
+    static constexpr int OFF_THREAD_CHUNK_SIZE = 512 * 1024;
+
+    // Ideally, we want every chunk to be smaller than the chunk sizes
+    // specified above. However, if we have some number of small scripts
+    // followed by a huge script that would put us over the normal chunk size,
+    // we're better off processing them as a single chunk.
+    //
+    // In order to guarantee that the JS engine will process a chunk
+    // off-thread, it needs to be at least 100K (which is an implementation
+    // detail that can change at any time), so make sure that we always hit at
+    // least that size, with a bit of breathing room to be safe.
+    static constexpr int SMALL_SCRIPT_CHUNK_THRESHOLD = 128 * 1024;

    // The maximum size of scripts to re-decode on the main thread if off-thread
    // decoding hasn't finished yet. In practice, we don't hit this very often,
@ -377,11 +376,11 @@ private:
    // decodes it synchronously on the main thread, as appropriate.
    JSScript* WaitForCachedScript(JSContext* cx, CachedScript* script);

-    // Begins decoding the given script in a background thread.
-    void DecodeScriptOffThread(JSContext* cx, CachedScript* script);
+    void DecodeNextBatch(size_t chunkSize);

    static void OffThreadDecodeCallback(void* token, void* context);
-    void CancelOffThreadParse(void* token);
+    void FinishOffThreadDecode();
+    void DoFinishOffThreadDecode();

    size_t ShallowHeapSizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf)
    {
@ -412,6 +411,22 @@ private:
    bool mDataPrepared = false;
    bool mCacheInvalidated = false;

+    // The list of scripts that we read from the initial startup cache file,
+    // but have yet to initiate a decode task for.
+    LinkedList<CachedScript> mPendingScripts;
+
+    // The lists of scripts and their sources that make up the chunk currently
+    // being decoded in a background thread.
+    JS::TranscodeSources mParsingSources;
+    Vector<CachedScript*> mParsingScripts;
+
+    // The token for the completed off-thread decode task.
+    void* mToken = nullptr;
+
+    // True if a runnable has been dispatched to the main thread to finish an
+    // off-thread decode operation.
+    bool mFinishDecodeRunnablePending = false;
+
    // The process type of the current process.
    static ProcessType sProcessType;