From c66fe2ba5dacacc27d60aa154bd451bd1ea98904 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Mon, 15 Apr 2019 21:11:40 +0000 Subject: [PATCH] Bug 1441436 - ARM64 Simulator: Add D&I cache coherency checks. r=sstangl Differential Revision: https://phabricator.services.mozilla.com/D19970 --HG-- extra : moz-landing-system : lando --- js/src/jit/ExecutableAllocator.h | 4 +- js/src/jit/arm64/vixl/MozCachingDecoder.h | 179 ++++++++++++++++++++ js/src/jit/arm64/vixl/MozCpu-vixl.cpp | 20 ++- js/src/jit/arm64/vixl/MozSimulator-vixl.cpp | 164 +++++++++++++++--- js/src/jit/arm64/vixl/Simulator-vixl.h | 39 +++++ 5 files changed, 382 insertions(+), 24 deletions(-) create mode 100644 js/src/jit/arm64/vixl/MozCachingDecoder.h diff --git a/js/src/jit/ExecutableAllocator.h b/js/src/jit/ExecutableAllocator.h index 0451f6e0da7a..f40fb5c826f8 100644 --- a/js/src/jit/ExecutableAllocator.h +++ b/js/src/jit/ExecutableAllocator.h @@ -216,7 +216,7 @@ class ExecutableAllocator { static void poisonCode(JSRuntime* rt, JitPoisonRangeVector& ranges); #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) || \ - defined(JS_SIMULATOR_ARM64) || defined(JS_CODEGEN_NONE) + defined(JS_CODEGEN_NONE) static void cacheFlush(void*, size_t) {} #elif defined(JS_SIMULATOR_ARM) || defined(JS_SIMULATOR_MIPS32) || \ defined(JS_SIMULATOR_MIPS64) @@ -289,7 +289,7 @@ class ExecutableAllocator { : "r0", "r1", "r2"); } } -#elif defined(JS_CODEGEN_ARM64) +#elif defined(JS_SIMULATOR_ARM64) || defined(JS_CODEGEN_ARM64) static void cacheFlush(void* code, size_t size) { vixl::CPU::EnsureIAndDCacheCoherency(code, size); } diff --git a/js/src/jit/arm64/vixl/MozCachingDecoder.h b/js/src/jit/arm64/vixl/MozCachingDecoder.h new file mode 100644 index 000000000000..5b4cfc17d503 --- /dev/null +++ b/js/src/jit/arm64/vixl/MozCachingDecoder.h @@ -0,0 +1,179 @@ +#ifndef VIXL_A64_MOZ_CACHING_DECODER_A64_H_ +#define VIXL_A64_MOZ_CACHING_DECODER_A64_H_ + +#include "mozilla/HashTable.h" + +#include 
"jit/arm64/vixl/Decoder-vixl.h"
+#include "js/AllocPolicy.h"
+
+#ifdef DEBUG
+#define JS_CACHE_SIMULATOR_ARM64 1
+#endif
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+namespace vixl {
+
+// This enumeration lists the different kinds of instructions which can be
+// decoded. These kinds correspond to the set of visitors defined by the
+// default Decoder.
+enum class InstDecodedKind : uint8_t {
+  NotDecodedYet,
+#define DECLARE(E) E,
+  VISITOR_LIST(DECLARE)
+#undef DECLARE
+};
+
+// A SinglePageDecodeCache is used to store the decoded kind of all instructions
+// in an executable page of code. Each time an instruction is decoded, its
+// decoded kind is recorded in this structure. The previous instruction value is
+// also recorded in this structure when using a debug build.
+//
+// The next time the same offset is visited, the instruction would be decoded
+// using the previously recorded decode kind. It is also compared against the
+// previously recorded bits of the instruction to check for potential missing
+// cache invalidations, in debug builds.
+//
+// This structure stores the equivalent of a single page of code to have better
+// memory locality when using the simulator, as opposed to having a hash-table
+// for all instructions. However a hash-table is used by the CachingDecoder to
+// map the prefixes of page addresses to these SinglePageDecodeCaches.
+class SinglePageDecodeCache {
+ public:
+  static const uintptr_t PageSize = 1 << 12;
+  static const uintptr_t PageMask = PageSize - 1;
+  static const uintptr_t InstSize = vixl::kInstructionSize;
+  static const uintptr_t InstMask = InstSize - 1;
+  static const uintptr_t InstPerPage = PageSize / InstSize;
+
+  // Create the cache of the page which contains |inst|, with every slot
+  // marked as NotDecodedYet.
+  SinglePageDecodeCache(const Instruction* inst)
+    : pageStart_(PageStart(inst))
+  {
+    // InstDecodedKind is backed by a uint8_t, thus a byte-wise fill sets each
+    // entry to NotDecodedYet. instCache_ is left as-is: decode() only reads an
+    // entry once the matching decodeCache_ slot has been set.
+    memset(&decodeCache_, int(InstDecodedKind::NotDecodedYet), sizeof(decodeCache_));
+  }
+  // Compute the start address of the page which contains this instruction.
+  static uintptr_t PageStart(const Instruction* inst) {
+    return uintptr_t(inst) & ~PageMask;
+  }
+  // Returns whether the instruction decoded kind is stored in this
+  // SinglePageDecodeCache.
+  bool contains(const Instruction* inst) const {
+    return pageStart_ == PageStart(inst);
+  }
+  // Mark the slot of |inst| as NotDecodedYet. The recorded instruction bits
+  // are kept, they are overwritten by the next decodePtr() call.
+  void clearDecode(const Instruction* inst) {
+    uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+    decodeCache_[offset] = InstDecodedKind::NotDecodedYet;
+  }
+  // Record the current bits of |inst| and return the address of its decoded
+  // kind slot, such that the caller can fill it once the kind is known.
+  InstDecodedKind* decodePtr(const Instruction* inst) {
+    uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+    uint32_t instValue = *reinterpret_cast<const uint32_t*>(inst);
+    instCache_[offset] = instValue;
+    return &decodeCache_[offset];
+  }
+  // Return the decoded kind recorded for |inst|, or NotDecodedYet. When a kind
+  // is recorded, assert that the instruction bits did not change since they
+  // were recorded, as this would imply a missing cache invalidation.
+  InstDecodedKind decode(const Instruction* inst) const {
+    uintptr_t offset = (uintptr_t(inst) & PageMask) / InstSize;
+    InstDecodedKind val = decodeCache_[offset];
+    uint32_t instValue = *reinterpret_cast<const uint32_t*>(inst);
+    MOZ_ASSERT_IF(val != InstDecodedKind::NotDecodedYet,
+                  instCache_[offset] == instValue);
+    return val;
+  }
+
+ private:
+  // Record the address at which the corresponding code page starts.
+  const uintptr_t pageStart_;
+
+  // Cache what instruction got decoded previously, in order to assert if we see
+  // any stale instructions after.
+  uint32_t instCache_[InstPerPage];
+
+  // Cache the decoding of the instruction such that we can skip the decoding
+  // part.
+  InstDecodedKind decodeCache_[InstPerPage];
+};
+
+// A DecoderVisitor which will record which visitor function should be called
+// the next time we want to decode the same instruction.
+class CachingDecoderVisitor : public DecoderVisitor {
+ public:
+  CachingDecoderVisitor() = default;
+  virtual ~CachingDecoderVisitor() {}
+
+  // Every visitor does the same thing: if a slot was handed over with
+  // setDecodePtr, store the kind of the visitor being called in it and forget
+  // the slot, such that each slot is written at most once.
+#define DECLARE(A) virtual void Visit##A(const Instruction* instr) { \
+    if (last_) { \
+      MOZ_ASSERT(*last_ == InstDecodedKind::NotDecodedYet); \
+      *last_ = InstDecodedKind::A; \
+      last_ = nullptr; \
+    } \
+  }
+
+  VISITOR_LIST(DECLARE)
+#undef DECLARE
+
+  // Set the slot in which the kind of the next visited instruction is stored.
+  void setDecodePtr(InstDecodedKind* ptr) {
+    last_ = ptr;
+  }
+
+ private:
+  // Slot to be filled by the next visitor call, or nullptr if there is none.
+  // This must start as nullptr, as the visitors test it before any call to
+  // setDecodePtr.
+  InstDecodedKind* last_ = nullptr;
+};
+
+// The Caching decoder works by extending the default vixl Decoder class. It
+// extends it by overloading the Decode function.
+//
+// The overloaded Decode function checks whether the instruction given as
+// argument got decoded before or since it got invalidated. If it was not
+// previously decoded, the value of the instruction is recorded as well as the
+// kind of instruction. Otherwise, the value of the instruction is checked
+// against the previously recorded value and the instruction kind is used to
+// skip the decoding visitor and resume the execution of the instruction.
+//
+// The caching decoder stores the equivalent of a page of executable code in a
+// hash-table. Each SinglePageDecodeCache stores an array of decoded kinds as
+// well as the value of the previously decoded instruction.
+//
+// When testing if an instruction was decoded before, we check if the address of
+// the instruction is contained in the last SinglePageDecodeCache. If it is not,
+// then the hash-table entry is queried and created if necessary, and the last
+// SinglePageDecodeCache is updated. Then, the last SinglePageDecodeCache
+// necessarily contains the decoded kind of the instruction given as argument.
+//
+// The caching decoder adds an extra function for flushing the cache, which is
+// in charge of clearing the decoded kind of the instructions in the range of
+// addresses given as argument. This is indirectly called by
+// CPU::EnsureIAndDCacheCoherency.
+class CachingDecoder : public Decoder {
+  // Maps the start address of a code page to its SinglePageDecodeCache.
+  // NOTE(review): key and value types are inferred from their uses in
+  // Decode(); confirm that no extra hash or alloc-policy argument is expected.
+  using ICacheMap = mozilla::HashMap<uintptr_t, SinglePageDecodeCache*>;
+ public:
+  CachingDecoder()
+    : lastPage_(nullptr)
+  {
+    PrependVisitor(&cachingDecoder_);
+  }
+  ~CachingDecoder() {
+    RemoveVisitor(&cachingDecoder_);
+    // The pages are allocated with js_new in Decode() and iCache_ only holds
+    // raw pointers to them, so they have to be released explicitly.
+    for (auto iter = iCache_.iter(); !iter.done(); iter.next()) {
+      js_delete(iter.get().value());
+    }
+  }
+
+  // Decode |instr|, using the decoded kind recorded for its address if any.
+  void Decode(const Instruction* instr);
+  void Decode(Instruction* instr) {
+    Decode(const_cast<const Instruction*>(instr));
+  }
+
+  // Clear the decoded kind of the instructions in [start, start + size).
+  void FlushICache(void* start, size_t size);
+
+ private:
+  // Record the type of the decoded instruction, to avoid decoding it a second
+  // time the next time we execute it.
+  CachingDecoderVisitor cachingDecoder_;
+
+  // Store the mapping of Instruction pointer to the corresponding
+  // SinglePageDecodeCache.
+  ICacheMap iCache_;
+
+  // Record the last SinglePageDecodeCache seen, such that we can quickly access
+  // it for the next instruction.
+  SinglePageDecodeCache* lastPage_;
+};
+
+}
+#endif // !JS_CACHE_SIMULATOR_ARM64
+#endif // !VIXL_A64_MOZ_CACHING_DECODER_A64_H_
diff --git a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
index 374220250107..8710edb283c7 100644
--- a/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
+++ b/js/src/jit/arm64/vixl/MozCpu-vixl.cpp
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "jit/arm64/vixl/Cpu-vixl.h"
+#include "jit/arm64/vixl/Simulator-vixl.h"
 #include "jit/arm64/vixl/Utils-vixl.h"
 
 #include "util/Windows.h"
@@ -48,7 +49,24 @@ uint32_t CPU::GetCacheType() {
 
 
 void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
-#if defined(_MSC_VER) && defined(_M_ARM64)
+#ifdef JS_CACHE_SIMULATOR_ARM64
+  // This code attempts to emulate what the following assembly sequence is doing,
+  // which is sending the information to other cores that some cache lines have
+  // to be invalidated and applying them on the current core.
+  //
+  // This is done by recording the current range to be flushed to all
+  // simulators, then if there is a simulator associated with the current
+  // thread, applying all flushed ranges as the "isb" instruction would do.
+  using js::jit::SimulatorProcess;
+  js::jit::AutoLockSimulatorCache alsc;
+  if (length > 0) {
+    SimulatorProcess::recordICacheFlush(address, length);
+  }
+  Simulator* sim = vixl::Simulator::Current();
+  if (sim) {
+    sim->FlushICache();
+  }
+#elif defined(_MSC_VER) && defined(_M_ARM64)
   FlushInstructionCache(GetCurrentProcess(), address, length);
 #elif defined(__aarch64__)
   // Implement the cache synchronisation for all targets where AArch64 is the
diff --git a/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
index 147a72edb237..7d588d5e3787 100644
--- a/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
+++ b/js/src/jit/arm64/vixl/MozSimulator-vixl.cpp
@@ -27,6 +27,7 @@
 #include "mozilla/DebugOnly.h"
 
 #include "jit/arm64/vixl/Debugger-vixl.h"
+#include "jit/arm64/vixl/MozCachingDecoder.h"
 #include "jit/arm64/vixl/Simulator-vixl.h"
 #include "jit/IonTypes.h"
 #include "js/UniquePtr.h"
@@ -154,13 +155,20 @@ void Simulator::init(Decoder* decoder, FILE* stream) {
 
 
 Simulator* Simulator::Current() {
   JSContext* cx = js::TlsContext.get();
-  MOZ_ASSERT(js::CurrentThreadCanAccessRuntime(cx->runtime()));
+  if (!cx) {
+    return nullptr;
+  }
+  JSRuntime* rt = cx->runtime();
+  if (!rt) {
+    return nullptr;
+  }
+  MOZ_ASSERT(js::CurrentThreadCanAccessRuntime(rt));
   return cx->simulator();
 }
 
 
 Simulator* Simulator::Create() {
-  Decoder *decoder = js_new<vixl::Decoder>();
+  Decoder *decoder = js_new<Decoder>();
   if (!decoder)
     return nullptr;
 
@@ -168,14 +176,25 @@ Simulator* Simulator::Create() {
   // FIXME: We should free it at some point.
   // FIXME: Note that it can't be stored in the SimulatorRuntime due to lifetime conflicts.
   js::UniquePtr<Simulator> sim;
-  if (getenv("USE_DEBUGGER") != nullptr)
+  if (getenv("USE_DEBUGGER") != nullptr) {
     sim.reset(js_new<Debugger>(decoder, stdout));
-  else
+  } else {
     sim.reset(js_new<Simulator>(decoder, stdout));
+  }
 
   // Check if Simulator:init ran out of memory.
-  if (sim && sim->oom())
+  if (!sim || sim->oom()) {
     return nullptr;
+  }
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+  // |sim| is known to be non-null here. Register it in the SimulatorProcess
+  // such that cache flushes made by other threads get recorded for it.
+  js::jit::AutoLockSimulatorCache alsc;
+  if (!SimulatorProcess::registerSimulator(sim.get())) {
+    return nullptr;
+  }
+#endif
 
   return sim.release();
 }
@@ -285,20 +304,6 @@ int64_t Simulator::call(uint8_t* entry, int argument_count, ...) {
 }
 
 
-// Protects the icache and redirection properties of the simulator.
-class AutoLockSimulatorCache : public js::LockGuard<js::Mutex>
-{
-  friend class Simulator;
-  using Base = js::LockGuard<js::Mutex>;
-
- public:
-  explicit AutoLockSimulatorCache()
-    : Base(SimulatorProcess::singleton_->lock_)
-  {
-  }
-};
-
-
 // When the generated code calls a VM function (masm.callWithABI) we need to
 // call that function instead of trying to execute it with the simulator
 // (because it's x64 code instead of AArch64 code). We do that by redirecting the VM
@@ -315,7 +320,6 @@ class Redirection
       next_(nullptr)
   {
     next_ = SimulatorProcess::redirection();
-    // TODO: Flush ICache?
     SimulatorProcess::setRedirection(this);
 
     Instruction* instr = (Instruction*)(&svcInstruction_);
@@ -328,7 +332,7 @@ class Redirection
   ABIFunctionType type() const { return type_; }
 
   static Redirection* Get(void* nativeFunction, ABIFunctionType type) {
-    AutoLockSimulatorCache alsr;
+    js::jit::AutoLockSimulatorCache alsr;
 
     // TODO: Store srt_ in the simulator for this assertion.
// VIXL_ASSERT_IF(pt->simulator(), pt->simulator()->srt_ == srt);
@@ -713,9 +717,141 @@ Simulator::VisitCallRedirection(const Instruction* instr)
     printf("SVCRET\n");
 }
 
+#ifdef JS_CACHE_SIMULATOR_ARM64
+// Apply to the decoder of this simulator every instruction-cache flush which
+// got recorded for it, then clear the list. The SimulatorProcess lock has to
+// be held, as asserted by getICacheFlushes.
+void
+Simulator::FlushICache()
+{
+  // Flush the caches recorded by the current thread as well as what got
+  // recorded from other threads before this call.
+  auto& vec = SimulatorProcess::getICacheFlushes(this);
+  for (auto& flush : vec) {
+    decoder_->FlushICache(flush.start, flush.length);
+  }
+  vec.clear();
+}
+
+// Decode |instr|, reusing the decoded kind cached for its address if there is
+// one. Otherwise the default Decoder runs, and cachingDecoder_ is given the
+// slot in which the kind of the visitor being reached has to be stored.
+void CachingDecoder::Decode(const Instruction* instr) {
+  InstDecodedKind state;
+  if (lastPage_ && lastPage_->contains(instr)) {
+    state = lastPage_->decode(instr);
+  } else {
+    uintptr_t key = SinglePageDecodeCache::PageStart(instr);
+    ICacheMap::AddPtr p = iCache_.lookupForAdd(key);
+    if (p) {
+      lastPage_ = p->value();
+      state = lastPage_->decode(instr);
+    } else {
+      js::AutoEnterOOMUnsafeRegion oomUnsafe;
+      SinglePageDecodeCache* newPage = js_new<SinglePageDecodeCache>(instr);
+      if (!newPage || !iCache_.add(p, key, newPage)) {
+        oomUnsafe.crash("Simulator SinglePageDecodeCache");
+      }
+      lastPage_ = newPage;
+      state = InstDecodedKind::NotDecodedYet;
+    }
+  }
+
+  switch (state) {
+    case InstDecodedKind::NotDecodedYet: {
+      cachingDecoder_.setDecodePtr(lastPage_->decodePtr(instr));
+      this->Decoder::Decode(instr);
+      break;
+    }
+#define CASE(A) \
+    case InstDecodedKind::A: { \
+      Visit##A(instr); \
+      break; \
+    }
+
+    VISITOR_LIST(CASE)
+#undef CASE
+  }
+}
+
+// Clear the decoded kind of every instruction in [start, start + size). Pages
+// which have no entry in iCache_ are skipped.
+void CachingDecoder::FlushICache(void* start, size_t size) {
+  MOZ_ASSERT(uintptr_t(start) % vixl::kInstructionSize == 0);
+  MOZ_ASSERT(size % vixl::kInstructionSize == 0);
+  const uint8_t* it = reinterpret_cast<const uint8_t*>(start);
+  const uint8_t* end = it + size;
+  SinglePageDecodeCache* last = nullptr;
+  for (; it < end; it += vixl::kInstructionSize) {
+    auto instr = reinterpret_cast<const Instruction*>(it);
+    if (last && last->contains(instr)) {
+      last->clearDecode(instr);
+    } else {
+      uintptr_t key = SinglePageDecodeCache::PageStart(instr);
+      ICacheMap::Ptr p = iCache_.lookup(key);
+      if (p) {
+        last = p->value();
+        last->clearDecode(instr);
+      }
+    }
+  }
+}
+#endif
 
 }  // namespace vixl
 
+namespace js {
+namespace jit {
+
+#ifdef JS_CACHE_SIMULATOR_ARM64
+// Add the range [start, start + length) to the pending flushes of every
+// registered simulator. Crashes if a record cannot be appended.
+void SimulatorProcess::recordICacheFlush(void* start, size_t length) {
+  MOZ_ASSERT(singleton_->lock_.ownedByCurrentThread());
+  AutoEnterOOMUnsafeRegion oomUnsafe;
+  ICacheFlush range{start, length};
+  for (auto& s : singleton_->pendingFlushes_) {
+    if (!s.records.append(range)) {
+      oomUnsafe.crash("Simulator recordFlushICache");
+    }
+  }
+}
+
+// Return the pending flushes of |sim|. Crashes if |sim| is not registered.
+SimulatorProcess::ICacheFlushes& SimulatorProcess::getICacheFlushes(Simulator* sim) {
+  MOZ_ASSERT(singleton_->lock_.ownedByCurrentThread());
+  for (auto& s : singleton_->pendingFlushes_) {
+    if (s.thread == sim) {
+      return s.records;
+    }
+  }
+  MOZ_CRASH("Simulator is not registered in the SimulatorProcess");
+}
+
+// Start recording flushes for |sim|, with an empty list of pending flushes.
+// Returns false if the new entry cannot be appended.
+bool SimulatorProcess::registerSimulator(Simulator* sim) {
+  MOZ_ASSERT(singleton_->lock_.ownedByCurrentThread());
+  ICacheFlushes empty;
+  SimFlushes simFlushes{sim, std::move(empty)};
+  return singleton_->pendingFlushes_.append(std::move(simFlushes));
+}
+
+// Stop recording flushes for |sim|. Crashes if |sim| is not registered.
+void SimulatorProcess::unregisterSimulator(Simulator* sim) {
+  MOZ_ASSERT(singleton_->lock_.ownedByCurrentThread());
+  for (auto& s : singleton_->pendingFlushes_) {
+    if (s.thread == sim) {
+      singleton_->pendingFlushes_.erase(&s);
+      return;
+    }
+  }
+  MOZ_CRASH("Simulator is not registered in the SimulatorProcess");
+}
+#endif // !JS_CACHE_SIMULATOR_ARM64
+
+} // namespace jit
+} // namespace js
 
 vixl::Simulator* JSContext::simulator() const {
   return simulator_;
diff --git a/js/src/jit/arm64/vixl/Simulator-vixl.h b/js/src/jit/arm64/vixl/Simulator-vixl.h
index f8a24cbcd338..54c4da5301bd 100644
--- a/js/src/jit/arm64/vixl/Simulator-vixl.h
+++ b/js/src/jit/arm64/vixl/Simulator-vixl.h
@@ -38,6 +38,7 @@
 #include "jit/arm64/vixl/Globals-vixl.h"
 #include "jit/arm64/vixl/Instructions-vixl.h"
 #include "jit/arm64/vixl/Instrument-vixl.h"
+#include
"jit/arm64/vixl/MozCachingDecoder.h"
 #include "jit/arm64/vixl/Simulator-Constants-vixl.h"
 #include "jit/arm64/vixl/Utils-vixl.h"
 #include "jit/IonTypes.h"
@@ -697,6 +698,9 @@ class Redirection;
 
 class Simulator : public DecoderVisitor {
  public:
+#ifdef JS_CACHE_SIMULATOR_ARM64
+  using Decoder = CachingDecoder;
+#endif
   explicit Simulator(Decoder* decoder, FILE* stream = stdout);
   ~Simulator();
 
@@ -715,6 +719,9 @@ class Simulator : public DecoderVisitor {
   void setGPR64Result(int64_t result);
   void setFP32Result(float result);
   void setFP64Result(double result);
+#ifdef JS_CACHE_SIMULATOR_ARM64
+  void FlushICache();
+#endif
   void VisitCallRedirection(const Instruction* instr);
   static uintptr_t StackLimit() {
     return Simulator::Current()->stackLimit();
@@ -2698,6 +2705,28 @@ class SimulatorProcess
   js::Mutex lock_;
   vixl::Redirection* redirection_;
 
+#ifdef JS_CACHE_SIMULATOR_ARM64
+  // For each registered simulator, record the ranges of instructions which
+  // got invalidated and which remain to be flushed from its decoder.
+  // NOTE(review): only the element types of the two Vectors below could be
+  // inferred from their uses; confirm any inline-capacity argument upstream.
+  struct ICacheFlush {
+    void* start;
+    size_t length;
+  };
+  using ICacheFlushes = mozilla::Vector<ICacheFlush>;
+  struct SimFlushes {
+    Simulator* thread;
+    ICacheFlushes records;
+  };
+  mozilla::Vector<SimFlushes> pendingFlushes_;
+
+  static void recordICacheFlush(void* start, size_t length);
+  static ICacheFlushes& getICacheFlushes(Simulator* sim);
+  static MOZ_MUST_USE bool registerSimulator(Simulator* sim);
+  static void unregisterSimulator(Simulator* sim);
+#endif
+
   static void setRedirection(vixl::Redirection* redirection) {
     MOZ_ASSERT(singleton_->lock_.ownedByCurrentThread());
     singleton_->redirection_ = redirection;
@@ -2718,6 +2747,18 @@ class SimulatorProcess
   }
 };
 
+// Protects the icache and redirection properties of the simulator.
+class AutoLockSimulatorCache : public js::LockGuard<js::Mutex>
+{
+  using Base = js::LockGuard<js::Mutex>;
+
+ public:
+  explicit AutoLockSimulatorCache()
+    : Base(SimulatorProcess::singleton_->lock_)
+  {
+  }
+};
+
 } // namespace jit
 } // namespace js
 