2018-11-30 22:52:05 +03:00
|
|
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
2018-11-30 18:39:55 +03:00
|
|
|
* vim: set ts=8 sts=2 et sw=2 tw=80:
|
2013-08-20 10:45:26 +04:00
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
|
|
|
|
#ifndef js_ProfilingStack_h
|
|
|
|
#define js_ProfilingStack_h
|
|
|
|
|
2017-05-26 02:37:28 +03:00
|
|
|
#include <algorithm>
|
|
|
|
#include <stdint.h>
|
|
|
|
|
2013-08-20 10:45:26 +04:00
|
|
|
#include "jstypes.h"
|
2018-02-21 19:30:19 +03:00
|
|
|
|
2019-02-16 20:37:43 +03:00
|
|
|
#include "js/ProfilingCategory.h"
|
2015-06-18 08:05:42 +03:00
|
|
|
#include "js/TypeDecls.h"
|
2013-08-20 10:45:26 +04:00
|
|
|
#include "js/Utility.h"
|
|
|
|
|
2018-11-19 20:02:47 +03:00
|
|
|
class JS_PUBLIC_API JSTracer;
|
2019-12-10 01:28:19 +03:00
|
|
|
class JS_FRIEND_API ProfilingStack;
|
2017-05-26 02:51:31 +03:00
|
|
|
|
2018-05-15 08:03:11 +03:00
|
|
|
// This file defines the classes ProfilingStack and ProfilingStackFrame.
|
|
|
|
// The ProfilingStack manages an array of ProfilingStackFrames.
|
|
|
|
// It keeps track of the "label stack" and the JS interpreter stack.
|
|
|
|
// The two stack types are interleaved.
|
|
|
|
//
|
2018-02-06 00:41:29 +03:00
|
|
|
// Usage:
|
|
|
|
//
|
2018-05-15 08:03:11 +03:00
|
|
|
// ProfilingStack* profilingStack = ...;
|
2018-02-06 00:41:29 +03:00
|
|
|
//
|
2018-05-15 06:21:29 +03:00
|
|
|
// // For label frames:
|
2018-05-15 08:03:11 +03:00
|
|
|
// profilingStack->pushLabelFrame(...);
|
2018-05-15 08:14:03 +03:00
|
|
|
// // Execute some code. When finished, pop the frame:
|
2018-05-15 08:03:11 +03:00
|
|
|
// profilingStack->pop();
|
2018-02-06 00:41:29 +03:00
|
|
|
//
|
|
|
|
// // For JS stack frames:
|
2018-05-15 08:03:11 +03:00
|
|
|
// profilingStack->pushJSFrame(...);
|
2018-05-15 08:14:03 +03:00
|
|
|
// // Execute some code. When finished, pop the frame:
|
2018-05-15 08:03:11 +03:00
|
|
|
// profilingStack->pop();
|
2018-02-06 00:41:29 +03:00
|
|
|
//
|
|
|
|
//
|
|
|
|
// Concurrency considerations
|
|
|
|
//
|
2018-05-15 08:03:11 +03:00
|
|
|
// A thread's profiling stack (and the frames inside it) is only modified by
|
|
|
|
// that thread. However, the profiling stack can be *read* by a different
|
2018-02-06 00:41:29 +03:00
|
|
|
// thread, the sampler thread: Whenever the profiler wants to sample a given
|
|
|
|
// thread A, the following happens:
|
|
|
|
// (1) Thread A is suspended.
|
2018-05-15 08:03:11 +03:00
|
|
|
// (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
|
2018-05-15 08:14:03 +03:00
|
|
|
// including all ProfilingStackFrames that are currently in that stack
|
2018-05-15 08:03:11 +03:00
|
|
|
// (profilingStack->frames[0..profilingStack->stackSize()]).
|
2018-02-06 00:41:29 +03:00
|
|
|
// (3) Thread A is resumed.
|
|
|
|
//
|
|
|
|
// Thread suspension is achieved using platform-specific APIs; refer to each
|
|
|
|
// platform's Sampler::SuspendAndSampleAndResumeThread implementation in
|
|
|
|
// platform-*.cpp for details.
|
|
|
|
//
|
2018-05-15 08:03:11 +03:00
|
|
|
// When the thread is suspended, the values in profilingStack->stackPointer and
|
|
|
|
// in the stack frame range
|
|
|
|
// profilingStack->frames[0..profilingStack->stackPointer] need to be in a
|
2018-05-15 08:14:03 +03:00
|
|
|
// consistent state, so that thread S does not read partially-constructed stack
|
|
|
|
// frames. More specifically, we have two requirements:
|
|
|
|
// (1) When adding a new frame at the top of the stack, its ProfilingStackFrame
|
|
|
|
// data needs to be put in place *before* the stackPointer is incremented,
|
|
|
|
// and the compiler + CPU need to know that this order matters.
|
|
|
|
// (2) When popping a frame from the stack and then preparing the
|
|
|
|
// ProfilingStackFrame data for the next frame that is about to be pushed,
|
|
|
|
// the decrement of the stackPointer in pop() needs to happen *before* the
|
|
|
|
// ProfilingStackFrame for the new frame is being populated, and the
|
|
|
|
// compiler + CPU need to know that this order matters.
|
2018-02-06 00:41:29 +03:00
|
|
|
//
|
|
|
|
// We can express the relevance of these orderings in multiple ways.
|
|
|
|
// Option A is to make stackPointer an atomic with SequentiallyConsistent
|
|
|
|
// memory ordering. This would ensure that no writes in thread A would be
|
|
|
|
// reordered across any writes to stackPointer, which satisfies requirements
|
|
|
|
// (1) and (2) at the same time. Option A is the simplest.
|
|
|
|
// Option B is to use ReleaseAcquire memory ordering both for writes to
|
2018-05-15 08:14:03 +03:00
|
|
|
// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
|
|
|
|
// ensure that all writes that happened *before this write in program order* are
|
|
|
|
// not reordered to happen after this write. ReleaseAcquire ordering places no
|
2018-02-06 00:41:29 +03:00
|
|
|
// requirements on the ordering of writes that happen *after* this write in
|
|
|
|
// program order.
|
|
|
|
// Using release-stores for writes to stackPointer expresses requirement (1),
|
2018-05-15 08:14:03 +03:00
|
|
|
// and using release-stores for writes to the ProfilingStackFrame fields
|
|
|
|
// expresses requirement (2).
|
2018-02-06 00:41:29 +03:00
|
|
|
//
|
|
|
|
// Option B is more complicated than option A, but has much better performance
|
|
|
|
// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
|
|
|
|
// from option A to option B reduced the overhead of pushing+popping a
|
2018-05-15 08:14:03 +03:00
|
|
|
// ProfilingStackFrame by 10 nanoseconds.
|
2018-02-06 00:41:29 +03:00
|
|
|
// On x86/64, release-stores require no explicit hardware barriers or lock
|
|
|
|
// instructions.
|
|
|
|
// On ARM/64, option B may be slower than option A, because the compiler will
|
|
|
|
// generate hardware barriers for every single release-store instead of just
|
|
|
|
// for the writes to stackPointer. However, the actual performance impact of
|
|
|
|
// this has not yet been measured on ARM, so we're currently using option B
|
|
|
|
// everywhere. This is something that we may want to change in the future once
|
|
|
|
// we've done measurements.
|
|
|
|
|
2013-08-20 10:45:26 +04:00
|
|
|
namespace js {
|
|
|
|
|
|
|
|
// A call stack can be specified to the JS engine such that all JS entry/exits
|
2018-05-15 08:14:03 +03:00
|
|
|
// to functions push/pop a stack frame to/from the specified stack.
|
2013-08-20 10:45:26 +04:00
|
|
|
//
|
2017-01-25 01:08:15 +03:00
|
|
|
// For more detailed information, see vm/GeckoProfiler.h.
|
2013-08-20 10:45:26 +04:00
|
|
|
//
|
2018-05-15 08:14:03 +03:00
|
|
|
class ProfilingStackFrame {
|
|
|
|
// A ProfilingStackFrame represents either a label frame or a JS frame.
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-02-06 00:41:29 +03:00
|
|
|
// WARNING WARNING WARNING
|
|
|
|
//
|
|
|
|
// All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
|
|
|
|
// that writes to these fields are release-writes, which ensures that
|
|
|
|
// earlier writes in this thread don't get reordered after the writes to
|
|
|
|
// these fields. In particular, the decrement of the stack pointer in
|
2018-05-15 08:03:11 +03:00
|
|
|
// ProfilingStack::pop() is a write that *must* happen before the values in
|
2018-05-15 08:14:03 +03:00
|
|
|
// this ProfilingStackFrame are changed. Otherwise, the sampler thread might
|
|
|
|
// see an inconsistent state where the stack pointer still points to a
|
|
|
|
// ProfilingStackFrame which has already been popped off the stack and whose
|
2018-02-06 00:41:29 +03:00
|
|
|
// fields have now been partially repopulated with new values.
|
|
|
|
// See the "Concurrency considerations" paragraph at the top of this file
|
|
|
|
// for more details.
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-11-06 07:33:07 +03:00
|
|
|
// Descriptive label for this stack frame. Must be a static string! Can be
|
2018-05-15 08:14:03 +03:00
|
|
|
// an empty string, but not a null pointer.
|
2020-02-27 20:39:15 +03:00
|
|
|
mozilla::Atomic<const char*, mozilla::ReleaseAcquire> label_;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 08:14:03 +03:00
|
|
|
// An additional descriptive string of this frame which is combined with
|
|
|
|
// |label_| in profiler output. Need not be (and usually isn't) static. Can
|
|
|
|
// be null.
|
2020-02-27 20:39:15 +03:00
|
|
|
mozilla::Atomic<const char*, mozilla::ReleaseAcquire> dynamicString_;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 08:14:03 +03:00
|
|
|
// Stack pointer for non-JS stack frames, the script pointer otherwise.
|
2020-02-27 20:39:15 +03:00
|
|
|
mozilla::Atomic<void*, mozilla::ReleaseAcquire> spOrScript;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2020-09-01 07:50:52 +03:00
|
|
|
// ID of the JS Realm for JS stack frames.
|
2018-05-15 08:14:03 +03:00
|
|
|
// Must not be used on non-JS frames; it'll contain either the default 0,
|
|
|
|
// or a leftover value from a previous JS stack frame that was using this
|
2017-05-18 10:17:46 +03:00
|
|
|
// ProfilingStackFrame object.
|
2020-09-01 07:50:52 +03:00
|
|
|
mozilla::Atomic<uint64_t, mozilla::ReleaseAcquire> realmID_;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2020-09-01 07:50:52 +03:00
|
|
|
// The bytecode offset for JS stack frames.
|
2019-11-15 11:01:30 +03:00
|
|
|
// Must not be used on non-JS frames; it'll contain either the default 0,
|
|
|
|
// or a leftover value from a previous JS stack frame that was using this
|
|
|
|
// ProfilingStackFrame object.
|
2020-09-01 07:50:52 +03:00
|
|
|
mozilla::Atomic<int32_t, mozilla::ReleaseAcquire> pcOffsetIfJS_;
|
2019-11-15 11:01:30 +03:00
|
|
|
|
2019-02-16 20:37:58 +03:00
|
|
|
// Bits 0...15 hold the Flags. Bits 16...31 hold the category pair.
|
2020-02-27 20:39:15 +03:00
|
|
|
mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> flagsAndCategoryPair_;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 08:14:03 +03:00
|
|
|
static int32_t pcToOffset(JSScript* aScript, jsbytecode* aPc);
|
2018-11-30 13:46:48 +03:00
|
|
|
|
|
|
|
public:
|
2018-05-15 08:14:03 +03:00
|
|
|
ProfilingStackFrame() = default;
|
|
|
|
// Copies every field of |other| into this frame, one atomic store per field.
// Each atomic source is first read into a plain local and then stored, rather
// than assigning one Atomic<> object directly to another.
ProfilingStackFrame& operator=(const ProfilingStackFrame& other) {
  // label() is used instead of reading label_ directly, so if |other| has
  // LABEL_DETERMINED_BY_CATEGORY_PAIR set, the category pair's label is what
  // ends up stored here.
  label_ = other.label();
  dynamicString_ = other.dynamicString();

  void* const srcSpOrScript = other.spOrScript;
  spOrScript = srcSpOrScript;

  const int32_t srcPcOffset = other.pcOffsetIfJS_;
  pcOffsetIfJS_ = srcPcOffset;

  const uint64_t srcRealmID = other.realmID_;
  realmID_ = srcRealmID;

  // The combined flags/category word is written last.
  const uint32_t srcFlagsAndCategoryPair = other.flagsAndCategoryPair_;
  flagsAndCategoryPair_ = srcFlagsAndCategoryPair;

  return *this;
}
|
|
|
|
|
2020-06-09 02:46:16 +03:00
|
|
|
// Reserve up to 16 bits for flags, and 16 for category pair.
|
2018-11-06 07:32:29 +03:00
|
|
|
enum class Flags : uint32_t {
  // Frame-kind flags. The first three are mutually exclusive; they are still
  // given one bit each for simplicity.

  // Regular label frame. These usually come from AutoProfilerLabel.
  IS_LABEL_FRAME = 1u << 0,

  // Special frame marking the start of a run of JS profiling stack frames.
  // Everything except the sp field of an IS_SP_MARKER_FRAME frame is
  // ignored. Such frames are needed to order JS and LABEL frames correctly,
  // because JS frames carry no sp ("stack pointer") information.
  IS_SP_MARKER_FRAME = 1u << 1,

  // JS frame.
  IS_JS_FRAME = 1u << 2,

  // Interpreter JS frame that has OSR-ed into baseline. An IS_JS_FRAME frame
  // may have this flag set and cleared during its lifetime. JS_OSR frames
  // are ignored.
  JS_OSR = 1u << 3,

  // String-template flags; these three are mutually exclusive. When a frame
  // has both a label and a dynamic string, JSON serialization combines them
  // as "<label> <dynamicString>" by default. These flags select a different
  // form.
  STRING_TEMPLATE_METHOD = 1u << 4,  // "<label>.<dynamicString>"
  STRING_TEMPLATE_GETTER = 1u << 5,  // "get <label>.<dynamicString>"
  STRING_TEMPLATE_SETTER = 1u << 6,  // "set <label>.<dynamicString>"

  // Marks the frame as "relevantForJS" in the profile JSON, so that it shows
  // up in the "JS only" call tree view.
  RELEVANT_FOR_JS = 1u << 7,

  // The frame's own label is ignored and replaced by the subcategory's label.
  LABEL_DETERMINED_BY_CATEGORY_PAIR = 1u << 8,

  // The frame's dynamic string does not contain user data.
  NONSENSITIVE = 1u << 9,

  // JS Baseline Interpreter frame.
  IS_BLINTERP_FRAME = 1u << 10,

  // Number of low bits reserved for flags, and the mask selecting them.
  FLAGS_BITCOUNT = 16,
  FLAGS_MASK = (1u << FLAGS_BITCOUNT) - 1u
};
|
|
|
|
|
2019-02-16 20:37:43 +03:00
|
|
|
// The category pair is stored in the bits above the FLAGS_BITCOUNT reserved
// flag bits of the 32-bit flagsAndCategoryPair_ word, so the largest category
// pair value must fit in the bits that remain.
static_assert(
    uint32_t(JS::ProfilingCategoryPair::LAST) <=
        (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)),
    "Too many category pairs to fit into a u32 together with the "
    "reserved bits for the flags");
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-11-06 07:32:29 +03:00
|
|
|
bool isLabelFrame() const {
|
2019-02-16 20:37:43 +03:00
|
|
|
return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_LABEL_FRAME);
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
2019-11-11 23:27:44 +03:00
|
|
|
bool isNonsensitive() const {
|
|
|
|
return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::NONSENSITIVE);
|
|
|
|
}
|
|
|
|
|
2018-05-15 08:14:03 +03:00
|
|
|
bool isSpMarkerFrame() const {
|
2019-02-16 20:37:43 +03:00
|
|
|
return uint32_t(flagsAndCategoryPair_) &
|
|
|
|
uint32_t(Flags::IS_SP_MARKER_FRAME);
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
2018-03-18 18:58:44 +03:00
|
|
|
bool isJsFrame() const {
|
2019-02-16 20:37:43 +03:00
|
|
|
return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_JS_FRAME);
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
2020-06-09 15:27:15 +03:00
|
|
|
bool isJsBlinterpFrame() const {
|
|
|
|
return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_BLINTERP_FRAME);
|
|
|
|
}
|
|
|
|
|
Bug 1499507 - Don't collect line numbers for profiling stack frames. r=njn
They were not displayed in the UI, and the instructions to initialize the line
field of a stack frame increased code size unnecessarily.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
@@ -20,17 +20,16 @@
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
- movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
Depends on D9193
Differential Revision: https://phabricator.services.mozilla.com/D9195
--HG--
extra : moz-landing-system : lando
2018-11-06 07:31:02 +03:00
|
|
|
bool isOSRFrame() const {
|
2019-02-16 20:37:43 +03:00
|
|
|
return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::JS_OSR);
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
Bug 1499507 - Don't collect line numbers for profiling stack frames. r=njn
They were not displayed in the UI, and the instructions to initialize the line
field of a stack frame increased code size unnecessarily.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
@@ -20,17 +20,16 @@
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
- movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
Depends on D9193
Differential Revision: https://phabricator.services.mozilla.com/D9195
--HG--
extra : moz-landing-system : lando
2018-11-06 07:31:02 +03:00
|
|
|
void setIsOSRFrame(bool isOSR) {
|
|
|
|
if (isOSR) {
|
2019-02-16 20:37:43 +03:00
|
|
|
flagsAndCategoryPair_ =
|
|
|
|
uint32_t(flagsAndCategoryPair_) | uint32_t(Flags::JS_OSR);
|
2018-11-30 13:46:48 +03:00
|
|
|
} else {
|
2019-02-16 20:37:43 +03:00
|
|
|
flagsAndCategoryPair_ =
|
|
|
|
uint32_t(flagsAndCategoryPair_) & ~uint32_t(Flags::JS_OSR);
|
2018-03-18 18:58:44 +03:00
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
2019-02-16 20:37:58 +03:00
|
|
|
const char* label() const {
|
|
|
|
uint32_t flagsAndCategoryPair = flagsAndCategoryPair_;
|
|
|
|
if (flagsAndCategoryPair &
|
|
|
|
uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)) {
|
|
|
|
auto categoryPair = JS::ProfilingCategoryPair(
|
|
|
|
flagsAndCategoryPair >> uint32_t(Flags::FLAGS_BITCOUNT));
|
|
|
|
return JS::GetProfilingCategoryPairInfo(categoryPair).mLabel;
|
|
|
|
}
|
|
|
|
return label_;
|
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-11-06 07:32:29 +03:00
|
|
|
// Returns the additional descriptive string stored for this frame. The
// dynamicString_ field it reads is documented as possibly null.
const char* dynamicString() const { return dynamicString_; }
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-11-06 07:32:29 +03:00
|
|
|
void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
|
2019-02-16 20:37:43 +03:00
|
|
|
JS::ProfilingCategoryPair aCategoryPair,
|
|
|
|
uint32_t aFlags) {
|
2018-11-06 07:32:29 +03:00
|
|
|
label_ = aLabel;
|
2018-05-15 06:30:32 +03:00
|
|
|
dynamicString_ = aDynamicString;
|
|
|
|
spOrScript = sp;
|
2018-11-06 07:32:29 +03:00
|
|
|
// pcOffsetIfJS_ is not set and must not be used on label frames.
|
2019-02-16 20:37:43 +03:00
|
|
|
flagsAndCategoryPair_ =
|
2018-11-06 07:33:07 +03:00
|
|
|
uint32_t(Flags::IS_LABEL_FRAME) |
|
2019-02-16 20:37:43 +03:00
|
|
|
(uint32_t(aCategoryPair) << uint32_t(Flags::FLAGS_BITCOUNT)) | aFlags;
|
2018-11-06 07:32:29 +03:00
|
|
|
MOZ_ASSERT(isLabelFrame());
|
2014-05-29 02:44:41 +04:00
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2016-03-22 12:17:20 +03:00
|
|
|
void initSpMarkerFrame(void* sp) {
|
2018-05-15 06:30:32 +03:00
|
|
|
label_ = "";
|
2016-03-22 12:17:20 +03:00
|
|
|
dynamicString_ = nullptr;
|
|
|
|
spOrScript = sp;
|
|
|
|
// pcOffsetIfJS_ is not set and must not be used on sp marker frames.
|
2019-02-16 20:37:43 +03:00
|
|
|
flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) |
|
|
|
|
(uint32_t(JS::ProfilingCategoryPair::OTHER)
|
|
|
|
<< uint32_t(Flags::FLAGS_BITCOUNT));
|
2018-05-15 06:30:32 +03:00
|
|
|
MOZ_ASSERT(isSpMarkerFrame());
|
2017-06-02 05:46:09 +03:00
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2020-06-15 17:56:50 +03:00
|
|
|
template <JS::ProfilingCategoryPair Category, uint32_t ExtraFlags = 0>
|
2018-05-15 06:21:29 +03:00
|
|
|
void initJsFrame(const char* aLabel, const char* aDynamicString,
|
2019-11-15 11:01:30 +03:00
|
|
|
JSScript* aScript, jsbytecode* aPc, uint64_t aRealmID) {
|
2017-05-26 02:51:31 +03:00
|
|
|
label_ = aLabel;
|
2018-11-06 07:32:29 +03:00
|
|
|
dynamicString_ = aDynamicString;
|
2018-02-06 00:41:29 +03:00
|
|
|
spOrScript = aScript;
|
2018-11-06 07:32:29 +03:00
|
|
|
pcOffsetIfJS_ = pcToOffset(aScript, aPc);
|
2019-11-15 11:01:30 +03:00
|
|
|
realmID_ = aRealmID;
|
2020-06-15 17:56:50 +03:00
|
|
|
flagsAndCategoryPair_ =
|
|
|
|
(uint32_t(Category) << uint32_t(Flags::FLAGS_BITCOUNT)) |
|
|
|
|
uint32_t(Flags::IS_JS_FRAME) | ExtraFlags;
|
2018-05-15 06:30:32 +03:00
|
|
|
MOZ_ASSERT(isJsFrame());
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
2018-11-06 07:33:45 +03:00
|
|
|
uint32_t flags() const {
|
2019-02-16 20:37:43 +03:00
|
|
|
return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::FLAGS_MASK);
|
2017-05-26 02:51:31 +03:00
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2019-02-16 20:37:43 +03:00
|
|
|
// Returns the category pair stored in the bits above the reserved flag bits.
JS::ProfilingCategoryPair categoryPair() const {
  const uint32_t bits = flagsAndCategoryPair_;
  return JS::ProfilingCategoryPair(bits >> uint32_t(Flags::FLAGS_BITCOUNT));
}
|
|
|
|
|
2019-11-15 11:01:30 +03:00
|
|
|
// Returns the stored realm ID. Only meaningful for JS frames: on other frame
// kinds realmID_ holds 0 or a leftover value from an earlier JS frame.
uint64_t realmID() const { return realmID_; }
|
|
|
|
|
2018-11-06 07:32:29 +03:00
|
|
|
void* stackAddress() const {
|
2018-05-15 06:30:32 +03:00
|
|
|
MOZ_ASSERT(!isJsFrame());
|
|
|
|
return spOrScript;
|
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2017-05-26 02:51:31 +03:00
|
|
|
JS_PUBLIC_API JSScript* script() const;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2017-05-26 02:51:31 +03:00
|
|
|
// Note that the pointer returned might be invalid.
|
|
|
|
// Returns the value of spOrScript cast to JSScript*, without any validity
// check on the pointer. Asserts that this is a JS frame.
JSScript* rawScript() const {
  MOZ_ASSERT(isJsFrame());
  void* const untyped = spOrScript;
  return static_cast<JSScript*>(untyped);
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-11-19 20:02:47 +03:00
|
|
|
// We can't know the layout of JSScript, so look in vm/GeckoProfiler.cpp.
|
|
|
|
JS_FRIEND_API jsbytecode* pc() const;
|
2017-06-02 10:16:56 +03:00
|
|
|
void setPC(jsbytecode* pc);
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2017-06-02 10:16:56 +03:00
|
|
|
void trace(JSTracer* trc);
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2014-05-29 02:44:41 +04:00
|
|
|
// The offset of a pc into a script's code can actually be 0, so to
|
|
|
|
// signify a nullptr pc, use a -1 index. This is checked against in
|
|
|
|
// pc() and setPC() to set/get the right pc.
|
|
|
|
// constexpr rather than plain const: usable in constant expressions and,
// since C++17, implicitly inline, so ODR-use (e.g. binding to a const
// reference) needs no separate out-of-class definition.
static constexpr int32_t NullPCOffset = -1;
|
2013-08-20 10:45:26 +04:00
|
|
|
};
|
|
|
|
|
2018-05-15 08:03:11 +03:00
|
|
|
JS_FRIEND_API void SetContextProfilingStack(JSContext* cx,
|
|
|
|
ProfilingStack* profilingStack);
|
2013-08-20 10:45:26 +04:00
|
|
|
|
2018-01-05 16:35:00 +03:00
|
|
|
// GetContextProfilingStack also exists, but it's defined in RootingAPI.h.
|
|
|
|
|
2016-08-11 15:39:22 +03:00
|
|
|
JS_FRIEND_API void EnableContextProfilingStack(JSContext* cx, bool enabled);
|
2013-08-20 10:45:26 +04:00
|
|
|
|
2016-08-11 15:39:22 +03:00
|
|
|
JS_FRIEND_API void RegisterContextProfilingEventMarker(JSContext* cx,
|
|
|
|
void (*fn)(const char*));
|
2014-03-03 23:36:08 +04:00
|
|
|
|
2013-08-20 10:45:26 +04:00
|
|
|
} // namespace js
|
|
|
|
|
2018-06-08 01:37:08 +03:00
|
|
|
namespace JS {
|
|
|
|
|
|
|
|
// Callback type: takes a thread name and a stack base pointer and returns a
// ProfilingStack*.
using RegisterThreadCallback = ProfilingStack* (*)(const char* threadName,
                                                   void* stackBase);
|
|
|
|
|
|
|
|
// Callback type: takes no arguments and returns nothing.
using UnregisterThreadCallback = void (*)();
|
|
|
|
|
2020-01-10 13:50:32 +03:00
|
|
|
// registerThread and unregisterThread callbacks are functions which are called
|
|
|
|
// by other threads without any locking mechanism.
|
2018-06-08 01:37:08 +03:00
|
|
|
JS_FRIEND_API void SetProfilingThreadCallbacks(
|
|
|
|
RegisterThreadCallback registerThread,
|
|
|
|
UnregisterThreadCallback unregisterThread);
|
|
|
|
|
|
|
|
} // namespace JS
|
|
|
|
|
2018-05-15 08:03:11 +03:00
|
|
|
// Each thread has its own ProfilingStack. That thread modifies the
|
2017-06-02 10:16:56 +03:00
|
|
|
// ProfilingStack, pushing and popping elements as necessary.
|
|
|
|
//
|
2018-05-15 08:03:11 +03:00
|
|
|
// The ProfilingStack is also read periodically by the profiler's sampler
|
|
|
|
// thread. This happens only when the thread that owns the ProfilingStack is
|
|
|
|
// suspended. So there are no genuine parallel accesses.
|
2017-06-02 10:16:56 +03:00
|
|
|
//
|
|
|
|
// However, it is possible for pushing/popping to be interrupted by a periodic
|
|
|
|
// sample. Because of this, we need pushing/popping to be effectively atomic.
|
|
|
|
//
|
2018-05-15 08:14:03 +03:00
|
|
|
// - When pushing a new frame, we increment the stack pointer -- making the new
|
|
|
|
// frame visible to the sampler thread -- only after the new frame has been
|
2018-02-06 00:41:29 +03:00
|
|
|
// fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
|
|
|
|
// the increment is a release-store, which ensures that this store is not
|
2018-05-15 08:14:03 +03:00
|
|
|
// reordered before the writes of the frame.
|
2017-06-02 10:16:56 +03:00
|
|
|
//
|
2018-05-15 08:14:03 +03:00
|
|
|
// - When popping an old frame, the only operation is the decrementing of the
|
2017-06-02 10:16:56 +03:00
|
|
|
// stack pointer, which is obviously atomic.
|
|
|
|
//
|
2019-12-10 01:28:19 +03:00
|
|
|
class JS_FRIEND_API ProfilingStack final {
|
2017-05-26 02:37:28 +03:00
|
|
|
public:
|
2020-03-16 16:47:02 +03:00
|
|
|
ProfilingStack() = default;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 08:03:11 +03:00
|
|
|
~ProfilingStack();
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 06:30:32 +03:00
|
|
|
void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
|
2019-02-16 20:37:43 +03:00
|
|
|
JS::ProfilingCategoryPair categoryPair,
|
2018-11-06 07:33:45 +03:00
|
|
|
uint32_t flags = 0) {
|
Bug 1499507 - Allow the compiler to generate a non-atomic increment instruction for the stack pointer increment. r=njn
This change reduces the binary size on macOS x64 by around 50KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build. It's a bit hard to read because %r12 and %rbx swap their
function, but what happens in this method is that "movq %r12, %rcx" goes
away, and the two instructions "leal 0x1(%r12) %eax" and
"movl %eax, 0x10(%rbx)" turn into an "incl 0x10(%r12)".
So the old code was preserving the original value of profilingStack->stackPointer
in a register, and then using it later to compute the incremented stackPointer.
The new code uses an "incl" instruction for the stackPointer increment and
doesn't worry that the stackPointer value might have changed since the stack
size check at the start of the function. (It can't have changed.)
before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after: %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer
@@ -3,37 +3,35 @@
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
movq %rcx, (%r14)
movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
jmp loc_xxxxx
Depends on D9205
Differential Revision: https://phabricator.services.mozilla.com/D9206
--HG--
extra : moz-landing-system : lando
2018-11-06 07:35:13 +03:00
|
|
|
// This thread is the only one that ever changes the value of
|
|
|
|
// stackPointer.
|
|
|
|
// Store the value of the atomic in a non-atomic local variable so that
|
|
|
|
// the compiler won't generate two separate loads from the atomic for
|
|
|
|
// the size check and the frames[] array indexing operation.
|
|
|
|
uint32_t stackPointerVal = stackPointer;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
Bug 1499507 - Allow the compiler to generate a non-atomic increment instruction for the stack pointer increment. r=njn
This change reduces the binary size on macOS x64 by around 50KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build. It's a bit hard to read because %r12 and %rbx swap their
function, but what happens in this method is that "movq %r12, %rcx" goes
away, and the two instructions "leal 0x1(%r12) %eax" and
"movl %eax, 0x10(%rbx)" turn into an "incl 0x10(%r12)".
So the old code was preserving the original value of profilingStack->stackPointer
in a register, and then using it later to compute the incremented stackPointer.
The new code uses an "incl" instruction for the stackPointer increment and
doesn't worry that the stackPointer value might have changed since the stack
size check at the start of the function. (It can't have changed.)
before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after: %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer
@@ -3,37 +3,35 @@
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
movq %rcx, (%r14)
movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
jmp loc_xxxxx
Depends on D9205
Differential Revision: https://phabricator.services.mozilla.com/D9206
--HG--
extra : moz-landing-system : lando
2018-11-06 07:35:13 +03:00
|
|
|
if (MOZ_UNLIKELY(stackPointerVal >= capacity)) {
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
ensureCapacitySlow();
|
2018-05-15 06:30:32 +03:00
|
|
|
}
|
2019-02-16 20:37:43 +03:00
|
|
|
frames[stackPointerVal].initLabelFrame(label, dynamicString, sp,
|
|
|
|
categoryPair, flags);
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 06:30:32 +03:00
|
|
|
// This must happen at the end! The compiler will not reorder this
|
|
|
|
// update because stackPointer is Atomic<..., ReleaseAcquire>, so any
|
|
|
|
// the writes above will not be reordered below the stackPointer store.
|
|
|
|
// Do the read and the write as two separate statements, in order to
|
|
|
|
// make it clear that we don't need an atomic increment, which would be
|
|
|
|
// more expensive on x86 than the separate operations done here.
|
Bug 1499507 - Allow the compiler to generate a non-atomic increment instruction for the stack pointer increment. r=njn
This change reduces the binary size on macOS x64 by around 50KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build. It's a bit hard to read because %r12 and %rbx swap their
function, but what happens in this method is that "movq %r12, %rcx" goes
away, and the two instructions "leal 0x1(%r12) %eax" and
"movl %eax, 0x10(%rbx)" turn into an "incl 0x10(%r12)".
So the old code was preserving the original value of profilingStack->stackPointer
in a register, and then using it later to compute the incremented stackPointer.
The new code uses an "incl" instruction for the stackPointer increment and
doesn't worry that the stackPointer value might have changed since the stack
size check at the start of the function. (It can't have changed.)
before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after: %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer
@@ -3,37 +3,35 @@
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
movq %rcx, (%r14)
movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
jmp loc_xxxxx
Depends on D9205
Differential Revision: https://phabricator.services.mozilla.com/D9206
--HG--
extra : moz-landing-system : lando
2018-11-06 07:35:13 +03:00
|
|
|
// However, don't use stackPointerVal here; instead, allow the compiler
|
|
|
|
// to turn this store into a non-atomic increment instruction which
|
|
|
|
// takes up less code size.
|
|
|
|
stackPointer = stackPointer + 1;
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
2018-05-15 06:30:32 +03:00
|
|
|
void pushSpMarkerFrame(void* sp) {
|
2018-03-18 18:58:44 +03:00
|
|
|
uint32_t oldStackPointer = stackPointer;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
|
|
|
|
ensureCapacitySlow();
|
2017-05-26 02:51:31 +03:00
|
|
|
}
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
frames[oldStackPointer].initSpMarkerFrame(sp);
|
2017-05-26 02:37:28 +03:00
|
|
|
|
Bug 1499507 - Allow the compiler to generate a non-atomic increment instruction for the stack pointer increment. r=njn
This change reduces the binary size on macOS x64 by around 50KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build. It's a bit hard to read because %r12 and %rbx swap their
function, but what happens in this method is that "movq %r12, %rcx" goes
away, and the two instructions "leal 0x1(%r12) %eax" and
"movl %eax, 0x10(%rbx)" turn into an "incl 0x10(%r12)".
So the old code was preserving the original value of profilingStack->stackPointer
in a register, and then using it later to compute the incremented stackPointer.
The new code uses an "incl" instruction for the stackPointer increment and
doesn't worry that the stackPointer value might have changed since the stack
size check at the start of the function. (It can't have changed.)
before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after: %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer
@@ -3,37 +3,35 @@
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
movq %rcx, (%r14)
movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
jmp loc_xxxxx
Depends on D9205
Differential Revision: https://phabricator.services.mozilla.com/D9206
--HG--
extra : moz-landing-system : lando
2018-11-06 07:35:13 +03:00
|
|
|
// This must happen at the end, see the comment in pushLabelFrame.
|
2018-03-18 18:58:44 +03:00
|
|
|
stackPointer = oldStackPointer + 1;
|
2018-09-06 13:11:07 +03:00
|
|
|
}
|
2017-05-26 02:37:28 +03:00
|
|
|
|
2018-05-15 08:14:03 +03:00
|
|
|
void pushJsFrame(const char* label, const char* dynamicString,
|
2019-11-15 11:01:30 +03:00
|
|
|
JSScript* script, jsbytecode* pc, uint64_t aRealmID) {
|
2018-05-15 08:14:03 +03:00
|
|
|
// This thread is the only one that ever changes the value of
|
|
|
|
// stackPointer. Only load the atomic once.
|
Bug 1499507 - Allow the compiler to generate a non-atomic increment instruction for the stack pointer increment. r=njn
This change reduces the binary size on macOS x64 by around 50KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build. It's a bit hard to read because %r12 and %rbx swap their
function, but what happens in this method is that "movq %r12, %rcx" goes
away, and the two instructions "leal 0x1(%r12) %eax" and
"movl %eax, 0x10(%rbx)" turn into an "incl 0x10(%r12)".
So the old code was preserving the original value of profilingStack->stackPointer
in a register, and then using it later to compute the incremented stackPointer.
The new code uses an "incl" instruction for the stackPointer increment and
doesn't worry that the stackPointer value might have changed since the stack
size check at the start of the function. (It can't have changed.)
before: %rbx has the ProfilingStack*, %r12 has profilingStack->stackPointer
after: %r12 has the ProfilingStack*, %rbx has profilingStack->stackPointer
@@ -3,37 +3,35 @@
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq 0x80(%rdi), %rbx
- movq %rbx, -40(%rbp)
- testq %rbx, %rbx
+ movq 0x80(%rdi), %r12
+ movq %r12, -40(%rbp)
+ testq %r12, %r12
je loc_xxxxx
- movl 0x10(%rbx), %r12d
- cmpl (%rbx), %r12d
+ movl 0x10(%r12), %ebx
+ cmpl (%r12), %ebx
jae loc_xxxxx
- movq 0x8(%rbx), %rax
- movq %r12, %rcx
- shlq $0x5, %rcx
- leaq aAttr, %rdx ; "Attr"
- movq %rdx, (%rax,%rcx)
- leaq aSpecified, %rdx ; "specified"
- movq %rdx, 0x8(%rax,%rcx)
- leaq -40(%rbp), %rdx
- movq %rdx, 0x10(%rax,%rcx)
- movl $0x3a1, 0x1c(%rax,%rcx)
- leal 0x1(%r12), %eax
- movl %eax, 0x10(%rbx)
+ movq 0x8(%r12), %rax
+ shlq $0x5, %rbx
+ leaq aAttr, %rcx ; "Attr"
+ movq %rcx, (%rax,%rbx)
+ leaq aSpecified, %rcx ; "specified"
+ movq %rcx, 0x8(%rax,%rbx)
+ leaq -40(%rbp), %rcx
+ movq %rcx, 0x10(%rax,%rbx)
+ movl $0x3a1, 0x1c(%rax,%rbx)
+ incl 0x10(%r12)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
movq %rcx, (%r14)
movq -40(%rbp), %rax
@@ -47,11 +45,11 @@
popq %rbx
popq %r12
popq %r14
popq %r15
popq %rbp
ret
; endp
- movq %rbx, %rdi
+ movq %r12, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
jmp loc_xxxxx
Depends on D9205
Differential Revision: https://phabricator.services.mozilla.com/D9206
--HG--
extra : moz-landing-system : lando
2018-11-06 07:35:13 +03:00
|
|
|
uint32_t oldStackPointer = stackPointer;
|
2017-05-26 02:37:28 +03:00
|
|
|
|
2017-08-02 21:36:43 +03:00
|
|
|
if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
|
|
|
|
ensureCapacitySlow();
|
2017-05-26 02:37:28 +03:00
|
|
|
}
|
2020-06-15 17:56:50 +03:00
|
|
|
frames[oldStackPointer]
|
|
|
|
.initJsFrame<JS::ProfilingCategoryPair::JS_Interpreter>(
|
|
|
|
label, dynamicString, script, pc, aRealmID);
|
2018-11-30 13:46:48 +03:00
|
|
|
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
// This must happen at the end, see the comment in pushLabelFrame.
|
2017-08-02 21:36:43 +03:00
|
|
|
stackPointer = stackPointer + 1;
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
void pop() {
|
2017-05-26 02:51:31 +03:00
|
|
|
MOZ_ASSERT(stackPointer > 0);
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
// Do the read and the write as two separate statements, in order to
|
2017-08-02 21:36:43 +03:00
|
|
|
// make it clear that we don't need an atomic decrement, which would be
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
// more expensive on x86 than the separate operations done here.
|
2017-08-02 21:36:43 +03:00
|
|
|
// This thread is the only one that ever changes the value of
|
|
|
|
// stackPointer.
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
uint32_t oldStackPointer = stackPointer;
|
2018-05-15 08:14:03 +03:00
|
|
|
stackPointer = oldStackPointer - 1;
|
2018-11-30 13:46:48 +03:00
|
|
|
}
|
|
|
|
|
2018-05-15 08:14:03 +03:00
|
|
|
// Returns the current value of stackPointer, i.e. the index at which the
// next pushed frame would be written.
uint32_t stackSize() const {
  return stackPointer;
}
|
2018-10-05 03:52:43 +03:00
|
|
|
// Returns the current value of capacity, the bound that the push methods
// check the stack pointer against before writing into frames[].
uint32_t stackCapacity() const {
  return capacity;
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2017-05-26 02:37:28 +03:00
|
|
|
private:
|
2018-03-18 18:58:44 +03:00
|
|
|
// Out of line path for expanding the buffer, since otherwise this would get
|
|
|
|
// inlined in every DOM WebIDL call.
|
Bug 1499507 - Make ensureCapacitySlow infallible. r=emilio
This eliminates a few instructions from each inlined instance of
AutoProfilerLabel because we no longer need to handle allocation failure in the
inlined code.
I think this allocation should be fine to make infallible: The allocation size
is limited by the thread's stack depth, and we only hit this code path when the
stack is the deepest it's ever been during the thread's life time.
This change reduces the binary size on Linux x64 by around 100KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build, it really just eliminates one test and one jump at the very end
of the method:
@@ -9,30 +9,29 @@
movq %rcx, %r14
movq %rdx, %r15
movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
- cmpl %r12d, (%rbx)
- jbe loc_xxxxx
+ cmpl (%rbx), %r12d
+ jae loc_xxxxx
movq 0x8(%rbx), %rax
movq %r12, %rcx
shlq $0x5, %rcx
leaq aGetAttrspecifi, %rdx ; "get Attr.specified"
movq %rdx, (%rax,%rcx)
movq $0x0, 0x8(%rax,%rcx)
leaq -40(%rbp), %rdx
movq %rdx, 0x10(%rax,%rcx)
movl $0x106, 0x18(%rax,%rcx)
movl $0x1c, 0x1c(%rax,%rcx)
-
leal 0x1(%r12), %eax
movl %eax, 0x10(%rbx)
movq %r15, %rdi
call __ZNK7mozilla3dom4Attr9SpecifiedEv ; mozilla::dom::Attr::Specified() const
movzxl %al, %eax
movabsq $0xfff9000000000000, %rcx
orq %rax, %rcx
@@ -50,12 +49,9 @@
popq %r14
popq %r15
popq %rbp
ret
; endp
movq %rbx, %rdi
call __ZN14ProfilingStack18ensureCapacitySlowEv ; ProfilingStack::ensureCapacitySlow()
- testb %al, %al
- jne loc_xxxxx
-
jmp loc_xxxxx
Depends on D9192
Differential Revision: https://phabricator.services.mozilla.com/D9193
--HG--
extra : moz-landing-system : lando
2018-11-06 07:30:13 +03:00
|
|
|
MOZ_COLD void ensureCapacitySlow();
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2017-05-26 02:37:28 +03:00
|
|
|
// No copying.
|
2018-05-15 08:03:11 +03:00
|
|
|
ProfilingStack(const ProfilingStack&) = delete;
|
|
|
|
void operator=(const ProfilingStack&) = delete;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-03-18 18:58:44 +03:00
|
|
|
// No moving either.
|
2018-05-15 08:03:11 +03:00
|
|
|
ProfilingStack(ProfilingStack&&) = delete;
|
|
|
|
void operator=(ProfilingStack&&) = delete;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 08:14:03 +03:00
|
|
|
uint32_t capacity = 0;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2017-05-26 02:37:28 +03:00
|
|
|
public:
|
2018-05-15 08:14:03 +03:00
|
|
|
// The pointer to the stack frames. This is read from the profiler thread and
|
2018-03-18 18:58:44 +03:00
|
|
|
// written from the current thread.
|
|
|
|
//
|
|
|
|
// This is effectively a unique pointer.
|
2020-02-27 20:39:15 +03:00
|
|
|
mozilla::Atomic<js::ProfilingStackFrame*, mozilla::SequentiallyConsistent>
|
2018-07-21 17:37:45 +03:00
|
|
|
frames{nullptr};
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 08:14:03 +03:00
|
|
|
// This may exceed the capacity, so instead use the stackSize() method to
|
|
|
|
// determine the number of valid frames in |frames|. When this is less
|
|
|
|
// than stackCapacity(), it refers to the first free stackframe past the top
|
|
|
|
// of the in-use stack (i.e. frames[stackPointer - 1] is the top stack
|
|
|
|
// frame).
|
2018-02-06 00:41:29 +03:00
|
|
|
//
|
|
|
|
// WARNING WARNING WARNING
|
|
|
|
//
|
|
|
|
// This is an atomic variable that uses ReleaseAcquire memory ordering.
|
|
|
|
// See the "Concurrency considerations" paragraph at the top of this file
|
|
|
|
// for more details.
|
2020-03-16 16:47:02 +03:00
|
|
|
mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stackPointer{0};
|
2017-05-26 02:37:28 +03:00
|
|
|
};
|
|
|
|
|
2018-01-05 16:35:00 +03:00
|
|
|
namespace js {
|
|
|
|
|
|
|
|
class AutoGeckoProfilerEntry;
|
|
|
|
class GeckoProfilerEntryMarker;
|
|
|
|
class GeckoProfilerBaselineOSRMarker;
|
|
|
|
|
|
|
|
class GeckoProfilerThread {
|
|
|
|
friend class AutoGeckoProfilerEntry;
|
|
|
|
friend class GeckoProfilerEntryMarker;
|
|
|
|
friend class GeckoProfilerBaselineOSRMarker;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-05-15 08:03:11 +03:00
|
|
|
ProfilingStack* profilingStack_;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
Bug 1499507 - Fold the 'profiler is active' check into the 'JSContext has a non-null PseudoStack' check. r=sfink
This eliminates a few instructions from every profiler label and saves code size.
We have around 9000 WebIDL constructors + methods + getters + setters which all
have an inlined instance of this code.
This change reduces the binary size on Linux x64 by around 160KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT, %rax ; __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT
- movl (%rax), %eax
- testl %eax, %eax
- js loc_xxxxx
-
- movq $0x0, -40(%rbp)
- jmp loc_xxxxx
-
- movq 0x78(%rdi), %rbx
+ movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
cmpl %r12d, (%rbx)
jbe loc_xxxxx
Differential Revision: https://phabricator.services.mozilla.com/D9192
--HG--
extra : moz-landing-system : lando
2018-11-06 07:29:35 +03:00
|
|
|
// Same as profilingStack_ if the profiler is currently active, otherwise
|
|
|
|
// null.
|
|
|
|
ProfilingStack* profilingStackIfEnabled_;
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-01-05 16:35:00 +03:00
|
|
|
public:
|
|
|
|
GeckoProfilerThread();
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-10-05 03:52:43 +03:00
|
|
|
uint32_t stackPointer() {
|
|
|
|
MOZ_ASSERT(infraInstalled());
|
|
|
|
return profilingStack_->stackPointer;
|
|
|
|
}
|
2018-05-15 08:03:11 +03:00
|
|
|
// Returns the frames pointer of the profiling stack. profilingStack_ is
// dereferenced without a null check, so callers must make sure a profiling
// stack is installed first.
ProfilingStackFrame* stack() {
  ProfilingStackFrame* frameArray = profilingStack_->frames;
  return frameArray;
}
|
|
|
|
// Returns profilingStack_ as-is, whether or not the profiler is enabled. The
// result may be null; infraInstalled() reports whether it is set.
ProfilingStack* getProfilingStack() {
  return profilingStack_;
}
|
Bug 1499507 - Fold the 'profiler is active' check into the 'JSContext has a non-null PseudoStack' check. r=sfink
This eliminates a few instructions from every profiler label and saves code size.
We have around 9000 WebIDL constructors + methods + getters + setters which all
have an inlined instance of this code.
This change reduces the binary size on Linux x64 by around 160KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT, %rax ; __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT
- movl (%rax), %eax
- testl %eax, %eax
- js loc_xxxxx
-
- movq $0x0, -40(%rbp)
- jmp loc_xxxxx
-
- movq 0x78(%rdi), %rbx
+ movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
cmpl %r12d, (%rbx)
jbe loc_xxxxx
Differential Revision: https://phabricator.services.mozilla.com/D9192
--HG--
extra : moz-landing-system : lando
2018-11-06 07:29:35 +03:00
|
|
|
// Returns profilingStackIfEnabled_, which enable() sets to profilingStack_
// when enabling and to null when disabling.
ProfilingStack* getProfilingStackIfEnabled() {
  ProfilingStack* const enabledStack = profilingStackIfEnabled_;
  return enabledStack;
}
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-10-05 03:52:43 +03:00
|
|
|
/*
|
|
|
|
* True if the profiler infrastructure is setup. Should be true in builds
|
|
|
|
* that include profiler support except during early startup or late
|
|
|
|
* shutdown. Unrelated to the presence of the Gecko Profiler addon.
|
|
|
|
*/
|
|
|
|
bool infraInstalled() { return profilingStack_ != nullptr; }
|
2018-11-30 13:46:48 +03:00
|
|
|
|
Bug 1499507 - Fold the 'profiler is active' check into the 'JSContext has a non-null PseudoStack' check. r=sfink
This eliminates a few instructions from every profiler label and saves code size.
We have around 9000 WebIDL constructors + methods + getters + setters which all
have an inlined instance of this code.
This change reduces the binary size on Linux x64 by around 160KB.
Here's a diff of the impact on the code generated for Attr_Binding::get_specified
in the Mac build:
movq %rsp, %rbp
pushq %r15
pushq %r14
pushq %r12
pushq %rbx
subq $0x10, %rsp
movq %rcx, %r14
movq %rdx, %r15
- movq __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT, %rax ; __ZN7mozilla8profiler6detail12RacyFeatures18sActiveAndFeaturesE@GOT
- movl (%rax), %eax
- testl %eax, %eax
- js loc_xxxxx
-
- movq $0x0, -40(%rbp)
- jmp loc_xxxxx
-
- movq 0x78(%rdi), %rbx
+ movq 0x80(%rdi), %rbx
movq %rbx, -40(%rbp)
testq %rbx, %rbx
je loc_xxxxx
movl 0x10(%rbx), %r12d
cmpl %r12d, (%rbx)
jbe loc_xxxxx
Differential Revision: https://phabricator.services.mozilla.com/D9192
--HG--
extra : moz-landing-system : lando
2018-11-06 07:29:35 +03:00
|
|
|
void setProfilingStack(ProfilingStack* profilingStack, bool enabled);
|
|
|
|
// Switches the "if enabled" view of the profiling stack on or off: when
// |enable| is true it mirrors profilingStack_, otherwise it is cleared to
// null. profilingStack_ itself is never modified here.
void enable(bool enable) {
  if (enable) {
    profilingStackIfEnabled_ = profilingStack_;
  } else {
    profilingStackIfEnabled_ = nullptr;
  }
}
|
2018-01-05 16:35:00 +03:00
|
|
|
void trace(JSTracer* trc);
|
2018-11-30 13:46:48 +03:00
|
|
|
|
2018-01-05 16:35:00 +03:00
|
|
|
/*
|
|
|
|
* Functions which are the actual instrumentation to track run information
|
|
|
|
*
|
|
|
|
* - enter: a function has started to execute
|
|
|
|
* - updatePC: updates the pc information about where a function
|
|
|
|
* is currently executing
|
|
|
|
* - exit: this function has ceased execution, and no further
|
|
|
|
* entries/exits will be made
|
|
|
|
*/
|
2019-05-03 13:15:51 +03:00
|
|
|
bool enter(JSContext* cx, JSScript* script);
|
|
|
|
void exit(JSContext* cx, JSScript* script);
|
2018-01-05 16:35:00 +03:00
|
|
|
inline void updatePC(JSContext* cx, JSScript* script, jsbytecode* pc);
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace js
|
|
|
|
|
2013-08-20 10:45:26 +04:00
|
|
|
#endif /* js_ProfilingStack_h */
|