Mirror of https://github.com/mozilla/gecko-dev.git
2082 lines · 71 KiB · C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "LulMain.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h> // write(), only for testing LUL
|
|
|
|
#include <algorithm> // std::sort
|
|
#include <string>
|
|
#include <utility>
|
|
|
|
#include "GeckoProfiler.h" // for profiler_current_thread_id()
|
|
#include "LulCommonExt.h"
|
|
#include "LulElfExt.h"
|
|
#include "LulMainInt.h"
|
|
#include "mozilla/ArrayUtils.h"
|
|
#include "mozilla/Assertions.h"
|
|
#include "mozilla/CheckedInt.h"
|
|
#include "mozilla/DebugOnly.h"
|
|
#include "mozilla/MemoryChecking.h"
|
|
#include "mozilla/Sprintf.h"
|
|
#include "mozilla/UniquePtr.h"
|
|
#include "mozilla/Unused.h"
|
|
|
|
// Set this to 1 for verbose logging
|
|
#define DEBUG_MAIN 0
|
|
|
|
namespace lul {
|
|
|
|
using mozilla::CheckedInt;
|
|
using mozilla::DebugOnly;
|
|
using mozilla::MallocSizeOf;
|
|
using mozilla::Unused;
|
|
using std::pair;
|
|
using std::string;
|
|
using std::vector;
|
|
|
|
// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
|
//
|
|
// Some functions in this file are marked RUNS IN NO-MALLOC CONTEXT.
|
|
// Any such function -- and, hence, the transitive closure of those
|
|
// reachable from it -- must not do any dynamic memory allocation.
|
|
// Doing so risks deadlock. There is exactly one root function for
|
|
// the transitive closure: Lul::Unwind.
|
|
//
|
|
// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
// RuleSet //
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
// Map a DWARF register number to a short printable name for the target
// architecture, or "???" for any register not tracked here.  Used only
// by the debug-printing code (LExpr::ShowRule / RuleSet::Print).
static const char* NameOf_DW_REG(int16_t aReg) {
  switch (aReg) {
    // The CFA pseudo-register is common to all architectures.
    case DW_REG_CFA:
      return "cfa";
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return "xbp";
    case DW_REG_INTEL_XSP:
      return "xsp";
    case DW_REG_INTEL_XIP:
      return "xip";
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return "r7";
    case DW_REG_ARM_R11:
      return "r11";
    case DW_REG_ARM_R12:
      return "r12";
    case DW_REG_ARM_R13:
      return "r13";
    case DW_REG_ARM_R14:
      return "r14";
    case DW_REG_ARM_R15:
      return "r15";
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return "x29";
    case DW_REG_AARCH64_X30:
      return "x30";
    case DW_REG_AARCH64_SP:
      return "sp";
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return "sp";
    case DW_REG_MIPS_FP:
      return "fp";
    case DW_REG_MIPS_PC:
      return "pc";
#else
# error "Unsupported arch"
#endif
    default:
      return "???";
  }
}
|
|
|
|
// Render this recovery expression as "<aNewReg>=<recovery>", for
// debug printing.  |mHow| selects the textual form; NODEREF/DEREF show
// "reg+offset" (possibly dereferenced) and PFXEXPR shows the index of
// the prefix expression in the PfxInstr vector.
string LExpr::ShowRule(const char* aNewReg) const {
  string res = string(aNewReg) + "=";
  char buf[64];
  switch (mHow) {
    case UNKNOWN:
      return res + "Unknown";
    case NODEREF:
      SprintfLiteral(buf, "%s+%d", NameOf_DW_REG(mReg), (int)mOffset);
      return res + buf;
    case DEREF:
      SprintfLiteral(buf, "*(%s+%d)", NameOf_DW_REG(mReg), (int)mOffset);
      return res + buf;
    case PFXEXPR:
      SprintfLiteral(buf, "PfxExpr-at-%d", (int)mOffset);
      return res + buf;
    default:
      return res + "???";
  }
}
|
|
|
|
// Print, via |aLog|, a one-line description of the recovery rules this
// RuleSet applies to the address range [avma, avma+len-1].  Debugging
// aid only.
void RuleSet::Print(uintptr_t avma, uintptr_t len,
                    void (*aLog)(const char*)) const {
  char buf[96];
  SprintfLiteral(buf, "[%llx .. %llx]: let ", (unsigned long long int)avma,
                 (unsigned long long int)(avma + len - 1));
  string res = string(buf);
  // The CFA rule is printed first since the register rules may refer to it.
  res += mCfaExpr.ShowRule("cfa");
  res += " in";
  // For each reg we care about, print the recovery expression.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  res += mXipExpr.ShowRule(" RA");
  res += mXspExpr.ShowRule(" SP");
  res += mXbpExpr.ShowRule(" BP");
#elif defined(GP_ARCH_arm)
  res += mR15expr.ShowRule(" R15");
  res += mR7expr.ShowRule(" R7");
  res += mR11expr.ShowRule(" R11");
  res += mR12expr.ShowRule(" R12");
  res += mR13expr.ShowRule(" R13");
  res += mR14expr.ShowRule(" R14");
#elif defined(GP_ARCH_arm64)
  res += mX29expr.ShowRule(" X29");
  res += mX30expr.ShowRule(" X30");
  res += mSPexpr.ShowRule(" SP");
#elif defined(GP_ARCH_mips64)
  res += mPCexpr.ShowRule(" PC");
  res += mSPexpr.ShowRule(" SP");
  res += mFPexpr.ShowRule(" FP");
#else
# error "Unsupported arch"
#endif
  aLog(res.c_str());
}
|
|
|
|
// Return a pointer to the LExpr field holding the recovery rule for
// register |aRegno|, or nullptr if that register is not one this
// RuleSet tracks on the current architecture.
LExpr* RuleSet::ExprForRegno(DW_REG_NUMBER aRegno) {
  switch (aRegno) {
    case DW_REG_CFA:
      return &mCfaExpr;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XIP:
      return &mXipExpr;
    case DW_REG_INTEL_XSP:
      return &mXspExpr;
    case DW_REG_INTEL_XBP:
      return &mXbpExpr;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R15:
      return &mR15expr;
    case DW_REG_ARM_R14:
      return &mR14expr;
    case DW_REG_ARM_R13:
      return &mR13expr;
    case DW_REG_ARM_R12:
      return &mR12expr;
    case DW_REG_ARM_R11:
      return &mR11expr;
    case DW_REG_ARM_R7:
      return &mR7expr;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return &mX29expr;
    case DW_REG_AARCH64_X30:
      return &mX30expr;
    case DW_REG_AARCH64_SP:
      return &mSPexpr;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return &mSPexpr;
    case DW_REG_MIPS_FP:
      return &mFPexpr;
    case DW_REG_MIPS_PC:
      return &mPCexpr;
#else
# error "Unknown arch"
#endif
    default:
      return nullptr;
  }
}
|
|
|
|
// Default-construct a RuleSet with all recovery rules in their default
// ("Unknown") state.
RuleSet::RuleSet() {
  // All fields are of type LExpr and so are initialised by LExpr::LExpr().
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
// SecMap //
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
// See header file LulMainInt.h for comments about invariants.
|
|
|
|
// Construct a SecMap covering [mapStartAVMA, mapStartAVMA+mapLen-1].
// The map starts out non-searchable (|mUsable| is false) and holds a
// fresh RuleSet uniqifier used during construction.  A zero |mapLen|
// produces the canonical empty range [1, 0].
SecMap::SecMap(uintptr_t mapStartAVMA, uint32_t mapLen,
               void (*aLog)(const char*))
    : mUsable(false),
      mUniqifier(new mozilla::HashMap<RuleSet, uint32_t, RuleSet,
                                      InfallibleAllocPolicy>),
      mLog(aLog) {
  if (mapLen != 0) {
    mMapMinAVMA = mapStartAVMA;
    mMapMaxAVMA = mapStartAVMA + (uintptr_t)mapLen - 1;
  } else {
    // Degenerate case: an empty range, represented as min > max.
    mMapMinAVMA = 1;
    mMapMaxAVMA = 0;
  }
}
|
|
|
|
// Tear down the SecMap, releasing the extents, the dictionary and (if
// it still exists) the uniqifier.
SecMap::~SecMap() {
  mExtents.clear();
  mDictionary.clear();
  if (!mUniqifier) {
    return;
  }
  mUniqifier->clear();
  mUniqifier = nullptr;
}
|
|
|
|
// RUNS IN NO-MALLOC CONTEXT
|
|
RuleSet* SecMap::FindRuleSet(uintptr_t ia) {
|
|
// Binary search mExtents to find one that brackets |ia|.
|
|
// lo and hi need to be signed, else the loop termination tests
|
|
// don't work properly. Note that this works correctly even when
|
|
// mExtents.size() == 0.
|
|
|
|
// Can't do this until the array has been sorted and preened.
|
|
MOZ_ASSERT(mUsable);
|
|
|
|
long int lo = 0;
|
|
long int hi = (long int)mExtents.size() - 1;
|
|
while (true) {
|
|
// current unsearched space is from lo to hi, inclusive.
|
|
if (lo > hi) {
|
|
// not found
|
|
return nullptr;
|
|
}
|
|
long int mid = lo + ((hi - lo) / 2);
|
|
Extent* mid_extent = &mExtents[mid];
|
|
uintptr_t mid_offset = mid_extent->offset();
|
|
uintptr_t mid_len = mid_extent->len();
|
|
uintptr_t mid_minAddr = mMapMinAVMA + mid_offset;
|
|
uintptr_t mid_maxAddr = mid_minAddr + mid_len - 1;
|
|
if (ia < mid_minAddr) {
|
|
hi = mid - 1;
|
|
continue;
|
|
}
|
|
if (ia > mid_maxAddr) {
|
|
lo = mid + 1;
|
|
continue;
|
|
}
|
|
MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
|
|
uint32_t mid_extent_dictIx = mid_extent->dictIx();
|
|
MOZ_RELEASE_ASSERT(mid_extent_dictIx < mExtents.size());
|
|
return &mDictionary[mid_extent_dictIx];
|
|
}
|
|
// NOTREACHED
|
|
}
|
|
|
|
// Add a RuleSet to the collection.  The rule is copied in.  Calling
// this makes the map non-searchable.  |avma|/|len| give the address
// range (absolute VMA) the rule covers; the RuleSet itself is de-duped
// via |mUniqifier| and the range recorded as one or more Extents that
// reference its dictionary index.
void SecMap::AddRuleSet(const RuleSet* rs, uintptr_t avma, uintptr_t len) {
  mUsable = false;

  // Zero length RuleSet?  Meaningless, but ignore it anyway.
  if (len == 0) {
    return;
  }

  // Ignore attempts to add RuleSets whose address range doesn't fall within
  // the declared address range for the SecMap.  Maybe we should print some
  // kind of error message rather than silently ignoring them.
  if (!(avma >= mMapMinAVMA && avma + len - 1 <= mMapMaxAVMA)) {
    return;
  }

  // Because `mMapStartAVMA` .. `mMapEndAVMA` can specify at most a 2^32-1 byte
  // chunk of address space, the following must now hold.
  MOZ_RELEASE_ASSERT(len <= (uintptr_t)0xFFFFFFFF);

  // See if `mUniqifier` already has `rs`.  If so set `dictIx` to the assigned
  // dictionary index; if not, add `rs` to `mUniqifier` and assign a new
  // dictionary index.  This is the core of the RuleSet-de-duplication process.
  uint32_t dictIx = 0;
  mozilla::HashMap<RuleSet, uint32_t, RuleSet, InfallibleAllocPolicy>::AddPtr
      p = mUniqifier->lookupForAdd(*rs);
  if (!p) {
    // Not seen before; the next free index becomes this RuleSet's index.
    dictIx = mUniqifier->count();
    // If this ever fails, Extents::dictIx will need to be changed to be a
    // type wider than the current uint16_t.
    MOZ_RELEASE_ASSERT(dictIx < (1 << 16));
    // This returns `false` on OOM.  We ignore the return value since we asked
    // for it to use the InfallibleAllocPolicy.
    DebugOnly<bool> addedOK = mUniqifier->add(p, *rs, dictIx);
    MOZ_ASSERT(addedOK);
  } else {
    dictIx = p->value();
  }

  // Record the range as map-relative offsets, chunked so each Extent's
  // length fits the constraints described below.
  uint32_t offset = (uint32_t)(avma - mMapMinAVMA);
  while (len > 0) {
    // Because Extents::len is a uint16_t, we have to add multiple `mExtents`
    // entries to cover the case where `len` is equal to or greater than 2^16.
    // This happens only exceedingly rarely.  In order to get more test
    // coverage on what would otherwise be a very low probability (less than
    // 0.0002%) corner case, we do this in steps of 4095.  On libxul.so as of
    // Sept 2020, this increases the number of `mExtents` entries by about
    // 0.05%, hence has no meaningful effect on space use, but increases the
    // use of this corner case, and hence its test coverage, by a factor of 250.
    uint32_t this_step_len = (len > 4095) ? 4095 : len;
    mExtents.emplace_back(offset, this_step_len, dictIx);
    offset += this_step_len;
    len -= this_step_len;
  }
}
|
|
|
|
// Add a PfxInstr to the vector of such instrs, and return the index
|
|
// in the vector. Calling this makes the map non-searchable.
|
|
uint32_t SecMap::AddPfxInstr(PfxInstr pfxi) {
|
|
mUsable = false;
|
|
mPfxInstrs.push_back(pfxi);
|
|
return mPfxInstrs.size() - 1;
|
|
}
|
|
|
|
static bool CmpExtentsByOffsetLE(const Extent& ext1, const Extent& ext2) {
|
|
return ext1.offset() < ext2.offset();
|
|
}
|
|
|
|
// Prepare the map for searching, by sorting it, de-overlapping entries and
// removing any resulting zero-length entries.  At the start of this routine,
// all Extents should fall within [mMapMinAVMA, mMapMaxAVMA] and not have zero
// length, as a result of the checks in AddRuleSet().
void SecMap::PrepareRuleSets() {
  // At this point, the de-duped RuleSets are in `mUniqifier`, and
  // `mDictionary` is empty.  This method will, amongst other things, copy
  // them into `mDictionary` in order of their assigned dictionary-index
  // values, as established by `SecMap::AddRuleSet`, and free `mUniqifier`;
  // after this method, it has no further use.
  MOZ_RELEASE_ASSERT(mUniqifier);
  MOZ_RELEASE_ASSERT(mDictionary.empty());

  // No extents at all: nothing to prepare; just discard the uniqifier.
  if (mExtents.empty()) {
    mUniqifier->clear();
    mUniqifier = nullptr;
    return;
  }

  // [1, 0] is the canonical empty range installed by the constructor.
  if (mMapMinAVMA == 1 && mMapMaxAVMA == 0) {
    // The map is empty.  This should never happen.
    mExtents.clear();
    mUniqifier->clear();
    mUniqifier = nullptr;
    return;
  }
  MOZ_RELEASE_ASSERT(mMapMinAVMA <= mMapMaxAVMA);

  // We must have at least one Extent, and as a consequence there must be at
  // least one entry in the uniqifier.
  MOZ_RELEASE_ASSERT(!mExtents.empty() && !mUniqifier->empty());

#ifdef DEBUG
  // Check invariants on incoming Extents.
  for (size_t i = 0; i < mExtents.size(); ++i) {
    Extent* ext = &mExtents[i];
    uint32_t len = ext->len();
    MOZ_ASSERT(len > 0);
    MOZ_ASSERT(len <= 4095 /* per '4095' in AddRuleSet() */);
    uint32_t offset = ext->offset();
    uintptr_t avma = mMapMinAVMA + (uintptr_t)offset;
    // Upper bounds test.  There's no lower bounds test because `offset` is a
    // positive displacement from `mMapMinAVMA`, so a small underrun will
    // manifest as `len` being close to 2^32.
    MOZ_ASSERT(avma + (uintptr_t)len - 1 <= mMapMaxAVMA);
  }
#endif

  // Sort by start addresses.
  std::sort(mExtents.begin(), mExtents.end(), CmpExtentsByOffsetLE);

  // Iteratively truncate any overlaps and remove any zero length
  // entries that might result, or that may have been present
  // initially.  Unless the input is seriously screwy, this is
  // expected to iterate only once.
  while (true) {
    size_t i;
    size_t n = mExtents.size();
    size_t nZeroLen = 0;

    if (n == 0) {
      break;
    }

    // Pass 1: truncate any extent that overlaps its successor, counting
    // how many entries end up zero-length as a result.
    for (i = 1; i < n; ++i) {
      Extent* prev = &mExtents[i - 1];
      Extent* here = &mExtents[i];
      MOZ_ASSERT(prev->offset() <= here->offset());
      if (prev->offset() + prev->len() > here->offset()) {
        prev->setLen(here->offset() - prev->offset());
      }
      if (prev->len() == 0) {
        nZeroLen++;
      }
    }

    // The loop above never examined the length of the final entry.
    if (mExtents[n - 1].len() == 0) {
      nZeroLen++;
    }

    // At this point, the entries are in-order and non-overlapping.
    // If none of them are zero-length, we are done.
    if (nZeroLen == 0) {
      break;
    }

    // Slide back the entries to remove the zero length ones.
    size_t j = 0;  // The write-point.
    for (i = 0; i < n; ++i) {
      if (mExtents[i].len() == 0) {
        continue;
      }
      if (j != i) {
        mExtents[j] = mExtents[i];
      }
      ++j;
    }
    MOZ_ASSERT(i == n);
    MOZ_ASSERT(nZeroLen <= n);
    MOZ_ASSERT(j == n - nZeroLen);
    while (nZeroLen > 0) {
      mExtents.pop_back();
      nZeroLen--;
    }

    MOZ_ASSERT(mExtents.size() == j);
  }

  size_t nExtents = mExtents.size();

#ifdef DEBUG
  // Do a final check on the extents: their address ranges must be
  // ascending, non overlapping, non zero sized.
  if (nExtents > 0) {
    MOZ_ASSERT(mExtents[0].len() > 0);
    for (size_t i = 1; i < nExtents; ++i) {
      const Extent* prev = &mExtents[i - 1];
      const Extent* here = &mExtents[i];
      MOZ_ASSERT(prev->offset() < here->offset());
      MOZ_ASSERT(here->len() > 0);
      MOZ_ASSERT(prev->offset() + prev->len() <= here->offset());
    }
  }
#endif

  // Create the final dictionary by enumerating the uniqifier.
  size_t nUniques = mUniqifier->count();

  RuleSet dummy;
  mozilla::PodZero(&dummy);

  // Pre-size the dictionary with placeholder entries, then overwrite
  // each slot with the RuleSet assigned that dictionary index.
  mDictionary.reserve(nUniques);
  for (size_t i = 0; i < nUniques; i++) {
    mDictionary.push_back(dummy);
  }

  for (auto iter = mUniqifier->iter(); !iter.done(); iter.next()) {
    MOZ_RELEASE_ASSERT(iter.get().value() < nUniques);
    mDictionary[iter.get().value()] = iter.get().key();
  }

  // The uniqifier has served its purpose; free it.
  mUniqifier = nullptr;

  char buf[150];
  SprintfLiteral(
      buf,
      "PrepareRuleSets: %lu extents, %lu rulesets, "
      "avma min/max 0x%llx, 0x%llx\n",
      (unsigned long int)nExtents, (unsigned long int)mDictionary.size(),
      (unsigned long long int)mMapMinAVMA, (unsigned long long int)mMapMaxAVMA);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Is now usable for binary search.
  mUsable = true;

#if 0
  mLog("\nRulesets after preening\n");
  for (size_t i = 0; i < nExtents; ++i) {
    const Extent* extent = &mExtents[i];
    uintptr_t avma = mMapMinAVMA + (uintptr_t)extent->offset();
    mDictionary[extent->dictIx()].Print(avma, extent->len(), mLog);
    mLog("\n");
  }
  mLog("\n");
#endif
}
|
|
|
|
// True iff the map holds no extents, and hence can match no address.
bool SecMap::IsEmpty() { return mExtents.size() == 0; }
|
|
|
|
// Memory-reporter hook: measure this SecMap plus the heap blocks backing
// its vectors and (if still alive) the uniqifier table.
size_t SecMap::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t n = aMallocSizeOf(this);

  // It's conceivable that these calls would be unsafe with some
  // implementations of std::vector, but it seems to be working for now...
  n += aMallocSizeOf(mPfxInstrs.data());

  // The uniqifier only exists between construction and PrepareRuleSets().
  if (mUniqifier) {
    n += mUniqifier->shallowSizeOfIncludingThis(aMallocSizeOf);
  }
  n += aMallocSizeOf(mDictionary.data());
  n += aMallocSizeOf(mExtents.data());

  return n;
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
// SegArray //
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
// A SegArray holds a set of address ranges that together exactly
|
|
// cover an address range, with no overlaps or holes. Each range has
|
|
// an associated value, which in this case has been specialised to be
|
|
// a simple boolean. The representation is kept to minimal canonical
|
|
// form in which adjacent ranges with the same associated value are
|
|
// merged together. Each range is represented by a |struct Seg|.
|
|
//
|
|
// SegArrays are used to keep track of which parts of the address
|
|
// space are known to contain instructions.
|
|
class SegArray {
 public:
  // Set the value of the (inclusive) range [lo, hi] to |val|,
  // overwriting whatever was there before.  Ranges with lo > hi are
  // ignored.  Canonical form is restored afterwards by preen().
  void add(uintptr_t lo, uintptr_t hi, bool val) {
    if (lo > hi) {
      return;
    }
    split_at(lo);
    if (hi < UINTPTR_MAX) {
      split_at(hi + 1);
    }
    std::vector<Seg>::size_type iLo, iHi, i;
    iLo = find(lo);
    iHi = find(hi);
    for (i = iLo; i <= iHi; ++i) {
      mSegs[i].val = val;
    }
    preen();
  }

  // RUNS IN NO-MALLOC CONTEXT
  // If |addr| falls in a range marked |true| (known code), write that
  // range's bounds to |*rx_min|/|*rx_max| and return true.  Otherwise
  // return false and leave the outputs untouched.
  bool getBoundingCodeSegment(/*OUT*/ uintptr_t* rx_min,
                              /*OUT*/ uintptr_t* rx_max, uintptr_t addr) {
    std::vector<Seg>::size_type i = find(addr);
    if (!mSegs[i].val) {
      return false;
    }
    *rx_min = mSegs[i].lo;
    *rx_max = mSegs[i].hi;
    return true;
  }

  // Initially the whole address space is one segment marked |false|.
  SegArray() {
    Seg s(0, UINTPTR_MAX, false);
    mSegs.push_back(s);
  }

 private:
  // One maximal run of addresses [lo, hi] all carrying the same value.
  struct Seg {
    Seg(uintptr_t lo, uintptr_t hi, bool val) : lo(lo), hi(hi), val(val) {}
    uintptr_t lo;
    uintptr_t hi;
    bool val;
  };

  // Restore minimal canonical form by merging adjacent segments that
  // carry the same value.  Note erase() on a std::vector leaves |iter|
  // (which points before the erased element) valid.
  void preen() {
    for (std::vector<Seg>::iterator iter = mSegs.begin();
         iter < mSegs.end() - 1; ++iter) {
      if (iter[0].val != iter[1].val) {
        continue;
      }
      iter[0].hi = iter[1].hi;
      mSegs.erase(iter + 1);
      // Back up one, so as not to miss an opportunity to merge
      // with the entry after this one.
      --iter;
    }
  }

  // RUNS IN NO-MALLOC CONTEXT
  // Binary-search for the index of the segment containing |a|.  Because
  // the segments exactly cover the address space, this always succeeds.
  std::vector<Seg>::size_type find(uintptr_t a) {
    long int lo = 0;
    // BUGFIX: the unsearched space is lo .. hi *inclusive* (see the loop
    // below), so |hi| must start at size() - 1, not size().  The old
    // value named a nonexistent element and could, in principle, let
    // mSegs[mid] read one past the end.  This also matches the
    // equivalent searches in SecMap::FindRuleSet / PriMap::FindSecMap.
    long int hi = (long int)mSegs.size() - 1;
    while (true) {
      // The unsearched space is lo .. hi inclusive.
      if (lo > hi) {
        // Not found.  This can't happen.
        return (std::vector<Seg>::size_type)(-1);
      }
      long int mid = lo + ((hi - lo) / 2);
      uintptr_t mid_lo = mSegs[mid].lo;
      uintptr_t mid_hi = mSegs[mid].hi;
      if (a < mid_lo) {
        hi = mid - 1;
        continue;
      }
      if (a > mid_hi) {
        lo = mid + 1;
        continue;
      }
      return (std::vector<Seg>::size_type)mid;
    }
  }

  // Ensure that some segment starts exactly at |a|, splitting the
  // segment containing |a| into two if necessary.
  void split_at(uintptr_t a) {
    std::vector<Seg>::size_type i = find(a);
    if (mSegs[i].lo == a) {
      return;
    }
    mSegs.insert(mSegs.begin() + i + 1, mSegs[i]);
    mSegs[i].hi = a - 1;
    mSegs[i + 1].lo = a;
  }

  // Debug printing of the whole array.
  void show() {
    printf("<< %d entries:\n", (int)mSegs.size());
    for (std::vector<Seg>::iterator iter = mSegs.begin(); iter < mSegs.end();
         ++iter) {
      printf(" %016llx %016llx %s\n", (unsigned long long int)(*iter).lo,
             (unsigned long long int)(*iter).hi,
             (*iter).val ? "true" : "false");
    }
    printf(">>\n");
  }

  // Sorted, non-overlapping segments that exactly cover [0, UINTPTR_MAX].
  std::vector<Seg> mSegs;
};
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
// PriMap //
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
// The primary map: an address-sorted collection of SecMaps, one per
// mapped object.  Supports lookup of the unwind rules for an address,
// insertion of new SecMaps, and removal of SecMaps overlapping a range.
class PriMap {
 public:
  explicit PriMap(void (*aLog)(const char*)) : mLog(aLog) {}

  // RUNS IN NO-MALLOC CONTEXT
  // Find the RuleSet and PfxInstr vector relevant to address |ia|.
  // Either or both components of the returned pair are nullptr when
  // |ia| is not covered by any SecMap (or has no RuleSet within one).
  pair<const RuleSet*, const vector<PfxInstr>*> Lookup(uintptr_t ia) {
    SecMap* sm = FindSecMap(ia);
    return pair<const RuleSet*, const vector<PfxInstr>*>(
        sm ? sm->FindRuleSet(ia) : nullptr, sm ? sm->GetPfxInstrs() : nullptr);
  }

  // Add a secondary map.  No overlaps allowed w.r.t. existing
  // secondary maps.
  void AddSecMap(mozilla::UniquePtr<SecMap>&& aSecMap) {
    // We can't add an empty SecMap to the PriMap.  But that's OK
    // since we'd never be able to find anything in it anyway.
    if (aSecMap->IsEmpty()) {
      return;
    }

    // Iterate through the SecMaps and find the right place for this
    // one.  At the same time, ensure that the in-order
    // non-overlapping invariant is preserved (and, generally, holds).
    // FIXME: this gives a cost that is O(N^2) in the total number of
    // shared objects in the system.  ToDo: better.
    MOZ_ASSERT(aSecMap->mMapMinAVMA <= aSecMap->mMapMaxAVMA);

    size_t num_secMaps = mSecMaps.size();
    uintptr_t i;
    for (i = 0; i < num_secMaps; ++i) {
      mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
      MOZ_ASSERT(sm_i->mMapMinAVMA <= sm_i->mMapMaxAVMA);
      if (aSecMap->mMapMinAVMA < sm_i->mMapMaxAVMA) {
        // |aSecMap| needs to be inserted immediately before mSecMaps[i].
        break;
      }
    }
    MOZ_ASSERT(i <= num_secMaps);
    if (i == num_secMaps) {
      // It goes at the end.
      mSecMaps.push_back(std::move(aSecMap));
    } else {
      std::vector<mozilla::UniquePtr<SecMap>>::iterator iter =
          mSecMaps.begin() + i;
      mSecMaps.insert(iter, std::move(aSecMap));
    }
    char buf[100];
    SprintfLiteral(buf, "AddSecMap: now have %d SecMaps\n",
                   (int)mSecMaps.size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }

  // Remove and delete any SecMaps in the mapping, that intersect
  // with the specified address range.
  void RemoveSecMapsInRange(uintptr_t avma_min, uintptr_t avma_max) {
    MOZ_ASSERT(avma_min <= avma_max);
    size_t num_secMaps = mSecMaps.size();
    if (num_secMaps > 0) {
      intptr_t i;
      // Iterate from end to start over the vector, so as to ensure
      // that the special case where |avma_min| and |avma_max| denote
      // the entire address space, can be completed in time proportional
      // to the number of elements in the map.
      for (i = (intptr_t)num_secMaps - 1; i >= 0; i--) {
        mozilla::UniquePtr<SecMap>& sm_i = mSecMaps[i];
        if (sm_i->mMapMaxAVMA < avma_min || avma_max < sm_i->mMapMinAVMA) {
          // There's no overlap.  Move on.
          continue;
        }
        // We need to remove mSecMaps[i] and slide all those above it
        // downwards to cover the hole.
        mSecMaps.erase(mSecMaps.begin() + i);
      }
    }
  }

  // Return the number of currently contained SecMaps.
  size_t CountSecMaps() { return mSecMaps.size(); }

  // Memory-reporter hook: measure this object, the SecMap pointer array,
  // and each contained SecMap.
  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
    size_t n = aMallocSizeOf(this);

    // It's conceivable that this call would be unsafe with some
    // implementations of std::vector, but it seems to be working for now...
    n += aMallocSizeOf(mSecMaps.data());

    for (size_t i = 0; i < mSecMaps.size(); i++) {
      n += mSecMaps[i]->SizeOfIncludingThis(aMallocSizeOf);
    }

    return n;
  }

 private:
  // RUNS IN NO-MALLOC CONTEXT
  // Find the SecMap whose address range contains |ia|, or nullptr.
  SecMap* FindSecMap(uintptr_t ia) {
    // Binary search mSecMaps to find one that brackets |ia|.
    // lo and hi need to be signed, else the loop termination tests
    // don't work properly.
    long int lo = 0;
    long int hi = (long int)mSecMaps.size() - 1;
    while (true) {
      // current unsearched space is from lo to hi, inclusive.
      if (lo > hi) {
        // not found
        return nullptr;
      }
      long int mid = lo + ((hi - lo) / 2);
      mozilla::UniquePtr<SecMap>& mid_secMap = mSecMaps[mid];
      uintptr_t mid_minAddr = mid_secMap->mMapMinAVMA;
      uintptr_t mid_maxAddr = mid_secMap->mMapMaxAVMA;
      if (ia < mid_minAddr) {
        hi = mid - 1;
        continue;
      }
      if (ia > mid_maxAddr) {
        lo = mid + 1;
        continue;
      }
      MOZ_ASSERT(mid_minAddr <= ia && ia <= mid_maxAddr);
      return mid_secMap.get();
    }
    // NOTREACHED
  }

 private:
  // sorted array of per-object ranges, non overlapping, non empty
  std::vector<mozilla::UniquePtr<SecMap>> mSecMaps;

  // a logging sink, for debugging.
  void (*mLog)(const char*);
};
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
// LUL //
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
#define LUL_LOG(_str) \
|
|
do { \
|
|
char buf[200]; \
|
|
SprintfLiteral(buf, "LUL: pid %d tid %d lul-obj %p: %s", \
|
|
profiler_current_process_id(), \
|
|
profiler_current_thread_id(), this, (_str)); \
|
|
buf[sizeof(buf) - 1] = 0; \
|
|
mLog(buf); \
|
|
} while (0)
|
|
|
|
// Construct a LUL unwinder.  The object starts in "admin" mode, in which
// mappings may be registered/unregistered; EnableUnwinding() later
// switches it to unwind mode.  The constructing thread becomes the admin
// thread.  |aLog| is the sink used for all debug logging.
LUL::LUL(void (*aLog)(const char*))
    : mLog(aLog),
      mAdminMode(true),
      mAdminThreadId(profiler_current_thread_id()),
      mPriMap(new PriMap(aLog)),
      mSegArray(new SegArray()),
      mUSU(new UniqueStringUniverse()) {
  LUL_LOG("LUL::LUL: Created object");
}
|
|
|
|
// Tear down the unwinder and its owned maps.
LUL::~LUL() {
  LUL_LOG("LUL::~LUL: Destroyed object");
  delete mPriMap;
  delete mSegArray;
  // NOTE(review): mLog is nulled before mUSU is deleted -- presumably so
  // nothing logs during/after that deletion; confirm before reordering.
  mLog = nullptr;
  delete mUSU;
}
|
|
|
|
// Log cumulative frame-recovery statistics, but only once at least 5000
// new frames have been processed since the last report.
void LUL::MaybeShowStats() {
  // This is racey in the sense that it can't guarantee that
  //    n_new == n_new_Context + n_new_CFI + n_new_Scanned
  // if it should happen that mStats is updated by some other thread
  // in between computation of n_new and n_new_{Context,CFI,FP}.
  // But it's just stats printing, so we don't really care.
  uint32_t n_new = mStats - mStatsPrevious;
  if (n_new >= 5000) {
    // Per-category deltas since the previous report.
    uint32_t n_new_Context = mStats.mContext - mStatsPrevious.mContext;
    uint32_t n_new_CFI = mStats.mCFI - mStatsPrevious.mCFI;
    uint32_t n_new_FP = mStats.mFP - mStatsPrevious.mFP;
    mStatsPrevious = mStats;
    char buf[200];
    SprintfLiteral(buf,
                   "LUL frame stats: TOTAL %5u"
                   "    CTX %4u    CFI %4u    FP %4u",
                   n_new, n_new_Context, n_new_CFI, n_new_FP);
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);
  }
}
|
|
|
|
// Memory-reporter hook: measure this LUL object and its primary map.
size_t LUL::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
  size_t n = aMallocSizeOf(this);
  n += mPriMap->SizeOfIncludingThis(aMallocSizeOf);

  // Measurement of the following members may be added later if DMD finds it
  // is worthwhile:
  // - mSegArray
  // - mUSU

  return n;
}
|
|
|
|
// Switch from admin mode to unwinding mode.  Must be called on the
// admin thread; idempotent.
void LUL::EnableUnwinding() {
  LUL_LOG("LUL::EnableUnwinding");
  // Don't assert for Admin mode here.  That is, tolerate a call here
  // if we are already in Unwinding mode.
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mAdminMode = false;
}
|
|
|
|
// Register a newly mapped executable object at |aRXavma| of |aSize|
// bytes: read its CFI/EXIDX unwind data into a fresh SecMap, add the
// SecMap to the primary map, and mark the range as containing code.
// |aMappedImage|, if non-null, is an in-memory copy of the image to
// read from instead of the file |aFileName|.  Admin-mode/admin-thread
// only.
void LUL::NotifyAfterMap(uintptr_t aRXavma, size_t aSize, const char* aFileName,
                         const void* aMappedImage) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyMap %llx %llu %s\n",
                 (unsigned long long int)aRXavma, (unsigned long long int)aSize,
                 aFileName);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // We can't have a SecMap covering more than 2^32-1 bytes of address space.
  // See the definition of SecMap for why.  Rather than crash the system, just
  // limit the SecMap's size accordingly.  This case is never actually
  // expected to happen.
  if (((unsigned long long int)aSize) > 0xFFFFFFFFULL) {
    aSize = (uintptr_t)0xFFFFFFFF;
  }
  MOZ_RELEASE_ASSERT(aSize <= 0xFFFFFFFF);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Here's a new mapping, for this object.
    mozilla::UniquePtr<SecMap> smap =
        mozilla::MakeUnique<SecMap>(aRXavma, (uint32_t)aSize, mLog);

    // Read CFI or EXIDX unwind data into |smap|.
    if (!aMappedImage) {
      (void)lul::ReadSymbolData(string(aFileName), std::vector<string>(),
                                smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    } else {
      (void)lul::ReadSymbolDataInternal(
          (const uint8_t*)aMappedImage, string(aFileName),
          std::vector<string>(), smap.get(), (void*)aRXavma, aSize, mUSU, mLog);
    }

    mLog("NotifyMap .. preparing entries\n");

    // Sort/de-dup the rules so the map becomes searchable.
    smap->PrepareRuleSets();

    SprintfLiteral(buf, "NotifyMap got %lld entries\n",
                   (long long int)smap->Size());
    buf[sizeof(buf) - 1] = 0;
    mLog(buf);

    // Add it to the primary map (the top level set of mapped objects).
    mPriMap->AddSecMap(std::move(smap));

    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}
|
|
|
|
// Register a range of address space as containing executable code, but
// without any associated unwind data (no SecMap is created).  Admin-
// mode/admin-thread only.
void LUL::NotifyExecutableArea(uintptr_t aRXavma, size_t aSize) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[200];
  SprintfLiteral(buf, "NotifyExecutableArea %llx %llu\n",
                 (unsigned long long int)aRXavma,
                 (unsigned long long int)aSize);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  // Ignore obviously-stupid notifications.
  if (aSize > 0) {
    // Tell the segment array about the mapping, so that the stack
    // scan and __kernel_syscall mechanisms know where valid code is.
    mSegArray->add(aRXavma, aRXavma + aSize - 1, true);
  }
}
|
|
|
|
// Undo the effect of NotifyAfterMap/NotifyExecutableArea for the range
// [aRXavmaMin, aRXavmaMax]: drop any SecMaps overlapping the range and
// mark it as no longer containing code.  Admin-mode/admin-thread only.
void LUL::NotifyBeforeUnmap(uintptr_t aRXavmaMin, uintptr_t aRXavmaMax) {
  MOZ_RELEASE_ASSERT(mAdminMode);
  MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);

  mLog(":\n");
  char buf[100];
  SprintfLiteral(buf, "NotifyUnmap %016llx-%016llx\n",
                 (unsigned long long int)aRXavmaMin,
                 (unsigned long long int)aRXavmaMax);
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);

  MOZ_ASSERT(aRXavmaMin <= aRXavmaMax);

  // Remove from the primary map, any secondary maps that intersect
  // with the address range.  Also delete the secondary maps.
  mPriMap->RemoveSecMapsInRange(aRXavmaMin, aRXavmaMax);

  // Tell the segment array that the address range no longer
  // contains valid code.
  mSegArray->add(aRXavmaMin, aRXavmaMax, false);

  SprintfLiteral(buf, "NotifyUnmap: now have %d SecMaps\n",
                 (int)mPriMap->CountSecMaps());
  buf[sizeof(buf) - 1] = 0;
  mLog(buf);
}
|
|
|
|
size_t LUL::CountMappings() {
|
|
MOZ_RELEASE_ASSERT(mAdminMode);
|
|
MOZ_RELEASE_ASSERT(profiler_current_thread_id() == mAdminThreadId);
|
|
|
|
return mPriMap->CountSecMaps();
|
|
}
|
|
|
|
// RUNS IN NO-MALLOC CONTEXT
// Read one word at stack address |aAddr| out of the captured stack image
// |aStackImg|.  Returns an invalid TaggedUWord if |aAddr| is itself
// invalid, or if any byte of the word lies outside the image.
static TaggedUWord DerefTUW(TaggedUWord aAddr, const StackImage* aStackImg) {
  if (!aAddr.Valid()) {
    return TaggedUWord();
  }

  // Lower limit check.  |aAddr.Value()| is the lowest requested address
  // and |aStackImg->mStartAvma| is the lowest address we actually have,
  // so the comparison is straightforward.
  if (aAddr.Value() < aStackImg->mStartAvma) {
    return TaggedUWord();
  }

  // Upper limit check.  We must compute the highest requested address
  // and the highest address we actually have, but being careful to
  // avoid overflow.  In particular if |aAddr| is 0xFFF...FFF or the
  // 3/7 values below that, then we will get overflow.  See bug #1245477.
  typedef CheckedInt<uintptr_t> CheckedUWord;
  CheckedUWord highest_requested_plus_one =
      CheckedUWord(aAddr.Value()) + CheckedUWord(sizeof(uintptr_t));
  CheckedUWord highest_available_plus_one =
      CheckedUWord(aStackImg->mStartAvma) + CheckedUWord(aStackImg->mLen);
  if (!highest_requested_plus_one.isValid()     // overflow?
      || !highest_available_plus_one.isValid()  // overflow?
      || (highest_requested_plus_one.value() >
          highest_available_plus_one.value())) {  // in range?
    return TaggedUWord();
  }

  // Safe: the whole word lies inside the image; read it directly.
  return TaggedUWord(
      *(uintptr_t*)(&aStackImg
                         ->mContents[aAddr.Value() - aStackImg->mStartAvma]));
}
|
|
|
|
// RUNS IN NO-MALLOC CONTEXT
//
// Returns the value of the DWARF register |aReg| taken from |aOldRegs|,
// or |aCFA| when DW_REG_CFA is requested.  Only the registers tracked by
// UnwindRegs for the target architecture are representable; any other
// register number asserts in debug builds and yields an invalid
// TaggedUWord.
static TaggedUWord EvaluateReg(int16_t aReg, const UnwindRegs* aOldRegs,
                               TaggedUWord aCFA) {
  switch (aReg) {
    case DW_REG_CFA:
      // The CFA is not stored in |aOldRegs|; it is supplied separately.
      return aCFA;
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    case DW_REG_INTEL_XBP:
      return aOldRegs->xbp;
    case DW_REG_INTEL_XSP:
      return aOldRegs->xsp;
    case DW_REG_INTEL_XIP:
      return aOldRegs->xip;
#elif defined(GP_ARCH_arm)
    case DW_REG_ARM_R7:
      return aOldRegs->r7;
    case DW_REG_ARM_R11:
      return aOldRegs->r11;
    case DW_REG_ARM_R12:
      return aOldRegs->r12;
    case DW_REG_ARM_R13:
      return aOldRegs->r13;
    case DW_REG_ARM_R14:
      return aOldRegs->r14;
    case DW_REG_ARM_R15:
      return aOldRegs->r15;
#elif defined(GP_ARCH_arm64)
    case DW_REG_AARCH64_X29:
      return aOldRegs->x29;
    case DW_REG_AARCH64_X30:
      return aOldRegs->x30;
    case DW_REG_AARCH64_SP:
      return aOldRegs->sp;
#elif defined(GP_ARCH_mips64)
    case DW_REG_MIPS_SP:
      return aOldRegs->sp;
    case DW_REG_MIPS_FP:
      return aOldRegs->fp;
    case DW_REG_MIPS_PC:
      return aOldRegs->pc;
#else
# error "Unsupported arch"
#endif
    default:
      // Unknown/untracked register: invalid result poisons the caller's
      // computation rather than crashing.
      MOZ_ASSERT(0);
      return TaggedUWord();
  }
}
|
|
|
|
// RUNS IN NO-MALLOC CONTEXT
// See prototype for comment.
//
// Interprets the prefix-form (stack machine) expression starting at index
// |start| in |aPfxInstrs|.  Any error -- malformed sequence, stack
// over/underflow, out-of-range start index -- produces an invalid
// TaggedUWord rather than a crash; invalid operand values simply
// propagate through the TaggedUWord arithmetic.
TaggedUWord EvaluatePfxExpr(int32_t start, const UnwindRegs* aOldRegs,
                            TaggedUWord aCFA, const StackImage* aStackImg,
                            const vector<PfxInstr>& aPfxInstrs) {
  // A small evaluation stack, and a stack pointer, which points to
  // the highest numbered in-use element.
  const int N_STACK = 10;
  TaggedUWord stack[N_STACK];
  int stackPointer = -1;
  for (int i = 0; i < N_STACK; i++) stack[i] = TaggedUWord();

// Push/pop helpers; both bail out to |fail| on over/underflow so a
// malformed expression can never corrupt memory.
#define PUSH(_tuw)                                             \
  do {                                                         \
    if (stackPointer >= N_STACK - 1) goto fail; /* overflow */ \
    stack[++stackPointer] = (_tuw);                            \
  } while (0)

#define POP(_lval)                                   \
  do {                                               \
    if (stackPointer < 0) goto fail; /* underflow */ \
    _lval = stack[stackPointer--];                   \
  } while (0)

  // Cursor in the instruction sequence.
  size_t curr = start + 1;

  // Check the start point is sane.
  size_t nInstrs = aPfxInstrs.size();
  if (start < 0 || (size_t)start >= nInstrs) goto fail;

  {
    // The instruction sequence must start with PX_Start.  If not,
    // something is seriously wrong.
    PfxInstr first = aPfxInstrs[start];
    if (first.mOpcode != PX_Start) goto fail;

    // Push the CFA on the stack to start with (or not), as required by
    // the original DW_OP_*expression* CFI.
    if (first.mOperand != 0) PUSH(aCFA);
  }

  while (true) {
    if (curr >= nInstrs) goto fail;  // ran off the end of the sequence

    PfxInstr pfxi = aPfxInstrs[curr++];
    if (pfxi.mOpcode == PX_End) break;  // we're done

    switch (pfxi.mOpcode) {
      case PX_Start:
        // This should appear only at the start of the sequence.
        goto fail;
      case PX_End:
        // We just took care of that, so we shouldn't see it again.
        MOZ_ASSERT(0);
        goto fail;
      case PX_SImm32:
        // Sign-extend the 32-bit operand to word width before pushing.
        PUSH(TaggedUWord((intptr_t)pfxi.mOperand));
        break;
      case PX_DwReg: {
        DW_REG_NUMBER reg = (DW_REG_NUMBER)pfxi.mOperand;
        MOZ_ASSERT(reg != DW_REG_CFA);
        PUSH(EvaluateReg(reg, aOldRegs, aCFA));
        break;
      }
      case PX_Deref: {
        // Load a word from the captured stack image at the popped address.
        TaggedUWord addr;
        POP(addr);
        PUSH(DerefTUW(addr, aStackImg));
        break;
      }
      case PX_Add: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y + x);
        break;
      }
      case PX_Sub: {
        // Note operand order: second-from-top minus top.
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y - x);
        break;
      }
      case PX_And: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y & x);
        break;
      }
      case PX_Or: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y | x);
        break;
      }
      case PX_CmpGES: {
        // Signed >= comparison of second-from-top against top.
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y.CmpGEs(x));
        break;
      }
      case PX_Shl: {
        TaggedUWord x, y;
        POP(x);
        POP(y);
        PUSH(y << x);
        break;
      }
      default:
        MOZ_ASSERT(0);
        goto fail;
    }
  }  // while (true)

  // Evaluation finished.  The top value on the stack is the result.
  if (stackPointer >= 0) {
    return stack[stackPointer];
  }
  // Else fall through

fail:
  return TaggedUWord();

#undef PUSH
#undef POP
}
|
|
|
|
// RUNS IN NO-MALLOC CONTEXT
|
|
TaggedUWord LExpr::EvaluateExpr(const UnwindRegs* aOldRegs, TaggedUWord aCFA,
|
|
const StackImage* aStackImg,
|
|
const vector<PfxInstr>* aPfxInstrs) const {
|
|
switch (mHow) {
|
|
case UNKNOWN:
|
|
return TaggedUWord();
|
|
case NODEREF: {
|
|
TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
|
|
tuw = tuw + TaggedUWord((intptr_t)mOffset);
|
|
return tuw;
|
|
}
|
|
case DEREF: {
|
|
TaggedUWord tuw = EvaluateReg(mReg, aOldRegs, aCFA);
|
|
tuw = tuw + TaggedUWord((intptr_t)mOffset);
|
|
return DerefTUW(tuw, aStackImg);
|
|
}
|
|
case PFXEXPR: {
|
|
MOZ_ASSERT(aPfxInstrs);
|
|
if (!aPfxInstrs) {
|
|
return TaggedUWord();
|
|
}
|
|
return EvaluatePfxExpr(mOffset, aOldRegs, aCFA, aStackImg, *aPfxInstrs);
|
|
}
|
|
default:
|
|
MOZ_ASSERT(0);
|
|
return TaggedUWord();
|
|
}
|
|
}
|
|
|
|
// RUNS IN NO-MALLOC CONTEXT
//
// Applies the unwind rules in |aRS| to step |aRegs| one frame outwards,
// in place.  First the CFA is computed from the old register values, then
// each tracked register is recomputed via its rule.  Registers whose rule
// fails to evaluate are left invalid, which the caller uses to decide
// whether the unwind can continue.
static void UseRuleSet(/*MOD*/ UnwindRegs* aRegs, const StackImage* aStackImg,
                       const RuleSet* aRS, const vector<PfxInstr>* aPfxInstrs) {
  // Take a copy of regs, since we'll need to refer to the old values
  // whilst computing the new ones.
  UnwindRegs old_regs = *aRegs;

  // Mark all the current register values as invalid, so that the
  // caller can see, on our return, which ones have been computed
  // anew.  If we don't even manage to compute a new PC value, then
  // the caller will have to abandon the unwind.
  // FIXME: Create and use instead: aRegs->SetAllInvalid();
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp = TaggedUWord();
  aRegs->xsp = TaggedUWord();
  aRegs->xip = TaggedUWord();
#elif defined(GP_ARCH_arm)
  aRegs->r7 = TaggedUWord();
  aRegs->r11 = TaggedUWord();
  aRegs->r12 = TaggedUWord();
  aRegs->r13 = TaggedUWord();
  aRegs->r14 = TaggedUWord();
  aRegs->r15 = TaggedUWord();
#elif defined(GP_ARCH_arm64)
  aRegs->x29 = TaggedUWord();
  aRegs->x30 = TaggedUWord();
  aRegs->sp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#elif defined(GP_ARCH_mips64)
  aRegs->sp = TaggedUWord();
  aRegs->fp = TaggedUWord();
  aRegs->pc = TaggedUWord();
#else
# error "Unsupported arch"
#endif

  // This is generally useful.
  const TaggedUWord inval = TaggedUWord();

  // First, compute the CFA.  There is no "old CFA", hence the invalid
  // placeholder argument.
  TaggedUWord cfa = aRS->mCfaExpr.EvaluateExpr(&old_regs, inval /*old cfa*/,
                                               aStackImg, aPfxInstrs);

  // If we didn't manage to compute the CFA, well .. that's ungood,
  // but keep going anyway.  It'll be OK provided none of the register
  // value rules mention the CFA.  In any case, compute the new values
  // for each register that we're tracking.

#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
  aRegs->xbp =
      aRS->mXbpExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xsp =
      aRS->mXspExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->xip =
      aRS->mXipExpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm)
  aRegs->r7 = aRS->mR7expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r11 =
      aRS->mR11expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r12 =
      aRS->mR12expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r13 =
      aRS->mR13expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r14 =
      aRS->mR14expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->r15 =
      aRS->mR15expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_arm64)
  // NOTE(review): on arm64 |pc| is invalidated above but not recomputed
  // here; the unwind loop reads x30 (LR) for non-innermost frames, so pc
  // is not needed past the first frame -- confirm against LulMainInt.h.
  aRegs->x29 =
      aRS->mX29expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->x30 =
      aRS->mX30expr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#elif defined(GP_ARCH_mips64)
  aRegs->sp = aRS->mSPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->fp = aRS->mFPexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
  aRegs->pc = aRS->mPCexpr.EvaluateExpr(&old_regs, cfa, aStackImg, aPfxInstrs);
#else
# error "Unsupported arch"
#endif

  // We're done.  Any regs for which we didn't manage to compute a
  // new value will now be marked as invalid.
}
|
|
|
|
// RUNS IN NO-MALLOC CONTEXT
//
// The root of the no-malloc transitive closure (see file-top warning).
// Unwinds the stack described by |aStartRegs| and the captured image
// |aStackImg|, writing up to |aFramesAvail| frame PCs/SPs into
// |aFramePCs|/|aFrameSPs| and the count into |*aFramesUsed|.
// |*aFramePointerFramesAcquired| is incremented (not reset) once per
// frame recovered by frame-pointer chasing rather than CFI.  Each loop
// iteration: record the current frame, then try (1) the CFI RuleSet for
// the PC, (2) the x86-linux __kernel_vsyscall special case, (3) FP
// chasing; if all fail, stop.
void LUL::Unwind(/*OUT*/ uintptr_t* aFramePCs,
                 /*OUT*/ uintptr_t* aFrameSPs,
                 /*OUT*/ size_t* aFramesUsed,
                 /*OUT*/ size_t* aFramePointerFramesAcquired,
                 size_t aFramesAvail, UnwindRegs* aStartRegs,
                 StackImage* aStackImg) {
  // Unwinding is forbidden while in admin mode (and vice versa).
  MOZ_RELEASE_ASSERT(!mAdminMode);

  /////////////////////////////////////////////////////////
  // BEGIN UNWIND

  *aFramesUsed = 0;

  UnwindRegs regs = *aStartRegs;
  TaggedUWord last_valid_sp = TaggedUWord();

  while (true) {
    if (DEBUG_MAIN) {
      char buf[300];
      mLog("\n");
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
      SprintfLiteral(
          buf, "LoopTop: rip %d/%llx rsp %d/%llx rbp %d/%llx\n",
          (int)regs.xip.Valid(), (unsigned long long int)regs.xip.Value(),
          (int)regs.xsp.Valid(), (unsigned long long int)regs.xsp.Value(),
          (int)regs.xbp.Valid(), (unsigned long long int)regs.xbp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm)
      SprintfLiteral(
          buf,
          "LoopTop: r15 %d/%llx r7 %d/%llx r11 %d/%llx"
          " r12 %d/%llx r13 %d/%llx r14 %d/%llx\n",
          (int)regs.r15.Valid(), (unsigned long long int)regs.r15.Value(),
          (int)regs.r7.Valid(), (unsigned long long int)regs.r7.Value(),
          (int)regs.r11.Valid(), (unsigned long long int)regs.r11.Value(),
          (int)regs.r12.Valid(), (unsigned long long int)regs.r12.Value(),
          (int)regs.r13.Valid(), (unsigned long long int)regs.r13.Value(),
          (int)regs.r14.Valid(), (unsigned long long int)regs.r14.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_arm64)
      SprintfLiteral(
          buf,
          "LoopTop: pc %d/%llx x29 %d/%llx x30 %d/%llx"
          " sp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.x29.Valid(), (unsigned long long int)regs.x29.Value(),
          (int)regs.x30.Valid(), (unsigned long long int)regs.x30.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#elif defined(GP_ARCH_mips64)
      SprintfLiteral(
          buf, "LoopTop: pc %d/%llx sp %d/%llx fp %d/%llx\n",
          (int)regs.pc.Valid(), (unsigned long long int)regs.pc.Value(),
          (int)regs.sp.Valid(), (unsigned long long int)regs.sp.Value(),
          (int)regs.fp.Valid(), (unsigned long long int)regs.fp.Value());
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
#else
# error "Unsupported arch"
#endif
    }

    // |ia| is the instruction address for this frame: the PC for the
    // innermost frame, the return-address register for outer frames (on
    // architectures that distinguish the two).  |sp| is the stack pointer.
#if defined(GP_ARCH_amd64) || defined(GP_ARCH_x86)
    TaggedUWord ia = regs.xip;
    TaggedUWord sp = regs.xsp;
#elif defined(GP_ARCH_arm)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.r15 : regs.r14);
    TaggedUWord sp = regs.r13;
#elif defined(GP_ARCH_arm64)
    TaggedUWord ia = (*aFramesUsed == 0 ? regs.pc : regs.x30);
    TaggedUWord sp = regs.sp;
#elif defined(GP_ARCH_mips64)
    TaggedUWord ia = regs.pc;
    TaggedUWord sp = regs.sp;
#else
# error "Unsupported arch"
#endif

    // Out of output space?
    if (*aFramesUsed >= aFramesAvail) {
      break;
    }

    // If we don't have a valid value for the PC, give up.
    if (!ia.Valid()) {
      break;
    }

    // If this is the innermost frame, record the SP value, which
    // presumably is valid.  If this isn't the innermost frame, and we
    // have a valid SP value, check that its SP value isn't less than
    // the one we've seen so far, so as to catch potential SP value
    // cycles.
    if (*aFramesUsed == 0) {
      last_valid_sp = sp;
    } else {
      MOZ_ASSERT(last_valid_sp.Valid());
      if (sp.Valid()) {
        if (sp.Value() < last_valid_sp.Value()) {
          // Hmm, SP going in the wrong direction.  Let's stop.
          break;
        }
        // Remember where we got to.
        last_valid_sp = sp;
      }
    }

    // For the innermost frame, the IA value is what we need.  For all
    // other frames, it's actually the return address, so back up one
    // byte so as to get it into the calling instruction.
    aFramePCs[*aFramesUsed] = ia.Value() - (*aFramesUsed == 0 ? 0 : 1);
    aFrameSPs[*aFramesUsed] = sp.Valid() ? sp.Value() : 0;
    (*aFramesUsed)++;

    // Find the RuleSet for the current IA, if any.  This will also
    // query the backing (secondary) maps if it isn't found in the
    // thread-local cache.

    // If this isn't the innermost frame, back up into the calling insn.
    if (*aFramesUsed > 1) {
      ia = ia + TaggedUWord((uintptr_t)(-1));
    }

    pair<const RuleSet*, const vector<PfxInstr>*> ruleset_and_pfxinstrs =
        mPriMap->Lookup(ia.Value());
    const RuleSet* ruleset = ruleset_and_pfxinstrs.first;
    const vector<PfxInstr>* pfxinstrs = ruleset_and_pfxinstrs.second;

    if (DEBUG_MAIN) {
      char buf[100];
      SprintfLiteral(buf, "ruleset for 0x%llx = %p\n",
                     (unsigned long long int)ia.Value(), ruleset);
      buf[sizeof(buf) - 1] = 0;
      mLog(buf);
    }

#if defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)
    /////////////////////////////////////////////
    ////
    // On 32 bit x86-linux, syscalls are often done via the VDSO
    // function __kernel_vsyscall, which doesn't have a corresponding
    // object that we can read debuginfo from.  That effectively kills
    // off all stack traces for threads blocked in syscalls.  Hence
    // special-case by looking at the code surrounding the program
    // counter.
    //
    //   0xf7757420 <__kernel_vsyscall+0>:   push   %ecx
    //   0xf7757421 <__kernel_vsyscall+1>:   push   %edx
    //   0xf7757422 <__kernel_vsyscall+2>:   push   %ebp
    //   0xf7757423 <__kernel_vsyscall+3>:   mov    %esp,%ebp
    //   0xf7757425 <__kernel_vsyscall+5>:   sysenter
    //   0xf7757427 <__kernel_vsyscall+7>:   nop
    //   0xf7757428 <__kernel_vsyscall+8>:   nop
    //   0xf7757429 <__kernel_vsyscall+9>:   nop
    //   0xf775742a <__kernel_vsyscall+10>:  nop
    //   0xf775742b <__kernel_vsyscall+11>:  nop
    //   0xf775742c <__kernel_vsyscall+12>:  nop
    //   0xf775742d <__kernel_vsyscall+13>:  nop
    //   0xf775742e <__kernel_vsyscall+14>:  int    $0x80
    //   0xf7757430 <__kernel_vsyscall+16>:  pop    %ebp
    //   0xf7757431 <__kernel_vsyscall+17>:  pop    %edx
    //   0xf7757432 <__kernel_vsyscall+18>:  pop    %ecx
    //   0xf7757433 <__kernel_vsyscall+19>:  ret
    //
    // In cases where the sampled thread is blocked in a syscall, its
    // program counter will point at "pop %ebp".  Hence we look for
    // the sequence "int $0x80; pop %ebp; pop %edx; pop %ecx; ret", and
    // the corresponding register-recovery actions are:
    //    new_ebp = *(old_esp + 0)
    //    new eip = *(old_esp + 12)
    //    new_esp = old_esp + 16
    //
    // It may also be the case that the program counter points two
    // nops before the "int $0x80", viz, is __kernel_vsyscall+12, in
    // the case where the syscall has been restarted but the thread
    // hasn't been rescheduled.  The code below doesn't handle that;
    // it could easily be made to.
    //
    if (!ruleset && *aFramesUsed == 1 && ia.Valid() && sp.Valid()) {
      uintptr_t insns_min, insns_max;
      uintptr_t eip = ia.Value();
      bool b = mSegArray->getBoundingCodeSegment(&insns_min, &insns_max, eip);
      if (b && eip - 2 >= insns_min && eip + 3 <= insns_max) {
        // Reads live process memory (not the stack image); the bounds
        // check above guarantees eip[-2 .. +3] lies inside a known code
        // segment.
        uint8_t* eipC = (uint8_t*)eip;
        if (eipC[-2] == 0xCD && eipC[-1] == 0x80 && eipC[0] == 0x5D &&
            eipC[1] == 0x5A && eipC[2] == 0x59 && eipC[3] == 0xC3) {
          TaggedUWord sp_plus_0 = sp;
          TaggedUWord sp_plus_12 = sp;
          TaggedUWord sp_plus_16 = sp;
          sp_plus_12 = sp_plus_12 + TaggedUWord(12);
          sp_plus_16 = sp_plus_16 + TaggedUWord(16);
          TaggedUWord new_ebp = DerefTUW(sp_plus_0, aStackImg);
          TaggedUWord new_eip = DerefTUW(sp_plus_12, aStackImg);
          TaggedUWord new_esp = sp_plus_16;
          if (new_ebp.Valid() && new_eip.Valid() && new_esp.Valid()) {
            regs.xbp = new_ebp;
            regs.xip = new_eip;
            regs.xsp = new_esp;
            continue;
          }
        }
      }
    }
    ////
    /////////////////////////////////////////////
#endif  // defined(GP_PLAT_x86_android) || defined(GP_PLAT_x86_linux)

    // So, do we have a ruleset for this address?  If so, use it now.
    if (ruleset) {
      if (DEBUG_MAIN) {
        ruleset->Print(ia.Value(), 1 /*bogus, but doesn't matter*/, mLog);
        mLog("\n");
      }
      // Use the RuleSet to compute the registers for the previous
      // frame.  |regs| is modified in-place.
      UseRuleSet(&regs, aStackImg, ruleset, pfxinstrs);
      continue;
    }

#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
    defined(GP_PLAT_amd64_freebsd)
    // There's no RuleSet for the specified address.  On amd64/x86_linux, see
    // if it's possible to recover the caller's frame by using the frame
    // pointer.

    // We seek to compute (new_IP, new_SP, new_BP) from (old_BP, stack image),
    // and assume the following layout:
    //
    //                 <--- new_SP
    //   +----------+
    //   |  new_IP  |  (return address)
    //   +----------+
    //   |  new_BP  |  <--- old_BP
    //   +----------+
    //   |   ....   |
    //   |   ....   |
    //   |   ....   |
    //   +----------+   <---- old_SP (arbitrary, but must be <= old_BP)

    const size_t wordSzB = sizeof(uintptr_t);
    TaggedUWord old_xsp = regs.xsp;

    // points at new_BP ?
    TaggedUWord old_xbp = regs.xbp;
    // points at new_IP ?
    TaggedUWord old_xbp_plus1 = regs.xbp + TaggedUWord(1 * wordSzB);
    // is the new_SP ?
    TaggedUWord old_xbp_plus2 = regs.xbp + TaggedUWord(2 * wordSzB);

    if (old_xbp.Valid() && old_xbp.IsAligned() && old_xsp.Valid() &&
        old_xsp.IsAligned() && old_xsp.Value() <= old_xbp.Value()) {
      // We don't need to do any range, alignment or validity checks for
      // addresses passed to DerefTUW, since that performs them itself, and
      // returns an invalid value on failure.  Any such value will poison
      // subsequent uses, and we do a final check for validity before putting
      // the computed values into |regs|.
      TaggedUWord new_xbp = DerefTUW(old_xbp, aStackImg);
      if (new_xbp.Valid() && new_xbp.IsAligned() &&
          old_xbp.Value() < new_xbp.Value()) {
        TaggedUWord new_xip = DerefTUW(old_xbp_plus1, aStackImg);
        TaggedUWord new_xsp = old_xbp_plus2;
        if (new_xbp.Valid() && new_xip.Valid() && new_xsp.Valid()) {
          regs.xbp = new_xbp;
          regs.xip = new_xip;
          regs.xsp = new_xsp;
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#elif defined(GP_ARCH_arm64)
    // Here is an example of generated code for prologue and epilogue..
    //
    //   stp     x29, x30, [sp, #-16]!
    //   mov     x29, sp
    //   ...
    //   ldp     x29, x30, [sp], #16
    //   ret
    //
    // Next is another example of generated code.
    //
    //   stp     x20, x19, [sp, #-32]!
    //   stp     x29, x30, [sp, #16]
    //   add     x29, sp, #0x10
    //   ...
    //   ldp     x29, x30, [sp, #16]
    //   ldp     x20, x19, [sp], #32
    //   ret
    //
    // Previous x29 and x30 register are stored in the address of x29 register.
    // But since sp register value depends on local variables, we cannot
    // compute previous sp register from current sp/fp/lr register and there
    // is no regular rule for sp register in prologue.  But since return
    // address is lr register, if x29 is valid, we will get return address
    // without sp register.
    //
    // So we assume the following layout that if no rule set.  x29 is frame
    // pointer, so we will be able to compute x29 and x30 .
    //
    //   +----------+  <--- new_sp (cannot compute)
    //   |   ....   |
    //   +----------+
    //   |  new_lr  |  (return address)
    //   +----------+
    //   |  new_fp  |  <--- old_fp
    //   +----------+
    //   |   ....   |
    //   |   ....   |
    //   +----------+  <---- old_sp (arbitrary, but unused)

    TaggedUWord old_fp = regs.x29;
    if (old_fp.Valid() && old_fp.IsAligned() && last_valid_sp.Valid() &&
        last_valid_sp.Value() <= old_fp.Value()) {
      TaggedUWord new_fp = DerefTUW(old_fp, aStackImg);
      if (new_fp.Valid() && new_fp.IsAligned() &&
          old_fp.Value() < new_fp.Value()) {
        TaggedUWord old_fp_plus1 = old_fp + TaggedUWord(8);
        TaggedUWord new_lr = DerefTUW(old_fp_plus1, aStackImg);
        if (new_lr.Valid()) {
          regs.x29 = new_fp;
          regs.x30 = new_lr;
          // When using frame pointer to walk stack, we cannot compute sp
          // register since we cannot compute sp register from fp/lr/sp
          // register, and there is no regular rule to compute previous sp
          // register.  So mark as invalid.
          regs.sp = TaggedUWord();
          (*aFramePointerFramesAcquired)++;
          continue;
        }
      }
    }
#endif  // defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||
        // defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||
        // defined(GP_PLAT_amd64_freebsd)

    // We failed to recover a frame either using CFI or FP chasing, and we
    // have no other ways to recover the frame.  So we have to give up.
    break;

  }  // top level unwind loop

  // END UNWIND
  /////////////////////////////////////////////////////////
}
|
|
|
|
////////////////////////////////////////////////////////////////
|
|
// LUL Unit Testing //
|
|
////////////////////////////////////////////////////////////////
|
|
|
|
// Number of innermost stack bytes captured and presented to LUL by the unit
// test (see GetAndCheckStackTrace below).
static const int LUL_UNIT_TEST_STACK_SIZE = 32768;
|
|
|
|
#if defined(GP_ARCH_mips64)
|
|
static __attribute__((noinline)) unsigned long __getpc(void) {
|
|
unsigned long rtaddr;
|
|
__asm__ volatile("move %0, $31" : "=r"(rtaddr));
|
|
return rtaddr;
|
|
}
|
|
#endif
|
|
|
|
// This function is innermost in the test call sequence. It uses LUL
|
|
// to unwind, and compares the result with the sequence specified in
|
|
// the director string. These need to agree in order for the test to
|
|
// pass. In order not to screw up the results, this function needs
|
|
// to have a not-very big stack frame, since we're only presenting
|
|
// the innermost LUL_UNIT_TEST_STACK_SIZE bytes of stack to LUL, and
|
|
// that chunk unavoidably includes the frame for this function.
|
|
//
|
|
// This function must not be inlined into its callers. Doing so will
|
|
// cause the expected-vs-actual backtrace consistency checking to
|
|
// fail. Prints summary results to |aLUL|'s logging sink and also
|
|
// returns a boolean indicating whether or not the test passed.
|
|
static __attribute__((noinline)) bool GetAndCheckStackTrace(
    LUL* aLUL, const char* dstring) {
  // Get hold of the current unwind-start registers.  Each arch section
  // below captures PC/SP/FP (and friends) into |block| via inline asm,
  // then copies them into |startRegs| and computes |start|, the lowest
  // stack address to capture.
  UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));
#if defined(GP_ARCH_amd64)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  // block[0]=rip, block[1]=rsp, block[2]=rbp.
  __asm__ __volatile__(
      "leaq 0(%%rip), %%r15"
      "\n\t"
      "movq %%r15, 0(%0)"
      "\n\t"
      "movq %%rsp, 8(%0)"
      "\n\t"
      "movq %%rbp, 16(%0)"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "r15");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  // x86-64 SysV has a 128-byte red zone below rsp that may hold live data.
  const uintptr_t REDZONE_SIZE = 128;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 12);
  // block[0]=eip (via call/pop trick), block[1]=esp, block[2]=ebp.
  __asm__ __volatile__(
      ".byte 0xE8,0x00,0x00,0x00,0x00" /*call next insn*/
      "\n\t"
      "popl %%edi"
      "\n\t"
      "movl %%edi, 0(%0)"
      "\n\t"
      "movl %%esp, 4(%0)"
      "\n\t"
      "movl %%ebp, 8(%0)"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "edi");
  startRegs.xip = TaggedUWord(block[0]);
  startRegs.xsp = TaggedUWord(block[1]);
  startRegs.xbp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  volatile uintptr_t block[6];
  MOZ_ASSERT(sizeof(block) == 24);
  // block[0]=r15(pc), block[1]=r14(lr), block[2]=r13(sp), block[3]=r12,
  // block[4]=r11, block[5]=r7.
  __asm__ __volatile__(
      "mov r0, r15"
      "\n\t"
      "str r0, [%0, #0]"
      "\n\t"
      "str r14, [%0, #4]"
      "\n\t"
      "str r13, [%0, #8]"
      "\n\t"
      "str r12, [%0, #12]"
      "\n\t"
      "str r11, [%0, #16]"
      "\n\t"
      "str r7, [%0, #20]"
      "\n"
      :
      : "r"(&block[0])
      : "memory", "r0");
  startRegs.r15 = TaggedUWord(block[0]);
  startRegs.r14 = TaggedUWord(block[1]);
  startRegs.r13 = TaggedUWord(block[2]);
  startRegs.r12 = TaggedUWord(block[3]);
  startRegs.r11 = TaggedUWord(block[4]);
  startRegs.r7 = TaggedUWord(block[5]);
  const uintptr_t REDZONE_SIZE = 0;
  // NOTE(review): on arm, block[1] is r14 (lr), not the stack pointer
  // (block[2] is r13/sp) -- confirm the intended capture base.
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_arm64)
  volatile uintptr_t block[4];
  MOZ_ASSERT(sizeof(block) == 32);
  // block[0]=pc, block[1]=x29(fp), block[2]=x30(lr), block[3]=sp.
  __asm__ __volatile__(
      "adr x0, . \n\t"
      "str x0, [%0, #0] \n\t"
      "str x29, [%0, #8] \n\t"
      "str x30, [%0, #16] \n\t"
      "mov x0, sp \n\t"
      "str x0, [%0, #24] \n\t"
      :
      : "r"(&block[0])
      : "memory", "x0");
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.x29 = TaggedUWord(block[1]);
  startRegs.x30 = TaggedUWord(block[2]);
  startRegs.sp = TaggedUWord(block[3]);
  const uintptr_t REDZONE_SIZE = 0;
  // NOTE(review): on arm64, block[1] is x29 (fp), not sp (block[3]); the
  // captured image therefore starts at fp rather than sp -- confirm this
  // is intentional.
  uintptr_t start = block[1] - REDZONE_SIZE;
#elif defined(GP_ARCH_mips64)
  volatile uintptr_t block[3];
  MOZ_ASSERT(sizeof(block) == 24);
  // block[1]=$29(sp), block[2]=$30(fp); block[0] is filled with the
  // return address below.
  __asm__ __volatile__(
      "sd $29, 8(%0) \n"
      "sd $30, 16(%0) \n"
      :
      : "r"(block)
      : "memory");
  block[0] = __getpc();
  startRegs.pc = TaggedUWord(block[0]);
  startRegs.sp = TaggedUWord(block[1]);
  startRegs.fp = TaggedUWord(block[2]);
  const uintptr_t REDZONE_SIZE = 0;
  uintptr_t start = block[1] - REDZONE_SIZE;
#else
# error "Unsupported platform"
#endif

  // Get hold of the innermost LUL_UNIT_TEST_STACK_SIZE bytes of the
  // stack.  Word-align both ends and clamp to LUL's capture capacity.
  uintptr_t end = start + LUL_UNIT_TEST_STACK_SIZE;
  uintptr_t ws = sizeof(void*);
  start &= ~(ws - 1);
  end &= ~(ws - 1);
  uintptr_t nToCopy = end - start;
  if (nToCopy > lul::N_STACK_BYTES) {
    nToCopy = lul::N_STACK_BYTES;
  }
  MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
  StackImage* stackImg = new StackImage();
  stackImg->mLen = nToCopy;
  stackImg->mStartAvma = start;
  if (nToCopy > 0) {
    // Tell MSan/Valgrind the raw stack bytes are OK to read before copying.
    MOZ_MAKE_MEM_DEFINED((void*)start, nToCopy);
    memcpy(&stackImg->mContents[0], (void*)start, nToCopy);
  }

  // Unwind it.
  const int MAX_TEST_FRAMES = 64;
  uintptr_t framePCs[MAX_TEST_FRAMES];
  uintptr_t frameSPs[MAX_TEST_FRAMES];
  size_t framesAvail = mozilla::ArrayLength(framePCs);
  size_t framesUsed = 0;
  size_t framePointerFramesAcquired = 0;
  aLUL->Unwind(&framePCs[0], &frameSPs[0], &framesUsed,
               &framePointerFramesAcquired, framesAvail, &startRegs, stackImg);

  delete stackImg;

  // if (0) {
  //  // Show what we have.
  //  fprintf(stderr, "Got %d frames:\n", (int)framesUsed);
  //  for (size_t i = 0; i < framesUsed; i++) {
  //    fprintf(stderr, "  [%2d]   SP %p   PC %p\n",
  //            (int)i, (void*)frameSPs[i], (void*)framePCs[i]);
  //  }
  //  fprintf(stderr, "\n");
  //}

  // Check to see if there's a consistent binding between digits in
  // the director string ('1' .. '8') and the PC values acquired by
  // the unwind.  If there isn't, the unwinding has failed somehow.
  uintptr_t binding[8];  // binding for '1' .. binding for '8'
  memset((void*)binding, 0, sizeof(binding));

  // The general plan is to work backwards along the director string
  // and forwards along the framePCs array.  Doing so corresponds to
  // working outwards from the innermost frame of the recursive test set.
  const char* cursor = dstring;

  // Find the end.  This leaves |cursor| two bytes past the first
  // character we want to look at -- see comment below.
  while (*cursor) cursor++;

  // Counts the number of consistent frames.
  size_t nConsistent = 0;

  // Iterate back to the start of the director string.  The starting
  // points are a bit complex.  We can't use framePCs[0] because that
  // contains the PC in this frame (above).  We can't use framePCs[1]
  // because that will contain the PC at return point in the recursive
  // test group (TestFn[1-8]) for their call "out" to this function,
  // GetAndCheckStackTrace.  Although LUL will compute a correct
  // return address, that will not be the same return address as for a
  // recursive call out of the the function to another function in the
  // group.  Hence we can only start consistency checking at
  // framePCs[2].
  //
  // To be consistent, then, we must ignore the last element in the
  // director string as that corresponds to framePCs[1].  Hence the
  // start points are: framePCs[2] and the director string 2 bytes
  // before the terminating zero.
  //
  // Also as a result of this, the number of consistent frames counted
  // will always be one less than the length of the director string
  // (not including its terminating zero).
  size_t frameIx;
  for (cursor = cursor - 2, frameIx = 2;
       cursor >= dstring && frameIx < framesUsed; cursor--, frameIx++) {
    char c = *cursor;
    uintptr_t pc = framePCs[frameIx];
    // If this doesn't hold, the director string is ill-formed.
    MOZ_ASSERT(c >= '1' && c <= '8');
    int n = ((int)c) - ((int)'1');
    if (binding[n] == 0) {
      // There's no binding for |c| yet, so install |pc| and carry on.
      binding[n] = pc;
      nConsistent++;
      continue;
    }
    // There's a pre-existing binding for |c|.  Check it's consistent.
    if (binding[n] != pc) {
      // Not consistent.  Give up now.
      break;
    }
    // Consistent.  Keep going.
    nConsistent++;
  }

  // So, did we succeed?  (See above: nConsistent is expected to be one
  // less than the director-string length when every frame matched.)
  bool passed = nConsistent + 1 == strlen(dstring);

  // Show the results.
  char buf[200];
  SprintfLiteral(buf, "LULUnitTest:   dstring = %s\n", dstring);
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);
  SprintfLiteral(buf, "LULUnitTest:   %d consistent, %d in dstring: %s\n",
                 (int)nConsistent, (int)strlen(dstring),
                 passed ? "PASS" : "FAIL");
  buf[sizeof(buf) - 1] = 0;
  aLUL->mLog(buf);

  return passed;
}
|
|
|
|
// Macro magic to create a set of 8 mutually recursive functions with
|
|
// varying frame sizes. These will recurse amongst themselves as
|
|
// specified by |strP|, the directory string, and call
|
|
// GetAndCheckStackTrace when the string becomes empty, passing it the
|
|
// original value of the string. This checks the result, printing
|
|
// results on |aLUL|'s logging sink, and also returns a boolean
|
|
// indicating whether or not the results are acceptable (correct).
|
|
|
|
// Forward-declares one of the eight mutually-recursive test functions
// (TestFn1..TestFn8) whose bodies are generated below by GEN_TEST_FN.
#define DECL_TEST_FN(NAME) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP);
|
|
|
|
// Defines one test function with a frame of (at least) FRAMESIZE
// bytes.  |strPorig| is the original director string (handed on
// unchanged so the innermost frame can pass it to
// GetAndCheckStackTrace); |strP| is the remaining suffix that directs
// which function to call next.  NOTE(review): any director-string
// character outside '1'..'8' falls into the |default| arm and behaves
// like '8' -- presumably intentional, since GetAndCheckStackTrace
// asserts the string is well-formed anyway.
#define GEN_TEST_FN(NAME, FRAMESIZE) \
  bool NAME(LUL* aLUL, const char* strPorig, const char* strP) { \
    /* Create a frame of size (at least) FRAMESIZE, so that the */ \
    /* 8 functions created by this macro offer some variation in frame */ \
    /* sizes.  This isn't as simple as it might seem, since a clever */ \
    /* optimizing compiler (eg, clang-5) detects that the array is unused */ \
    /* and removes it.  We try to defeat this by passing it to a function */ \
    /* in a different compilation unit, and hoping that clang does not */ \
    /* notice that the call is a no-op. */ \
    char space[FRAMESIZE]; \
    Unused << write(1, space, 0); /* write zero bytes of |space| to stdout */ \
    \
    if (*strP == '\0') { \
      /* We've come to the end of the director string. */ \
      /* Take a stack snapshot. */ \
      return GetAndCheckStackTrace(aLUL, strPorig); \
    } else { \
      /* Recurse onwards.  This is a bit subtle.  The obvious */ \
      /* thing to do here is call onwards directly, from within the */ \
      /* arms of the case statement.  That gives a problem in that */ \
      /* there will be multiple return points inside each function when */ \
      /* unwinding, so it will be difficult to check for consistency */ \
      /* against the director string.  Instead, we make an indirect */ \
      /* call, so as to guarantee that there is only one call site */ \
      /* within each function.  This does assume that the compiler */ \
      /* won't transform it back to the simple direct-call form. */ \
      /* To discourage it from doing so, the call is bracketed with */ \
      /* __asm__ __volatile__ sections so as to make it not-movable. */ \
      bool (*nextFn)(LUL*, const char*, const char*) = NULL; \
      switch (*strP) { \
        case '1': \
          nextFn = TestFn1; \
          break; \
        case '2': \
          nextFn = TestFn2; \
          break; \
        case '3': \
          nextFn = TestFn3; \
          break; \
        case '4': \
          nextFn = TestFn4; \
          break; \
        case '5': \
          nextFn = TestFn5; \
          break; \
        case '6': \
          nextFn = TestFn6; \
          break; \
        case '7': \
          nextFn = TestFn7; \
          break; \
        case '8': \
          nextFn = TestFn8; \
          break; \
        default: \
          nextFn = TestFn8; \
          break; \
      } \
      /* "use" |space| immediately after the recursive call, */ \
      /* so as to dissuade clang from deallocating the space while */ \
      /* the call is active, or otherwise messing with the stack frame. */ \
      __asm__ __volatile__("" ::: "cc", "memory"); \
      bool passed = nextFn(aLUL, strPorig, strP + 1); \
      Unused << write(1, space, 0); \
      __asm__ __volatile__("" ::: "cc", "memory"); \
      return passed; \
    } \
  }
// The test functions are mutually recursive, so it is necessary to
// declare them all before defining any of them.
DECL_TEST_FN(TestFn1)
DECL_TEST_FN(TestFn2)
DECL_TEST_FN(TestFn3)
DECL_TEST_FN(TestFn4)
DECL_TEST_FN(TestFn5)
DECL_TEST_FN(TestFn6)
DECL_TEST_FN(TestFn7)
DECL_TEST_FN(TestFn8)
// Define the eight test functions.  The frame sizes are deliberately
// varied (from 1 to 789 bytes) so that the unwinder is exercised
// against frames of assorted shapes.
GEN_TEST_FN(TestFn1, 123)
GEN_TEST_FN(TestFn2, 456)
GEN_TEST_FN(TestFn3, 789)
GEN_TEST_FN(TestFn4, 23)
GEN_TEST_FN(TestFn5, 47)
GEN_TEST_FN(TestFn6, 117)
GEN_TEST_FN(TestFn7, 1)
GEN_TEST_FN(TestFn8, 99)
// This starts the test sequence going.  Call here to generate a
// sequence of calls as directed by the string |dstring|.  The call
// sequence will, from its innermost frame, finish by calling
// GetAndCheckStackTrace() and passing it |dstring|.
// GetAndCheckStackTrace() will unwind the stack, check consistency
// of those results against |dstring|, and print a pass/fail message
// to aLUL's logging sink.  This function then updates the counters
// in *aNTests and *aNTestsPassed accordingly.
//
// Marked noinline so that this frame reliably exists on the stack
// (the stack-padding trick below depends on it).
__attribute__((noinline)) void TestUnw(/*OUT*/ int* aNTests,
                                       /*OUT*/ int* aNTestsPassed, LUL* aLUL,
                                       const char* dstring) {
  // Ensure that the stack has at least this much space on it.  This
  // makes it safe to saw off the top LUL_UNIT_TEST_STACK_SIZE bytes
  // and hand it to LUL.  Safe in the sense that no segfault can
  // happen because the stack is at least this big.  This is all
  // somewhat dubious in the sense that a sufficiently clever compiler
  // (clang, for one) can figure out that space[] is unused and delete
  // it from the frame.  Hence the somewhat elaborate hoop jumping to
  // fill it up before the call and to at least appear to use the
  // value afterwards.
  int i;
  volatile char space[LUL_UNIT_TEST_STACK_SIZE];
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    space[i] = (char)(i & 0x7F);
  }

  // Really run the test.
  bool passed = TestFn1(aLUL, dstring, dstring);

  // Appear to use space[], by visiting the value to compute some kind
  // of checksum, and then (apparently) using the checksum.
  int sum = 0;
  for (i = 0; i < LUL_UNIT_TEST_STACK_SIZE; i++) {
    // If this doesn't fool LLVM, I don't know what will.
    sum += space[i] - 3 * i;
  }
  // Pretend |sum| is consumed, so the checksum loop (and hence
  // space[]) cannot be eliminated as dead code.
  __asm__ __volatile__("" : : "r"(sum));

  // Update the counters.
  (*aNTests)++;
  if (passed) {
    (*aNTestsPassed)++;
  }
}
void RunLulUnitTests(/*OUT*/ int* aNTests, /*OUT*/ int* aNTestsPassed,
|
|
LUL* aLUL) {
|
|
aLUL->mLog(":\n");
|
|
aLUL->mLog("LULUnitTest: BEGIN\n");
|
|
*aNTests = *aNTestsPassed = 0;
|
|
TestUnw(aNTests, aNTestsPassed, aLUL, "11111111");
|
|
TestUnw(aNTests, aNTestsPassed, aLUL, "11222211");
|
|
TestUnw(aNTests, aNTestsPassed, aLUL, "111222333");
|
|
TestUnw(aNTests, aNTestsPassed, aLUL, "1212121231212331212121212121212");
|
|
TestUnw(aNTests, aNTestsPassed, aLUL, "31415827271828325332173258");
|
|
TestUnw(aNTests, aNTestsPassed, aLUL,
|
|
"123456781122334455667788777777777777777777777");
|
|
aLUL->mLog("LULUnitTest: END\n");
|
|
aLUL->mLog(":\n");
|
|
}
|
|
|
|
} // namespace lul
|