gecko-dev/tools/jprof/stub/libmalloc.cpp

741 строка
20 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
// vim:cindent:sw=4:et:ts=8:
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// The linux glibc hides part of sigaction if _POSIX_SOURCE is defined
#if defined(linux)
# undef _POSIX_SOURCE
# undef _SVID_SOURCE
# ifndef _GNU_SOURCE
# define _GNU_SOURCE
# endif
#endif
#include <errno.h>
#if defined(linux)
# include <linux/rtc.h>
# include <pthread.h>
#endif
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <ucontext.h>
#include <execinfo.h>
#include "libmalloc.h"
#include "jprof.h"
#include <string.h>
#include <errno.h>
#include <dlfcn.h>
#ifdef NTO
# include <sys/link.h>
extern r_debug _r_debug;
#else
# include <link.h>
#endif
#define USE_GLIBC_BACKTRACE 1
// To debug, use #define JPROF_STATIC
#define JPROF_STATIC static
static int gLogFD = -1;
static pthread_t main_thread;
static bool gIsSlave = false;
static int gFilenamePID;
static void startSignalCounter(unsigned long millisec);
static int enableRTCSignals(bool enable);
//----------------------------------------------------------------------
// replace use of atexit()
static void DumpAddressMap();
struct JprofShutdown {
JprofShutdown() {}
~JprofShutdown() { DumpAddressMap(); }
};
static void RegisterJprofShutdown() {
// This instanciates the dummy class above, and will trigger the class
// destructor when libxul is unloaded. This is equivalent to atexit(),
// but gracefully handles dlclose().
static JprofShutdown t;
}
#if defined(i386) || defined(_i386) || defined(__x86_64__)
JPROF_STATIC void CrawlStack(malloc_log_entry* me, void* stack_top,
void* top_instr_ptr) {
# if USE_GLIBC_BACKTRACE
// This probably works on more than x86! But we need a way to get the
// top instruction pointer, which is kindof arch-specific
void* array[500];
int cnt, i;
u_long numpcs = 0;
// This is from glibc. A more generic version might use
// libunwind and/or CaptureStackBackTrace() on Windows
cnt = backtrace(&array[0], sizeof(array) / sizeof(array[0]));
// StackHook->JprofLog->CrawlStack
// Then we have sigaction, which replaced top_instr_ptr
array[3] = top_instr_ptr;
for (i = 3; i < cnt; i++) {
me->pcs[numpcs++] = (char*)array[i];
}
me->numpcs = numpcs;
# else
// original code - this breaks on many platforms
void** bp;
# if defined(__i386)
__asm__("movl %%ebp, %0" : "=g"(bp));
# elif defined(__x86_64__)
__asm__("movq %%rbp, %0" : "=g"(bp));
# else
// It would be nice if this worked uniformly, but at least on i386 and
// x86_64, it stopped working with gcc 4.1, because it points to the
// end of the saved registers instead of the start.
bp = __builtin_frame_address(0);
# endif
u_long numpcs = 0;
bool tracing = false;
me->pcs[numpcs++] = (char*)top_instr_ptr;
while (numpcs < MAX_STACK_CRAWL) {
void** nextbp = (void**)*bp++;
void* pc = *bp;
if (nextbp < bp) {
break;
}
if (tracing) {
// Skip the signal handling.
me->pcs[numpcs++] = (char*)pc;
} else if (pc == top_instr_ptr) {
tracing = true;
}
bp = nextbp;
}
me->numpcs = numpcs;
# endif
}
#endif
//----------------------------------------------------------------------
static int rtcHz;
static int rtcFD = -1;
static bool circular = false;
#if defined(linux) || defined(NTO)
static void DumpAddressMap() {
// Turn off the timer so we don't get interrupts during shutdown
# if defined(linux)
if (rtcHz) {
enableRTCSignals(false);
} else
# endif
{
startSignalCounter(0);
}
char filename[2048];
if (gIsSlave)
snprintf(filename, sizeof(filename), "%s-%d", M_MAPFILE, gFilenamePID);
else
snprintf(filename, sizeof(filename), "%s", M_MAPFILE);
int mfd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
if (mfd >= 0) {
malloc_map_entry mme;
link_map* map = _r_debug.r_map;
while (nullptr != map) {
if (map->l_name && *map->l_name) {
mme.nameLen = strlen(map->l_name);
mme.address = map->l_addr;
write(mfd, &mme, sizeof(mme));
write(mfd, map->l_name, mme.nameLen);
# if 0
write(1, map->l_name, mme.nameLen);
write(1, "\n", 1);
# endif
}
map = map->l_next;
}
close(mfd);
}
}
#endif
static bool was_paused = true;
JPROF_STATIC void JprofBufferDump();
JPROF_STATIC void JprofBufferClear();
static void ClearProfilingHook(int signum) {
if (circular) {
JprofBufferClear();
puts("Jprof: cleared circular buffer.");
}
}
static void EndProfilingHook(int signum) {
if (circular) JprofBufferDump();
DumpAddressMap();
was_paused = true;
puts("Jprof: profiling paused.");
}
//----------------------------------------------------------------------
// proper usage would be a template, including the function to find the
// size of an entry, or include a size header explicitly to each entry.
#if defined(linux)
# define DUMB_LOCK() pthread_mutex_lock(&mutex);
# define DUMB_UNLOCK() pthread_mutex_unlock(&mutex);
#else
# define DUMB_LOCK() FIXME()
# define DUMB_UNLOCK() FIXME()
#endif
class DumbCircularBuffer {
public:
DumbCircularBuffer(size_t init_buffer_size) {
used = 0;
buffer_size = init_buffer_size;
buffer = (unsigned char*)malloc(buffer_size);
head = tail = buffer;
#if defined(linux)
pthread_mutexattr_t mAttr;
pthread_mutexattr_settype(&mAttr, PTHREAD_MUTEX_RECURSIVE_NP);
pthread_mutex_init(&mutex, &mAttr);
pthread_mutexattr_destroy(&mAttr);
#endif
}
~DumbCircularBuffer() {
free(buffer);
#if defined(linux)
pthread_mutex_destroy(&mutex);
#endif
}
void clear() {
DUMB_LOCK();
head = tail;
used = 0;
DUMB_UNLOCK();
}
bool empty() { return head == tail; }
size_t space_available() {
size_t result;
DUMB_LOCK();
if (tail > head)
result = buffer_size - (tail - head) - 1;
else
result = head - tail - 1;
DUMB_UNLOCK();
return result;
}
void drop(size_t size) {
// assumes correctness!
DUMB_LOCK();
head += size;
if (head >= &buffer[buffer_size]) head -= buffer_size;
used--;
DUMB_UNLOCK();
}
bool insert(void* data, size_t size) {
// can fail if not enough space in the entire buffer
DUMB_LOCK();
if (space_available() < size) return false;
size_t max_without_wrap = &buffer[buffer_size] - tail;
size_t initial = size > max_without_wrap ? max_without_wrap : size;
#if DEBUG_CIRCULAR
fprintf(stderr, "insert(%d): max_without_wrap %d, size %d, initial %d\n",
used, max_without_wrap, size, initial);
#endif
memcpy(tail, data, initial);
tail += initial;
data = ((char*)data) + initial;
size -= initial;
if (size != 0) {
#if DEBUG_CIRCULAR
fprintf(stderr, "wrapping by %d bytes\n", size);
#endif
memcpy(buffer, data, size);
tail = &(((unsigned char*)buffer)[size]);
}
used++;
DUMB_UNLOCK();
return true;
}
// for external access to the buffer (saving)
void lock() { DUMB_LOCK(); }
void unlock() { DUMB_UNLOCK(); }
// XXX These really shouldn't be public...
unsigned char* head;
unsigned char* tail;
unsigned int used;
unsigned char* buffer;
size_t buffer_size;
private:
pthread_mutex_t mutex;
};
class DumbCircularBuffer* JprofBuffer;
JPROF_STATIC void JprofBufferInit(size_t size) {
JprofBuffer = new DumbCircularBuffer(size);
}
JPROF_STATIC void JprofBufferClear() {
fprintf(stderr, "Told to clear JPROF circular buffer\n");
JprofBuffer->clear();
}
JPROF_STATIC size_t JprofEntrySizeof(malloc_log_entry* me) {
return offsetof(malloc_log_entry, pcs) + me->numpcs * sizeof(char*);
}
JPROF_STATIC void JprofBufferAppend(malloc_log_entry* me) {
size_t size = JprofEntrySizeof(me);
do {
while (JprofBuffer->space_available() < size && JprofBuffer->used > 0) {
#if DEBUG_CIRCULAR
fprintf(
stderr,
"dropping entry: %d in use, %d free, need %d, size_to_free = %d\n",
JprofBuffer->used, JprofBuffer->space_available(), size,
JprofEntrySizeof((malloc_log_entry*)JprofBuffer->head));
#endif
JprofBuffer->drop(JprofEntrySizeof((malloc_log_entry*)JprofBuffer->head));
}
if (JprofBuffer->space_available() < size) return;
} while (!JprofBuffer->insert(me, size));
}
JPROF_STATIC void JprofBufferDump() {
JprofBuffer->lock();
#if DEBUG_CIRCULAR
fprintf(
stderr, "dumping JP_CIRCULAR buffer, %d of %d bytes\n",
JprofBuffer->tail > JprofBuffer->head
? JprofBuffer->tail - JprofBuffer->head
: JprofBuffer->buffer_size + JprofBuffer->tail - JprofBuffer->head,
JprofBuffer->buffer_size);
#endif
if (JprofBuffer->tail >= JprofBuffer->head) {
write(gLogFD, JprofBuffer->head, JprofBuffer->tail - JprofBuffer->head);
} else {
write(gLogFD, JprofBuffer->head,
&(JprofBuffer->buffer[JprofBuffer->buffer_size]) - JprofBuffer->head);
write(gLogFD, JprofBuffer->buffer, JprofBuffer->tail - JprofBuffer->buffer);
}
JprofBuffer->clear();
JprofBuffer->unlock();
}
//----------------------------------------------------------------------
JPROF_STATIC void JprofLog(u_long aTime, void* stack_top, void* top_instr_ptr) {
// Static is simply to make debugging tolerable
static malloc_log_entry me;
me.delTime = aTime;
me.thread = syscall(SYS_gettid); // gettid();
if (was_paused) {
me.flags = JP_FIRST_AFTER_PAUSE;
was_paused = 0;
} else {
me.flags = 0;
}
CrawlStack(&me, stack_top, top_instr_ptr);
#ifndef NTO
if (circular) {
JprofBufferAppend(&me);
} else {
write(gLogFD, &me, JprofEntrySizeof(&me));
}
#else
printf("Neutrino is missing the pcs member of malloc_log_entry!! \n");
#endif
}
static int realTime;
/* Lets interrupt at 10 Hz. This is so my log files don't get too large.
* This can be changed to a faster value latter. This timer is not
* programmed to reset, even though it is capable of doing so. This is
* to keep from getting interrupts from inside of the handler.
*/
static void startSignalCounter(unsigned long millisec) {
struct itimerval tvalue;
tvalue.it_interval.tv_sec = 0;
tvalue.it_interval.tv_usec = 0;
tvalue.it_value.tv_sec = millisec / 1000;
tvalue.it_value.tv_usec = (millisec % 1000) * 1000;
if (realTime) {
setitimer(ITIMER_REAL, &tvalue, nullptr);
} else {
setitimer(ITIMER_PROF, &tvalue, nullptr);
}
}
static long timerMilliSec = 50;
#if defined(linux)
static int setupRTCSignals(int hz, struct sigaction* sap) {
/* global */ rtcFD = open("/dev/rtc", O_RDONLY);
if (rtcFD < 0) {
perror("JPROF_RTC setup: open(\"/dev/rtc\", O_RDONLY)");
return 0;
}
if (sigaction(SIGIO, sap, nullptr) == -1) {
perror("JPROF_RTC setup: sigaction(SIGIO)");
return 0;
}
if (ioctl(rtcFD, RTC_IRQP_SET, hz) == -1) {
perror("JPROF_RTC setup: ioctl(/dev/rtc, RTC_IRQP_SET, $JPROF_RTC_HZ)");
return 0;
}
if (ioctl(rtcFD, RTC_PIE_ON, 0) == -1) {
perror("JPROF_RTC setup: ioctl(/dev/rtc, RTC_PIE_ON)");
return 0;
}
if (fcntl(rtcFD, F_SETSIG, 0) == -1) {
perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETSIG, 0)");
return 0;
}
if (fcntl(rtcFD, F_SETOWN, getpid()) == -1) {
perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETOWN, getpid())");
return 0;
}
return 1;
}
static int enableRTCSignals(bool enable) {
static bool enabled = false;
if (enabled == enable) {
return 0;
}
enabled = enable;
int flags = fcntl(rtcFD, F_GETFL);
if (flags < 0) {
perror("JPROF_RTC setup: fcntl(/dev/rtc, F_GETFL)");
return 0;
}
if (enable) {
flags |= FASYNC;
} else {
flags &= ~FASYNC;
}
if (fcntl(rtcFD, F_SETFL, flags) == -1) {
if (enable) {
perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags | FASYNC)");
} else {
perror("JPROF_RTC setup: fcntl(/dev/rtc, F_SETFL, flags & ~FASYNC)");
}
return 0;
}
return 1;
}
#endif
JPROF_STATIC void StackHook(int signum, siginfo_t* info, void* ucontext) {
static struct timeval tFirst;
static int first = 1;
size_t millisec = 0;
#if defined(linux)
if (rtcHz && pthread_self() != main_thread) {
// Only collect stack data on the main thread, for now.
return;
}
#endif
if (first && !(first = 0)) {
puts("Jprof: received first signal");
#if defined(linux)
if (rtcHz) {
enableRTCSignals(true);
} else
#endif
{
gettimeofday(&tFirst, 0);
millisec = 0;
}
} else {
#if defined(linux)
if (rtcHz) {
enableRTCSignals(true);
} else
#endif
{
struct timeval tNow;
gettimeofday(&tNow, 0);
double usec = 1e6 * (tNow.tv_sec - tFirst.tv_sec);
usec += (tNow.tv_usec - tFirst.tv_usec);
millisec = static_cast<size_t>(usec * 1e-3);
}
}
gregset_t& gregs = ((ucontext_t*)ucontext)->uc_mcontext.gregs;
#ifdef __x86_64__
JprofLog(millisec, (void*)gregs[REG_RSP], (void*)gregs[REG_RIP]);
#else
JprofLog(millisec, (void*)gregs[REG_ESP], (void*)gregs[REG_EIP]);
#endif
if (!rtcHz) startSignalCounter(timerMilliSec);
}
NS_EXPORT_(void) setupProfilingStuff(void) {
static int gFirstTime = 1;
char filename[2048]; // XXX fix
if (gFirstTime && !(gFirstTime = 0)) {
int startTimer = 1;
int doNotStart = 1;
int firstDelay = 0;
int append = O_TRUNC;
char* tst = getenv("JPROF_FLAGS");
/* Options from JPROF_FLAGS environment variable:
* JP_DEFER -> Wait for a SIGPROF (or SIGALRM, if JP_REALTIME
* is set) from userland before starting
* to generate them internally
* JP_START -> Install the signal handler
* JP_PERIOD -> Time between profiler ticks
* JP_FIRST -> Extra delay before starting
* JP_REALTIME -> Take stack traces in intervals of real time
* rather than time used by the process (and the
* system for the process). This is useful for
* finding time spent by the X server.
* JP_APPEND -> Append to jprof-log rather than overwriting it.
* This is somewhat risky since it depends on the
* address map staying constant across multiple runs.
* JP_FILENAME -> base filename to use when saving logs. Note that
* this does not affect the mapfile.
* JP_CIRCULAR -> use a circular buffer of size N, write/clear on SIGUSR1
*
* JPROF_SLAVE is set if this is not the first process.
*/
circular = false;
if (tst) {
if (strstr(tst, "JP_DEFER")) {
doNotStart = 0;
startTimer = 0;
}
if (strstr(tst, "JP_START")) doNotStart = 0;
if (strstr(tst, "JP_REALTIME")) realTime = 1;
if (strstr(tst, "JP_APPEND")) append = O_APPEND;
char* delay = strstr(tst, "JP_PERIOD=");
if (delay) {
double tmp = strtod(delay + strlen("JP_PERIOD="), nullptr);
if (tmp >= 1e-3) {
timerMilliSec = static_cast<unsigned long>(1000 * tmp);
} else {
fprintf(stderr, "JP_PERIOD of %g less than 0.001 (1ms), using 1ms\n",
tmp);
timerMilliSec = 1;
}
}
char* circular_op = strstr(tst, "JP_CIRCULAR=");
if (circular_op) {
size_t size = atol(circular_op + strlen("JP_CIRCULAR="));
if (size < 1000) {
fprintf(stderr, "JP_CIRCULAR of %lu less than 1000, using 10000\n",
(unsigned long)size);
size = 10000;
}
JprofBufferInit(size);
fprintf(stderr, "JP_CIRCULAR buffer of %lu bytes\n",
(unsigned long)size);
circular = true;
}
char* first = strstr(tst, "JP_FIRST=");
if (first) {
firstDelay = atol(first + strlen("JP_FIRST="));
}
char* rtc = strstr(tst, "JP_RTC_HZ=");
if (rtc) {
#if defined(linux)
rtcHz = atol(rtc + strlen("JP_RTC_HZ="));
timerMilliSec = 0; /* This makes JP_FIRST work right. */
realTime = 1; /* It's the _R_TC and all. ;) */
# define IS_POWER_OF_TWO(x) (((x) & ((x)-1)) == 0)
if (!IS_POWER_OF_TWO(rtcHz) || rtcHz < 2) {
fprintf(stderr,
"JP_RTC_HZ must be power of two and >= 2, "
"but %d was provided; using default of 2048\n",
rtcHz);
rtcHz = 2048;
}
#else
fputs(
"JP_RTC_HZ found, but RTC profiling only supported on "
"Linux!\n",
stderr);
#endif
}
const char* f = strstr(tst, "JP_FILENAME=");
if (f)
f = f + strlen("JP_FILENAME=");
else
f = M_LOGFILE;
char* is_slave = getenv("JPROF_SLAVE");
if (!is_slave) setenv("JPROF_SLAVE", "", 0);
gIsSlave = !!is_slave;
gFilenamePID = syscall(SYS_gettid); // gettid();
if (is_slave)
snprintf(filename, sizeof(filename), "%s-%d", f, gFilenamePID);
else
snprintf(filename, sizeof(filename), "%s", f);
// XXX FIX! inherit current capture state!
}
if (!doNotStart) {
if (gLogFD < 0) {
gLogFD = open(filename, O_CREAT | O_WRONLY | append, 0666);
if (gLogFD < 0) {
fprintf(stderr, "Unable to create " M_LOGFILE);
perror(":");
} else {
struct sigaction action;
sigset_t mset;
// Dump out the address map when we terminate
RegisterJprofShutdown();
main_thread = pthread_self();
// fprintf(stderr,"jprof: main_thread = %u\n",
// (unsigned int)main_thread);
// FIX! probably should block these against each other
// Very unlikely.
sigemptyset(&mset);
action.sa_handler = nullptr;
action.sa_sigaction = StackHook;
action.sa_mask = mset;
action.sa_flags = SA_RESTART | SA_SIGINFO;
#if defined(linux)
if (rtcHz) {
if (!setupRTCSignals(rtcHz, &action)) {
fputs(
"jprof: Error initializing RTC, NOT "
"profiling\n",
stderr);
return;
}
}
if (!rtcHz || firstDelay != 0)
#endif
{
if (realTime) {
sigaction(SIGALRM, &action, nullptr);
}
}
// enable PROF in all cases to simplify JP_DEFER/pause/restart
sigaction(SIGPROF, &action, nullptr);
// make it so a SIGUSR1 will stop the profiling
// Note: It currently does not close the logfile.
// This could be configurable (so that it could
// later be reopened).
struct sigaction stop_action;
stop_action.sa_handler = EndProfilingHook;
stop_action.sa_mask = mset;
stop_action.sa_flags = SA_RESTART;
sigaction(SIGUSR1, &stop_action, nullptr);
// make it so a SIGUSR2 will clear the circular buffer
stop_action.sa_handler = ClearProfilingHook;
stop_action.sa_mask = mset;
stop_action.sa_flags = SA_RESTART;
sigaction(SIGUSR2, &stop_action, nullptr);
printf(
"Jprof: Initialized signal handler and set "
"timer for %lu %s, %d s "
"initial delay\n",
rtcHz ? rtcHz : timerMilliSec, rtcHz ? "Hz" : "ms", firstDelay);
if (startTimer) {
#if defined(linux)
/* If we have an initial delay we can just use
startSignalCounter to set up a timer to fire the
first stackHook after that delay. When that happens
we'll go and switch to RTC profiling. */
if (rtcHz && firstDelay == 0) {
puts("Jprof: enabled RTC signals");
enableRTCSignals(true);
} else
#endif
{
puts("Jprof: started timer");
startSignalCounter(firstDelay * 1000 + timerMilliSec);
}
}
}
}
}
} else {
printf("setupProfilingStuff() called multiple times\n");
}
}