/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ThreadResponsiveness.h" #include "nsThreadUtils.h" #include "platform.h" #include "TableTicker.h" #include "mozilla/TimeStamp.h" // Memory profile #include "nsMemoryReporterManager.h" // this port is based off of v8 svn revision 9837 // XXX: this is a very stubbed out implementation // that only supports a single Sampler struct SamplerRegistry { static void AddActiveSampler(Sampler *sampler) { ASSERT(!SamplerRegistry::sampler); SamplerRegistry::sampler = sampler; } static void RemoveActiveSampler(Sampler *sampler) { SamplerRegistry::sampler = NULL; } static Sampler *sampler; }; Sampler *SamplerRegistry::sampler = NULL; // The following variables are used to communicate between the signal // sender thread and the signal handler on the sampled thread. // // sCurrentThreadProfile is used to pass the current thread profile INTO // the signal handler. sSignalHandlingDone is used by the handler to // indicate when it's finished. The signal-sender thread spins on // sSignalHandlingDone (using sched_yield). This is to avoid usage of // synchronization primitives like condvars in the signal handler code. static mozilla::Atomic sCurrentThreadProfile; static mozilla::Atomic sSignalHandlingDone; #ifdef DEBUG // 0 is never a valid thread id on MacOSX since a pthread_t is a pointer. static const pthread_t kNoThread = (pthread_t) 0; #endif static void SetSampleContext(TickSample* sample, void* context) { // Extracting the sample from the context is extremely machine dependent. ucontext_t* ucontext = reinterpret_cast(context); mcontext_t& mcontext = ucontext->uc_mcontext; #if defined(SPS_PLAT_amd64_darwin) sample->pc = reinterpret_cast
(mcontext->__ss.__rip); sample->sp = reinterpret_cast
(mcontext->__ss.__rsp); sample->fp = reinterpret_cast
(mcontext->__ss.__rbp); #elif defined(SPS_PLAT_x86_darwin) sample->pc = reinterpret_cast
(mcontext->__ss.__eip); sample->sp = reinterpret_cast
(mcontext->__ss.__esp); sample->fp = reinterpret_cast
(mcontext->__ss.__ebp); #endif } void OS::Startup() { } void OS::Sleep(int milliseconds) { usleep(1000 * milliseconds); } void OS::SleepMicro(int microseconds) { usleep(microseconds); } Thread::Thread(const char* name) : stack_size_(0) { set_name(name); } Thread::~Thread() { } static void SetThreadName(const char* name) { // pthread_setname_np is only available in 10.6 or later, so test // for it at runtime. int (*dynamic_pthread_setname_np)(const char*); *reinterpret_cast(&dynamic_pthread_setname_np) = dlsym(RTLD_DEFAULT, "pthread_setname_np"); if (!dynamic_pthread_setname_np) return; // Mac OS X does not expose the length limit of the name, so hardcode it. static const int kMaxNameLength = 63; USE(kMaxNameLength); ASSERT(Thread::kMaxThreadNameLength <= kMaxNameLength); dynamic_pthread_setname_np(name); } static void* ThreadEntry(void* arg) { Thread* thread = reinterpret_cast(arg); thread->thread_ = pthread_self(); SetThreadName(thread->name()); ASSERT(thread->thread_ != kNoThread); thread->Run(); return NULL; } void Thread::set_name(const char* name) { strncpy(name_, name, sizeof(name_)); name_[sizeof(name_) - 1] = '\0'; } void Thread::Start() { pthread_attr_t* attr_ptr = NULL; pthread_attr_t attr; if (stack_size_ > 0) { pthread_attr_init(&attr); pthread_attr_setstacksize(&attr, static_cast(stack_size_)); attr_ptr = &attr; } pthread_create(&thread_, attr_ptr, ThreadEntry, this); ASSERT(thread_ != kNoThread); } void Thread::Join() { pthread_join(thread_, NULL); } namespace { void ProfilerSignalHandler(int signal, siginfo_t* info, void* context) { if (!Sampler::GetActiveSampler()) { sSignalHandlingDone = true; return; } TickSample sample_obj; TickSample* sample = &sample_obj; sample->context = context; // If profiling, we extract the current pc and sp. if (Sampler::GetActiveSampler()->IsProfiling()) { SetSampleContext(sample, context); } sample->threadProfile = sCurrentThreadProfile; sample->timestamp = mozilla::TimeStamp::Now(); sample->rssMemory = sample->threadProfile->mRssMemory; sample->ussMemory = sample->threadProfile->mUssMemory; Sampler::GetActiveSampler()->Tick(sample); sCurrentThreadProfile = NULL; sSignalHandlingDone = true; } } // namespace static void ProfilerSignalThread(ThreadProfile *profile, bool isFirstProfiledThread) { if (isFirstProfiledThread && Sampler::GetActiveSampler()->ProfileMemory()) { profile->mRssMemory = nsMemoryReporterManager::ResidentFast(); profile->mUssMemory = nsMemoryReporterManager::ResidentUnique(); } else { profile->mRssMemory = 0; profile->mUssMemory = 0; } } class PlatformData : public Malloced { public: PlatformData() : profiled_thread_(mach_thread_self()) { profiled_pthread_ = pthread_from_mach_thread_np(profiled_thread_); } ~PlatformData() { // Deallocate Mach port for thread. mach_port_deallocate(mach_task_self(), profiled_thread_); } thread_act_t profiled_thread() { return profiled_thread_; } pthread_t profiled_pthread() { return profiled_pthread_; } private: // Note: for profiled_thread_ Mach primitives are used instead of PThread's // because the latter doesn't provide thread manipulation primitives required. // For details, consult "Mac OS X Internals" book, Section 7.3. thread_act_t profiled_thread_; // we also store the pthread because Mach threads have no concept of stack // and we want to be able to get the stack size when we need to unwind the // stack using frame pointers. pthread_t profiled_pthread_; }; /* static */ PlatformData* Sampler::AllocPlatformData(int aThreadId) { return new PlatformData; } /* static */ void Sampler::FreePlatformData(PlatformData* aData) { delete aData; } class SamplerThread : public Thread { public: explicit SamplerThread(double interval) : Thread("SamplerThread") , intervalMicro_(floor(interval * 1000 + 0.5)) { if (intervalMicro_ <= 0) { intervalMicro_ = 1; } } static void AddActiveSampler(Sampler* sampler) { SamplerRegistry::AddActiveSampler(sampler); if (instance_ == NULL) { instance_ = new SamplerThread(sampler->interval()); instance_->Start(); } } static void RemoveActiveSampler(Sampler* sampler) { instance_->Join(); //XXX: unlike v8 we need to remove the active sampler after doing the Join // because we drop the sampler immediately SamplerRegistry::RemoveActiveSampler(sampler); delete instance_; instance_ = NULL; } // Implement Thread::Run(). virtual void Run() { TimeDuration lastSleepOverhead = 0; TimeStamp sampleStart = TimeStamp::Now(); while (SamplerRegistry::sampler->IsActive()) { SamplerRegistry::sampler->DeleteExpiredMarkers(); if (!SamplerRegistry::sampler->IsPaused()) { mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); std::vector threads = SamplerRegistry::sampler->GetRegisteredThreads(); bool isFirstProfiledThread = true; for (uint32_t i = 0; i < threads.size(); i++) { ThreadInfo* info = threads[i]; // This will be null if we're not interested in profiling this thread. if (!info->Profile() || info->IsPendingDelete()) continue; PseudoStack::SleepState sleeping = info->Stack()->observeSleeping(); if (sleeping == PseudoStack::SLEEPING_AGAIN) { info->Profile()->DuplicateLastSample(); continue; } info->Profile()->GetThreadResponsiveness()->Update(); ThreadProfile* thread_profile = info->Profile(); sCurrentThreadProfile = thread_profile; ProfilerSignalThread(sCurrentThreadProfile, isFirstProfiledThread); SampleContext(SamplerRegistry::sampler, thread_profile, isFirstProfiledThread); isFirstProfiledThread = false; } } TimeStamp targetSleepEndTime = sampleStart + TimeDuration::FromMicroseconds(intervalMicro_); TimeStamp beforeSleep = TimeStamp::Now(); TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep; double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds()); OS::SleepMicro(sleepTime); sampleStart = TimeStamp::Now(); lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime)); } } void SampleContext(Sampler* sampler, ThreadProfile* thread_profile, bool isFirstProfiledThread) { pthread_t profiled_pthread = thread_profile->GetPlatformData()->profiled_pthread(); MOZ_ASSERT(sSignalHandlingDone == false); pthread_kill(profiled_pthread, SIGPROF); while (!sSignalHandlingDone) { sched_yield(); } sSignalHandlingDone = false; } int intervalMicro_; //RuntimeProfilerRateLimiter rate_limiter_; static SamplerThread* instance_; DISALLOW_COPY_AND_ASSIGN(SamplerThread); }; #undef REGISTER_FIELD SamplerThread* SamplerThread::instance_ = NULL; Sampler::Sampler(double interval, bool profiling, int entrySize) : // isolate_(isolate), interval_(interval), profiling_(profiling), paused_(false), active_(false), entrySize_(entrySize) /*, samples_taken_(0)*/ { } Sampler::~Sampler() { ASSERT(!IsActive()); } void Sampler::Start() { ASSERT(!IsActive()); // Initialize signal handler communication sCurrentThreadProfile = NULL; sSignalHandlingDone = false; // Request profiling signals. LOG("Request signal"); struct sigaction sa; sa.sa_sigaction = ProfilerSignalHandler; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_RESTART | SA_SIGINFO; if (sigaction(SIGPROF, &sa, &old_sigprof_signal_handler_) != 0) { LOG("Error installing signal"); return; } signal_handler_installed_ = true; // Start a thread that sends SIGPROF signal to VM thread. // Sending the signal ourselves instead of relying on itimer provides // much better accuracy. SetActive(true); SamplerThread::AddActiveSampler(this); LOG("Profiler thread started"); } void Sampler::Stop() { ASSERT(IsActive()); SetActive(false); SamplerThread::RemoveActiveSampler(this); // Restore old signal handler if (signal_handler_installed_) { sigaction(SIGPROF, &old_sigprof_signal_handler_, 0); signal_handler_installed_ = false; } } pthread_t Sampler::GetProfiledThread(PlatformData* aData) { return aData->profiled_pthread(); } #include pid_t gettid() { return (pid_t) syscall(SYS_thread_selfid); } /* static */ Thread::tid_t Thread::GetCurrentId() { return gettid(); } bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread, void* stackTop) { if (!Sampler::sRegisteredThreadsMutex) return false; mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); int id = gettid(); for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { ThreadInfo* info = sRegisteredThreads->at(i); if (info->ThreadId() == id && !info->IsPendingDelete()) { // Thread already registered. This means the first unregister will be // too early. ASSERT(false); return false; } } set_tls_stack_top(stackTop); ThreadInfo* info = new StackOwningThreadInfo(aName, id, aIsMainThread, aPseudoStack, stackTop); if (sActiveSampler) { sActiveSampler->RegisterThread(info); } sRegisteredThreads->push_back(info); return true; } void Sampler::UnregisterCurrentThread() { if (!Sampler::sRegisteredThreadsMutex) return; tlsStackTop.set(nullptr); mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex); int id = gettid(); for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) { ThreadInfo* info = sRegisteredThreads->at(i); if (info->ThreadId() == id && !info->IsPendingDelete()) { if (profiler_is_active()) { // We still want to show the results of this thread if you // save the profile shortly after a thread is terminated. // For now we will defer the delete to profile stop. info->SetPendingDelete(); break; } else { delete info; sRegisteredThreads->erase(sRegisteredThreads->begin() + i); break; } } } } void TickSample::PopulateContext(void* aContext) { // Note that this asm changes if PopulateContext's parameter list is altered #if defined(SPS_PLAT_amd64_darwin) asm ( // Compute caller's %rsp by adding to %rbp: // 8 bytes for previous %rbp, 8 bytes for return address "leaq 0x10(%%rbp), %0\n\t" // Dereference %rbp to get previous %rbp "movq (%%rbp), %1\n\t" : "=r"(sp), "=r"(fp) ); #elif defined(SPS_PLAT_x86_darwin) asm ( // Compute caller's %esp by adding to %ebp: // 4 bytes for aContext + 4 bytes for return address + // 4 bytes for previous %ebp "leal 0xc(%%ebp), %0\n\t" // Dereference %ebp to get previous %ebp "movl (%%ebp), %1\n\t" : "=r"(sp), "=r"(fp) ); #else # error "Unsupported architecture" #endif pc = reinterpret_cast
(__builtin_extract_return_addr( __builtin_return_address(0))); }