/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "leaky.h" #include "intcnt.h" #include #include #include #include #include #include #ifndef NTO #include #endif #include #include #include #ifdef NTO #include #endif #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE 1 #endif static const u_int DefaultBuckets = 10007; // arbitrary, but prime static const u_int MaxBuckets = 1000003; // arbitrary, but prime //---------------------------------------------------------------------- int main(int argc, char** argv) { leaky* l = new leaky; l->initialize(argc, argv); l->outputfd = stdout; for (int i = 0; i < l->numLogFiles; i++) { if (l->output_dir || l->numLogFiles > 1) { char name[2048]; // XXX fix if (l->output_dir) snprintf(name,sizeof(name),"%s/%s.html",l->output_dir,argv[l->logFileIndex + i]); else snprintf(name,sizeof(name),"%s.html",argv[l->logFileIndex + i]); fprintf(stderr,"opening %s\n",name); l->outputfd = fopen(name,"w"); // if an error we won't process the file } if (l->outputfd) { // paranoia l->open(argv[l->logFileIndex + i]); if (l->outputfd != stderr) { fclose(l->outputfd); l->outputfd = NULL; } } } return 0; } char * htmlify(const char *in) { const char *p = in; char *out, *q; int n = 0; size_t newlen; // Count the number of '<' and '>' in the input. while ((p = strpbrk(p, "<>"))) { ++n; ++p; } // Knowing the number of '<' and '>', we can calculate the space // needed for the output string. newlen = strlen(in) + n * 3 + 1; out = new char[newlen]; // Copy the input to the output, with substitutions. p = in; q = out; do { if (*p == '<') { strcpy(q, "<"); q += 4; } else if (*p == '>') { strcpy(q, ">"); q += 4; } else { *q++ = *p; } p++; } while (*p); *q = '\0'; return out; } leaky::leaky() { applicationName = NULL; progFile = NULL; quiet = true; showAddress = false; showThreads = false; stackDepth = 100000; onlyThread = 0; cleo = false; mappedLogFile = -1; firstLogEntry = lastLogEntry = 0; sfd = -1; externalSymbols = 0; usefulSymbols = 0; numExternalSymbols = 0; lowestSymbolAddr = 0; highestSymbolAddr = 0; loadMap = NULL; collect_last = false; collect_start = -1; collect_end = -1; } leaky::~leaky() { } void leaky::usageError() { fprintf(stderr, "Usage: %s [-v] [-t] [-e exclude] [-i include] [-s stackdepth] [--last] [--all] [--start n [--end m]] [--cleo] [--output-dir dir] prog log [log2 ...]\n", (char*) applicationName); fprintf(stderr, "\t-v: verbose\n" "\t-t | --threads: split threads\n" "\t--only-thread n: only profile thread N\n" "\t-i include-id: stack must include specified id\n" "\t-e exclude-id: stack must NOT include specified id\n" "\t-s stackdepth: Limit depth looked at from captured stack frames\n" "\t--last: only profile the last capture section\n" "\t--start n [--end m]: profile n to m (or end) capture sections\n" "\t--cleo: format output for 'cleopatra' display\n" "\t--output-dir dir: write output files to dir\n" "\tIf there's one log, output goes to stdout unless --output-dir is set\n" "\tIf there are more than one log, output files will be named with .html added\n" ); exit(-1); } static struct option longopts[] = { { "threads", 0, NULL, 't' }, { "only-thread", 1, NULL, 'T' }, { "last", 0, NULL, 'l' }, { "start", 1, NULL, 'x' }, { "end", 1, NULL, 'n' }, { "cleo",0, NULL, 'c' }, { "output-dir", 1, NULL, 'd' }, { NULL, 0, NULL, 0 }, }; void leaky::initialize(int argc, char** argv) { applicationName = argv[0]; applicationName = strrchr(applicationName, '/'); if (!applicationName) { applicationName = argv[0]; } else { applicationName++; } int arg; int errflg = 0; int longindex = 0; onlyThread = 0; output_dir = NULL; cleo = false; // XXX tons of cruft here left over from tracemalloc // XXX The -- options shouldn't need short versions, or they should be documented while (((arg = getopt_long(argc, argv, "adEe:gh:i:r:Rs:tT:qvx:ln:",longopts,&longindex)) != -1)) { switch (arg) { case '?': default: fprintf(stderr,"error: unknown option %c\n",optopt); errflg++; break; case 'a': break; case 'A': // not implemented showAddress = true; break; case 'c': cleo = true; break; case 'd': output_dir = optarg; // reference to an argv pointer break; case 'R': break; case 'e': exclusions.add(optarg); break; case 'g': break; case 'r': // not implemented roots.add(optarg); if (!includes.IsEmpty()) { errflg++; } break; case 'i': includes.add(optarg); if (!roots.IsEmpty()) { errflg++; } break; case 'h': break; case 's': stackDepth = atoi(optarg); if (stackDepth < 2) { stackDepth = 2; } break; case 'x': // --start collect_start = atoi(optarg); break; case 'n': // --end collect_end = atoi(optarg); break; case 'l': // --last collect_last = true; break; case 'q': break; case 'v': quiet = !quiet; break; case 't': showThreads = true; break; case 'T': showThreads = true; onlyThread = atoi(optarg); break; } } if (errflg || ((argc - optind) < 2)) { usageError(); } progFile = argv[optind++]; logFileIndex = optind; numLogFiles = argc - optind; if (!quiet) fprintf(stderr,"numlogfiles = %d\n",numLogFiles); } static void* mapFile(int fd, u_int flags, off_t* sz) { struct stat sb; if (fstat(fd, &sb) < 0) { perror("fstat"); exit(-1); } void* base = mmap(0, (int)sb.st_size, flags, MAP_PRIVATE, fd, 0); if (!base) { perror("mmap"); exit(-1); } *sz = sb.st_size; return base; } void leaky::LoadMap() { malloc_map_entry mme; char name[1000]; if (!loadMap) { // all files use the same map int fd = ::open(M_MAPFILE, O_RDONLY); if (fd < 0) { perror("open: " M_MAPFILE); exit(-1); } for (;;) { int nb = read(fd, &mme, sizeof(mme)); if (nb != sizeof(mme)) break; nb = read(fd, name, mme.nameLen); if (nb != (int)mme.nameLen) break; name[mme.nameLen] = 0; if (!quiet) { fprintf(stderr,"%s @ %lx\n", name, mme.address); } LoadMapEntry* lme = new LoadMapEntry; lme->address = mme.address; lme->name = strdup(name); lme->next = loadMap; loadMap = lme; } close(fd); } } void leaky::open(char *logFile) { int threadArray[100]; // should auto-expand int last_thread = -1; int numThreads = 0; int section = -1; bool collecting = false; LoadMap(); setupSymbols(progFile); // open up the log file if (mappedLogFile) ::close(mappedLogFile); mappedLogFile = ::open(logFile, O_RDONLY); if (mappedLogFile < 0) { perror("open"); exit(-1); } off_t size; firstLogEntry = (malloc_log_entry*) mapFile(mappedLogFile, PROT_READ, &size); lastLogEntry = (malloc_log_entry*)((char*)firstLogEntry + size); if (!collect_last || collect_start < 0) { collecting = true; } // First, restrict it to the capture sections specified (all, last, start/end) // This loop walks through all the call stacks we recorded for (malloc_log_entry* lep=firstLogEntry; lep < lastLogEntry; lep = reinterpret_cast(&lep->pcs[lep->numpcs])) { if (lep->flags & JP_FIRST_AFTER_PAUSE) { section++; if (collect_last) { firstLogEntry = lep; numThreads = 0; collecting = true; } if (collect_start == section) { collecting = true; firstLogEntry = lep; } if (collect_end == section) { collecting = false; lastLogEntry = lep; } if (!quiet) fprintf(stderr,"New section %d: first=%p, last=%p, collecting=%d\n", section,(void*)firstLogEntry,(void*)lastLogEntry,collecting); } // Capture thread info at the same time // Find all the threads captured // pthread/linux docs say the signal can be delivered to any thread in // the process. In practice, it appears in Linux that it's always // delivered to the thread that called setitimer(), and each thread can // have a separate itimer. There's a support library for gprof that // overlays pthread_create() to set timers in any threads you spawn. if (showThreads && collecting) { if (lep->thread != last_thread) { int i; for (i=0; ithread == threadArray[i]) break; } if (i == numThreads && i < (int) (sizeof(threadArray)/sizeof(threadArray[0]))) { threadArray[i] = lep->thread; numThreads++; if (!quiet) fprintf(stderr,"new thread %d\n",lep->thread); } } } } if (!quiet) fprintf(stderr,"Done collecting: sections %d: first=%p, last=%p, numThreads=%d\n", section,(void*)firstLogEntry,(void*)lastLogEntry,numThreads); if (!cleo) { fprintf(outputfd,"Jprof Profile Report\n"); fprintf(outputfd,"

Jprof Profile Report

\n"); } if (showThreads) { fprintf(stderr,"Num threads %d\n",numThreads); if (!cleo) { fprintf(outputfd,"
Threads:

\n");
      for (int i=0; i%d  ",
                threadArray[i],threadArray[i]);
        if ((i+1)%10 == 0)
          fprintf(outputfd,"
\n"); } fprintf(outputfd,"
"); } for (int i=0; i\n"); } //---------------------------------------------------------------------- static int symbolOrder(void const* a, void const* b) { Symbol const** ap = (Symbol const **)a; Symbol const** bp = (Symbol const **)b; return (*ap)->address == (*bp)->address ? 0 : ((*ap)->address > (*bp)->address ? 1 : -1); } void leaky::ReadSharedLibrarySymbols() { LoadMapEntry* lme = loadMap; while (NULL != lme) { ReadSymbols(lme->name, lme->address); lme = lme->next; } } void leaky::setupSymbols(const char *fileName) { if (usefulSymbols == 0) { // only read once! // Read in symbols from the program ReadSymbols(fileName, 0); // Read in symbols from the .so's ReadSharedLibrarySymbols(); if (!quiet) { fprintf(stderr,"A total of %d symbols were loaded\n", usefulSymbols); } // Now sort them qsort(externalSymbols, usefulSymbols, sizeof(Symbol *), symbolOrder); lowestSymbolAddr = externalSymbols[0]->address; highestSymbolAddr = externalSymbols[usefulSymbols-1]->address; } } // Binary search the table, looking for a symbol that covers this // address. int leaky::findSymbolIndex(u_long addr) { u_int base = 0; u_int limit = usefulSymbols - 1; Symbol** end = &externalSymbols[limit]; while (base <= limit) { u_int midPoint = (base + limit)>>1; Symbol** sp = &externalSymbols[midPoint]; if (addr < (*sp)->address) { if (midPoint == 0) { return -1; } limit = midPoint - 1; } else { if (sp+1 < end) { if (addr < (*(sp+1))->address) { return midPoint; } } else { return midPoint; } base = midPoint + 1; } } return -1; } Symbol* leaky::findSymbol(u_long addr) { int idx = findSymbolIndex(addr); if(idx<0) { return NULL; } else { return externalSymbols[idx]; } } //---------------------------------------------------------------------- bool leaky::excluded(malloc_log_entry* lep) { if (exclusions.IsEmpty()) { return false; } char** pcp = &lep->pcs[0]; u_int n = lep->numpcs; for (u_int i = 0; i < n; i++, pcp++) { Symbol* sp = findSymbol((u_long) *pcp); if (sp && exclusions.contains(sp->name)) { return true; } } return false; } bool leaky::included(malloc_log_entry* lep) { if (includes.IsEmpty()) { return true; } char** pcp = &lep->pcs[0]; u_int n = lep->numpcs; for (u_int i = 0; i < n; i++, pcp++) { Symbol* sp = findSymbol((u_long) *pcp); if (sp && includes.contains(sp->name)) { return true; } } return false; } //---------------------------------------------------------------------- void leaky::displayStackTrace(FILE* out, malloc_log_entry* lep) { char** pcp = &lep->pcs[0]; u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth; for (u_int i = 0; i < n; i++, pcp++) { u_long addr = (u_long) *pcp; Symbol* sp = findSymbol(addr); if (sp) { fputs(sp->name, out); if (showAddress) { fprintf(out, "[%p]", (char*)addr); } } else { fprintf(out, "<%p>", (char*)addr); } fputc(' ', out); } fputc('\n', out); } void leaky::dumpEntryToLog(malloc_log_entry* lep) { printf("%ld\t", lep->delTime); printf(" --> "); displayStackTrace(outputfd, lep); } void leaky::generateReportHTML(FILE *fp, int *countArray, int count, int thread) { fprintf(fp,"
"); if (showThreads) { fprintf(fp,"
Thread: %d

", thread,thread); } fprintf(fp,"flat | hierarchical", thread,thread); fprintf(fp,"

\n"); int totalTimerHits = count; int *rankingTable = new int[usefulSymbols]; for(int cnt=usefulSymbols; --cnt>=0; rankingTable[cnt]=cnt); // Drat. I would use ::qsort() but I would need a global variable and my // intro-pascal professor threatened to flunk anyone who used globals. // She damaged me for life :-) (That was 1986. See how much influence // she had. I don't remember her name but I always feel guilty about globals) // Shell Sort. 581130733 is the max 31 bit value of h = 3h+1 int mx, i, h; for(mx=usefulSymbols/9, h=581130733; h>0; h/=3) { if(h=h) && (countArray[rankingTable[j-h]]

Hierarchical Profile

\n", thread); fprintf(fp, "
\n");
  fprintf(fp, "%6s %6s         %4s      %s\n",
          "index", "Count", "Hits", "Function Name");

  for(i=0; i0; i++) {
    Symbol **sp=&externalSymbols[rankingTable[i]];
    
    (*sp)->cntP.printReport(fp, this, rankingTable[i], totalTimerHits);

    char *symname = htmlify((*sp)->name);
    fprintf(fp, "%6d %6d (%3.1f%%)%s %8d (%3.1f%%)%s %s\n", 
            rankingTable[i],
            (*sp)->timerHit, ((*sp)->timerHit*1000/totalTimerHits)/10.0,
            ((*sp)->timerHit*1000/totalTimerHits)/10.0 >= 10.0 ? "" : " ",
            rankingTable[i], countArray[rankingTable[i]],
            (countArray[rankingTable[i]]*1000/totalTimerHits)/10.0,
            (countArray[rankingTable[i]]*1000/totalTimerHits)/10.0 >= 10.0 ? "" : " ",
            symname);
    delete [] symname;

    (*sp)->cntC.printReport(fp, this, rankingTable[i], totalTimerHits);

    fprintf(fp, "
\n"); } fprintf(fp,"
\n"); // OK, Now we want to print the flat profile. To do this we resort on // the hit count. // Cut-N-Paste Shell sort from above. The Ranking Table has already been // populated, so we do not have to reinitialize it. for(mx=usefulSymbols/9, h=581130733; h>0; h/=3) { if(htimerHit; for(j = i; (j>=h) && (externalSymbols[rankingTable[j-h]]->timerHittimerHit>0; i++) { Symbol **sp=&externalSymbols[rankingTable[i]]; totalTimerHits += (*sp)->timerHit; } if (totalTimerHits == 0) totalTimerHits = 1; if (totalTimerHits != count) fprintf(stderr,"Hit count mismatch: count=%d; totalTimerHits=%d", count,totalTimerHits); fprintf(fp,"

Flat Profile


\n", thread); fprintf(fp, "
\n");

  fprintf(fp, "Total hit count: %d\n", totalTimerHits);
  fprintf(fp, "Count %%Total  Function Name\n");
  // Now loop for as long as we have timer hits
  for(i=0;
      itimerHit>0; i++) {

    Symbol **sp=&externalSymbols[rankingTable[i]];
    
    char *symname = htmlify((*sp)->name);
    fprintf(fp, "%3d   %-2.1f     %s\n",
            rankingTable[i], (*sp)->timerHit,
            ((float)(*sp)->timerHit/(float)totalTimerHits)*100.0, symname);
    delete [] symname;
  }
}

void leaky::analyze(int thread)
{
  int *countArray = new int[usefulSymbols];
  int *flagArray  = new int[usefulSymbols];

  //Zero our function call counter
  memset(countArray, 0, sizeof(countArray[0])*usefulSymbols);

  // reset hit counts
  for(int i=0; itimerHit = 0;
    externalSymbols[i]->regClear();
  }

  // The flag array is used to prevent counting symbols multiple times
  // if functions are called recursively.  In order to keep from having
  // to zero it on each pass through the loop, we mark it with the value
  // of stacks on each trip through the loop.  This means we can determine
  // if we have seen this symbol for this stack trace w/o having to reset
  // from the prior stacktrace.
  memset(flagArray, -1, sizeof(flagArray[0])*usefulSymbols);

  if (cleo)
    fprintf(outputfd,"m-Start\n");

  // This loop walks through all the call stacks we recorded
  // --last, --start and --end can restrict it, as can excludes/includes
  stacks = 0;
  for(malloc_log_entry* lep=firstLogEntry; 
    lep < lastLogEntry;
    lep = reinterpret_cast(&lep->pcs[lep->numpcs])) {

    if ((thread != 0 && lep->thread != thread) ||
        excluded(lep) || !included(lep))
    {
      continue;
    }

    ++stacks; // How many stack frames did we collect

    u_int n = (lep->numpcs < stackDepth) ? lep->numpcs : stackDepth;
    char** pcp = &lep->pcs[n-1];
    int idx=-1, parrentIdx=-1;  // Init idx incase n==0
    if (cleo) {
      // This loop walks through every symbol in the call stack.  By walking it
      // backwards we know who called the function when we get there.
      char type = 's';
      for (int i=n-1; i>=0; --i, --pcp) {
        idx = findSymbolIndex(reinterpret_cast(*pcp));

        if(idx>=0) {
          // Skip over bogus __restore_rt frames that realtime profiling
          // can introduce.
          if (i > 0 && !strcmp(externalSymbols[idx]->name, "__restore_rt")) {
            --pcp;
            --i;
            idx = findSymbolIndex(reinterpret_cast(*pcp));
            if (idx < 0) {
              continue;
            }
          }
          Symbol **sp=&externalSymbols[idx];
          char *symname = htmlify((*sp)->name);
          fprintf(outputfd,"%c-%s\n",type,symname);
          delete [] symname;
        }
        // else can't find symbol - ignore
        type = 'c';
      }
    } else {
      // This loop walks through every symbol in the call stack.  By walking it
      // backwards we know who called the function when we get there.
      for (int i=n-1; i>=0; --i, --pcp) {
        idx = findSymbolIndex(reinterpret_cast(*pcp));

        if(idx>=0) {
          // Skip over bogus __restore_rt frames that realtime profiling
          // can introduce.
          if (i > 0 && !strcmp(externalSymbols[idx]->name, "__restore_rt")) {
            --pcp;
            --i;
            idx = findSymbolIndex(reinterpret_cast(*pcp));
            if (idx < 0) {
              continue;
            }
          }
	
          // If we have not seen this symbol before count it and mark it as seen
          if(flagArray[idx]!=stacks && ((flagArray[idx]=stacks) || true)) {
            ++countArray[idx];
          }

          // We know who we are and we know who our parrent is.  Count this
          if(parrentIdx>=0) {
            externalSymbols[parrentIdx]->regChild(idx);
            externalSymbols[idx]->regParrent(parrentIdx);
          }
          // inside if() so an unknown in the middle of a stack won't break
          // the link!
          parrentIdx=idx;
        }
      }

      // idx should be the function that we were in when we received the signal.
      if(idx>=0) {
        ++externalSymbols[idx]->timerHit;
      }

    }
  }
  if (!cleo)
    generateReportHTML(outputfd, countArray, stacks, thread);
}

void FunctionCount::printReport(FILE *fp, leaky *lk, int parent, int total)
{
    const char *fmt = "                      %8d (%3.1f%%)%s %s%s\n";

    int nmax, tmax=((~0U)>>1);
    
    do {
	nmax=0;
	for(int j=getSize(); --j>=0;) {
	    int cnt = getCount(j);
	    if(cnt==tmax) {
		int idx = getIndex(j);
		char *symname = htmlify(lk->indexToName(idx));
                fprintf(fp, fmt, idx, getCount(j),
                        getCount(j)*100.0/total,
                        getCount(j)*100.0/total >= 10.0 ? "" : " ",
                        symname,
                        parent == idx ? " (self)" : "");
		delete [] symname;
	    } else if(cntnmax) {
	        nmax=cnt;
	    }
	}
    } while((tmax=nmax)>0);
}