/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is SniffURI. * * The Initial Developer of the Original Code is * Erik van der Poel . * Portions created by the Initial Developer are Copyright (C) 1998-2005 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * ***** END LICENSE BLOCK ***** */ #include "all.h" #define OUTPUT_DIRECTORY "test/robot/" typedef struct Entry { int count; unsigned char *viewURL; unsigned char *url; } Entry; typedef struct Robot { int slot; int count; View *view; URL *url; char viewFile[1024]; int viewFileAdded; char viewURL[1024]; } Robot; typedef struct StatusEntry { time_t time; char *message; char *file; int line; } StatusEntry; typedef struct TimeEntry { char *task; int count; double total; double min; double max; } TimeEntry; typedef void (*Handler)(int fd); typedef struct FD { Handler handler; FILE *file; } FD; #define numberOfSlots 64 static Thread slots[numberOfSlots]; static Thread statusThread; static StatusEntry statusEntries[numberOfSlots]; static int sortedStatusEntries[numberOfSlots]; static unsigned short mainPort = 40404; static int maxFD = -1; static FD **table = NULL; static fd_set fdSet; static TimeEntry times[] = { { "connect success", 0, 0.0, DBL_MAX, DBL_MIN }, { "connect failure", 0, 0.0, DBL_MAX, DBL_MIN }, { "gethostbyname_r success", 0, 0.0, DBL_MAX, DBL_MIN }, { "gethostbyname_r failure", 0, 0.0, DBL_MAX, DBL_MIN }, { "readStream", 0, 0.0, DBL_MAX, DBL_MIN }, { "total", 0, 0.0, DBL_MAX, DBL_MIN } }; #if 0 static unsigned char *limitURLs[] = { /* "http://somehost/", "http://somehost.somedomain.com/", */ NULL }; #endif static char *outFile = OUTPUT_DIRECTORY "index.html"; static char *limitDomains[16]; static int limitDomainsIndex = 0; static int count = 0; #define LIMIT 50 #define INTERVAL ((LIMIT / 100) ? (LIMIT / 100) : 5) static URL *currURL = NULL; static URL *lastURL = NULL; static URL *urls = NULL; static char *waiting = "waiting"; #ifdef ROBOT_LOG_ATTRIBUTES static HashTable *attributeTable = NULL; #endif static HashTable *contentTypeTable = NULL; static HashTable *httpCharsetTable = NULL; static HashTable *httpHeaderTable = NULL; static HashTable *metaCharsetTable = NULL; static HashTable *schemeTable = NULL; #ifdef ROBOT_LOG_TAGS static HashTable *tagTable = NULL; #endif static HashTable *urlTable = NULL; static HashTable *knownBadTags = NULL; static unsigned char *badTags[] = { NULL }; static FILE *statsOut = NULL; static char *firstURL = NULL; static unsigned char *startTime = NULL; static void robotStatus(App *app, char *message, char *file, int line) { Robot *robot; StatusEntry *entry; if (!app) { return; } robot = app->data; entry = &statusEntries[robot->slot]; time(&entry->time); entry->message = message; entry->file = file; entry->line = line; } static void robotTime(App *app, int task, struct timeval *before) { struct timeval after; double span; gettimeofday(&after, NULL); threadMutexLock(); span = (((after.tv_sec - before->tv_sec) * 1000000.0) + after.tv_usec - before->tv_usec); times[task].total += span; if (span < times[task].min) { times[task].min = span; } if (span > times[task].max) { times[task].max = span; } times[task].count++; threadMutexUnlock(); } static void addEntry(HashTable *table, Robot *robot, unsigned char *url, unsigned char *str) { Entry *entry; HashEntry *hashEntry; hashEntry = hashLookup(table, str); if (hashEntry) { ((Entry *) hashEntry->value)->count++; } else { entry = calloc(sizeof(Entry), 1); if (!entry) { fprintf(stderr, "cannot calloc Entry\n"); exit(0); } entry->count = 1; entry->viewURL = copyString((unsigned char *) robot->viewURL); robot->viewFileAdded = 1; entry->url = copyString(url); hashAdd(table, copyString(str), entry); } } static void freeEntry(unsigned char *str, void *e) { free(str); if (e) { free(((Entry *) e)->url); free(((Entry *) e)->viewURL); free(e); } } static void robotScheme(Robot *robot, unsigned char *url, unsigned char *scheme) { addEntry(schemeTable, robot, url, scheme); } static void addURLFunc(App *app, URL *url) { Robot *robot; if (!url->scheme) { return; } robot = app->data; robotScheme(robot, url->url, url->scheme); if (strcmp((char *) url->scheme, "http")) { urlFree(url); } else { threadMutexLock(); lastURL->next = url; lastURL = url; if (!currURL) { currURL = url; } threadCondSignal(); threadMutexUnlock(); } } static void robotHTTP(App *app, Buf *buf) { viewHTTP(app, buf); } static void robotHTTPBody(App *app, Buf *buf) { } static void robotHTTPHeaderName(App *app, Buf *buf) { Robot *robot; unsigned char *name; robot = app->data; name = bufCopyLower(buf); addEntry(httpHeaderTable, robot, robot->url->url, name); free(name); viewHTTPHeaderName(app, buf); } static void robotHTTPHeaderValue(App *app, Buf *buf, unsigned char *url) { viewHTTPHeaderValue(app, buf); } static void robotHTML(App *app, Buf *buf) { viewHTML(app, buf); } static void robotHTMLText(App *app, Buf *buf) { Robot *robot; unsigned char *p; unsigned char *str; robot = app->data; viewHTMLText(app, buf); str = bufCopy(buf); p = str; while (*p) { if ( (p[0] == '&' ) && (p[1] == '#' ) && (p[2] == '1' ) && (p[3] == '4' ) && (p[4] != '\0') && (p[5] == ';' ) ) { if (p[4] == '7') { fprintf(stderr, "147: %s\n", robot->url->url); } else if (p[4] == '8') { fprintf(stderr, "148: %s\n", robot->url->url); } } p++; } free(str); } static void robotHTMLTag(App *app, HTML *html, Buf *buf) { Robot *robot; HashEntry *tagEntry; robot = app->data; if (html->tagIsKnown) { #ifdef ROBOT_LOG_TAGS addEntry(tagTable, robot, robot->url->url, html->tag); #endif } else { tagEntry = hashLookup(knownBadTags, html->tag); if (!tagEntry) { /* XXX printf("\t\"%s\",\n", html->tag); */ } } viewHTMLTag(app, buf); } static void robotHTMLAttributeName(App *app, HTML *html, Buf *buf) { Robot *robot; robot = app->data; if (html->tagIsKnown) { #ifdef ROBOT_LOG_ATTRIBUTES addEntry(attributeTable, robot, robot->url->url, html->currentAttribute->name); #endif } viewHTMLAttributeName(app, buf); } static void robotHTMLAttributeValue(App *app, HTML *html, Buf *buf) { viewHTMLAttributeValue(app, buf); } static void robotHTMLDeclaration(App *app, Buf *buf) { viewHTMLDeclaration(app, buf); } static void robotHTMLProcessingInstruction(App *app, Buf *buf) { viewHTMLProcessingInstruction(app, buf); } static void robotContentType(App *app, unsigned char *contentType) { Robot *robot; robot = app->data; addEntry(contentTypeTable, robot, robot->url->url, contentType); } static void metaHandler(App *app, HTML *html) { Robot *robot; HTMLAttribute *attr; unsigned char *charset; ContentType *contentType; robot = app->data; attr = html->attributes; while (attr) { if ( (!strcmp((char *) attr->name, "http-equiv")) && (attr->value) && (!strcasecmp((char *) attr->value, "content-type")) ) { break; } attr = attr->next; } if (attr) { contentType = mimeParseContentType(html->currentAttribute->value); charset = mimeGetContentTypeParameter(contentType, "charset"); mimeFreeContentType(contentType); if (charset) { addEntry(metaCharsetTable, robot, robot->url->url, lowerCase(charset)); free(charset); } } } static void tagHandler(App *app, HTML *html) { } static void robotHTTPCharSet(App *app, unsigned char *charset) { Robot *robot; robot = app->data; addEntry(httpCharsetTable, robot, robot->url->url, charset); } static void printEntry(HashEntry *hashEntry) { Entry *entry; unsigned char *key; unsigned char *url; entry = hashEntry->value; key = toHTML(hashEntry->key); url = toHTML(entry->url); fprintf ( statsOut, "%s%d" "View Source" "%s\n", key, entry->count, entry->viewURL, url, url ); free(key); free(url); } static void printTable(HashTable *table, char *column1) { fprintf(statsOut, "\n"); fprintf ( statsOut, "" "" "\n", column1 ); hashEnumerate(table, printEntry); fprintf(statsOut, "
%sCountView SourceExample URL
\n"); } static void printTimes(FILE *file) { int i; fprintf(file, "\n"); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); for (i = 0; i < appTimeMax; i++) { TimeEntry *entry; entry = ×[i]; fprintf(file, ""); fprintf(file, "", entry->task); fprintf(file, "", entry->count); if (entry->count) { fprintf(file, "", (entry->total / entry->count) / 1000000); fprintf(file, "", entry->min / 1000000); fprintf(file, "", entry->max / 1000000); } else { fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); } fprintf(file, ""); } fprintf(file, "
TaskCountAverageMinMax
%s%d%f%f%f   
\n"); } static void printStats(void) { char backup[1024]; char **limit; time_t theTime; sprintf(backup, "%s.bak", outFile); rename(outFile, backup); statsOut = fopen(outFile, "w"); fprintf(statsOut, "Stats\n"); fprintf(statsOut, "\n"); fprintf(statsOut, "\n", startTime); time(&theTime); fprintf(statsOut, "\n", ctime(&theTime)); fprintf(statsOut, "\n", firstURL); fprintf(statsOut, "\n"); fprintf(statsOut, "\n", count); fprintf(statsOut, "\n", netGetDNSCount()); fprintf(statsOut, "\n", netGetConnectCount()); fprintf(statsOut, "" "\n", httpGetNonEmptyHTTPResponseCount()); fprintf(statsOut, "" "\n", httpGetHTTP10OrGreaterCount()); fprintf(statsOut, "
Start Time%s
Time of This File%s
Root URL%s
Domain Limits"); limit = limitDomains; while (*limit) { fprintf(statsOut, "%s ", *limit); limit++; } fprintf(statsOut, "
URLs Attempted%d
DNS Successes%d
Connect Successes%d
Non-empty HTTP Responses%d
HTTP/1.0 or Greater%d
\n"); printTimes(statsOut); printTable(schemeTable, "URL Scheme"); printTable(httpHeaderTable, "HTTP Header"); printTable(contentTypeTable, "Content-Type"); printTable(httpCharsetTable, "HTTP charset"); printTable(metaCharsetTable, "META charset"); #ifdef ROBOT_LOG_TAGS printTable(tagTable, "HTML Tag"); #endif #ifdef ROBOT_LOG_ATTRIBUTES printTable(attributeTable, "HTML Attribute"); #endif fprintf(statsOut, "\n"); fclose(statsOut); } static void openViewFile(App *app) { Robot *robot; robot = app->data; sprintf(robot->viewURL, "%010d.html", robot->count); sprintf(robot->viewFile, "%s%s", OUTPUT_DIRECTORY, robot->viewURL); /* sprintf(robot->viewFile, "/dev/null"); */ robot->viewFileAdded = 0; app->view.out = fopen(robot->viewFile, "w"); if (!app->view.out) { fprintf(stderr, "cannot open %s for writing: %s\n", robot->viewFile, strerror(errno)); exit(0); } fprintf(app->view.out, "View
");
}

static void
closeViewFile(App *app)
{
	Robot	*robot;

	robot = app->data;
	fprintf(app->view.out, "
"); fclose(app->view.out); if (!robot->viewFileAdded) { unlink(robot->viewFile); } robot->viewFileAdded = 0; } static void processURL(App *app) { Robot *robot; struct timeval theTime; robot = app->data; gettimeofday(&theTime, NULL); app->status(app, "processURL", __FILE__, __LINE__); openViewFile(app); httpFree(httpProcess(app, robot->url, NULL, NULL)); closeViewFile(app); app->status(app, "processURL done", __FILE__, __LINE__); app->time(app, appTimeTotal, &theTime); } static void * startHere(void *a) { Robot robot; App *app; robot.slot = (int) a; app = appAlloc(); app->status = robotStatus; app->time = robotTime; app->httpResponse = robotHTTP; app->httpResponseBody = robotHTTPBody; app->httpResponseHeaderName = robotHTTPHeaderName; app->httpResponseHeaderValue = robotHTTPHeaderValue; app->html = robotHTML; app->htmlAttributeName = robotHTMLAttributeName; app->htmlAttributeValue = robotHTMLAttributeValue; app->htmlDeclaration = robotHTMLDeclaration; app->htmlProcessingInstruction = robotHTMLProcessingInstruction; app->htmlTag = robotHTMLTag; app->htmlText = robotHTMLText; app->contentType = robotContentType; app->httpResponseCharSet = robotHTTPCharSet; app->data = &robot; while (1) { threadMutexLock(); while ((!currURL) && (count < LIMIT)) { app->status(app, waiting, __FILE__, __LINE__); threadCondWait(); } if (count >= LIMIT) { threadMutexUnlock(); break; } count++; if (!(count % INTERVAL)) { printStats(); } robot.count = count; robot.url = currURL; currURL = currURL->next; threadMutexUnlock(); processURL(app); } return NULL; } static int allThreadsAreWaiting(void) { int i; for (i = 0; i < numberOfSlots; i++) { if (statusEntries[i].message != waiting) { break; } } return i == numberOfSlots; } static void initKnownBadTags(void) { unsigned char **p; knownBadTags = hashAlloc(NULL); p = badTags; while (*p) { hashAdd(knownBadTags, *p, NULL); p++; } } static FD * addFD(int fd, Handler func) { FD *f; if (fd > maxFD) { if (table) { table = utilRealloc(table, (maxFD + 1) * sizeof(*table), (fd + 1) * sizeof(*table)); } else { table = calloc(fd + 1, sizeof(*table)); } if (!table) { return NULL; } maxFD = fd; } f = malloc(sizeof(FD)); if (!f) { return NULL; } f->handler = func; f->file = NULL; /* f->id = -1; f->port = 0; f->suspend = 0; f->writeFD = -1; */ table[fd] = f; FD_SET(fd, &fdSet); return f; } static void removeFD(int fd) { FD *f; f = table[fd]; if (f) { FD_CLR(fd, &fdSet); if (f->file && (fileno(f->file) == fd)) { fclose(f->file); } else { close(fd); } free(f); table[fd] = NULL; } } static int compareStatusEntries(const void *e1, const void *e2) { StatusEntry *entry1; StatusEntry *entry2; entry1 = &statusEntries[*((int *) e1)]; entry2 = &statusEntries[*((int *) e2)]; return entry1->time - entry2->time; } static void readClientRequest(int fd) { unsigned char b[10240]; int bytesRead; FILE *file; int i; bytesRead = recv(fd, b, sizeof(b) - 1, 0); if (bytesRead < 0) { if (errno != ECONNRESET) { perror("recv"); } removeFD(fd); return; } else if (!bytesRead) { removeFD(fd); return; } b[bytesRead] = 0; file = fdopen(fd, "w"); if (!file) { char *err = "fdopen failed\n"; send(fd, err, strlen(err), 0); removeFD(fd); return; } table[fd]->file = file; if (strstr((char *) b, "/exit")) { char *goodbye = "HTTP/1.0 200 OK\n" "Content-Type: text/html\n" "\n" "Bye!" ; fprintf(file, goodbye); removeFD(fd); exit(0); } else if (strstr((char *) b, "/times")) { char *begin = "HTTP/1.0 200 OK\n" "Content-Type: text/html\n" "\n" ; fprintf(file, begin); printTimes(file); removeFD(fd); } else { char *hello = "HTTP/1.0 200 OK\n" "Content-Type: text/html\n" "\n" ; fprintf(file, hello); fprintf(file, "\n"); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); fprintf(file, ""); for (i = 0; i < numberOfSlots; i++) { sortedStatusEntries[i] = i; } qsort(sortedStatusEntries, numberOfSlots, sizeof(int), compareStatusEntries); for (i = 0; i < numberOfSlots; i++) { StatusEntry *entry; entry = &statusEntries[sortedStatusEntries[i]]; fprintf(file, ""); fprintf(file, "", ctime(&entry->time)); fprintf(file, "", entry->message ? entry->message : "NULL"); fprintf(file, "", entry->file ? entry->file : "NULL"); fprintf(file, "", entry->line); fprintf(file, ""); } fprintf(file, "
TimeMessageFileLine
%s%s%s%d
\n"); removeFD(fd); } } static void acceptNewClient(int fd) { FD *f; int newFD; newFD = netAccept(fd); if (newFD < 0) { fprintf(stderr, "netAccept failed\n"); return; } f = addFD(newFD, readClientRequest); if (!f) { fprintf(stderr, "addFD failed\n"); return; } } static void * startStatusFunc(void *a) { App *app; FD *f; int fd; fd_set localFDSet; int ret; app = appAlloc(); fd = netListen(app, NULL, &mainPort); if (fd < 0) { fprintf(stderr, "netListen failed\n"); exit(0); } f = addFD(fd, acceptNewClient); if (!f) { fprintf(stderr, "addFD failed\n"); exit(0); } while (1) { localFDSet = fdSet; ret = select(maxFD + 1, &localFDSet, NULL, NULL, NULL); if (ret == -1) { perror("select"); } for (fd = 0; fd <= maxFD; fd++) { if (FD_ISSET(fd, &localFDSet)) { (*table[fd]->handler)(fd); } } } return NULL; } static void startStatusThread(void) { int ret; ret = threadCreate(&statusThread, startStatusFunc, NULL); if (ret) { exit(0); } } int main(int argc, char *argv[]) { int i; time_t theTime; URL *url; int ret; #if !defined(WINDOWS) if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) { fprintf(stderr, "signal failed\n"); exit(0); } #endif for (i = 1; i < argc; i++) { if (!strcmp(argv[i], "-d")) { i++; limitDomains[limitDomainsIndex] = argv[i]; limitDomainsIndex++; } else if (!strcmp(argv[i], "-o")) { i++; outFile = argv[i]; } else if (!strcmp(argv[i], "-s")) { i++; firstURL = argv[i]; } } if (!netInit()) { return 1; } if (!threadInit()) { return 1; } time(&theTime); startTime = copyString((unsigned char *) ctime(&theTime)); #ifdef ROBOT_LOG_ATTRIBUTES attributeTable = hashAlloc(freeEntry); #endif contentTypeTable = hashAlloc(freeEntry); httpCharsetTable = hashAlloc(freeEntry); httpHeaderTable = hashAlloc(freeEntry); metaCharsetTable = hashAlloc(freeEntry); schemeTable = hashAlloc(freeEntry); #ifdef ROBOT_LOG_TAGS tagTable = hashAlloc(freeEntry); #endif urlTable = hashAlloc(NULL); initKnownBadTags(); addURLInit(addURLFunc, NULL, limitDomains); htmlRegister("meta", "content", metaHandler); htmlRegisterTagHandler(tagHandler); startStatusThread(); for (i = 0; i < numberOfSlots; i++) { ret = threadCreate(&slots[i], startHere, (void *) i); if (ret) { exit(0); } } threadYield(); if (!firstURL) { firstURL = "http://mozilla.org/"; } url = urlParse((unsigned char *) firstURL); hashAdd(urlTable, (unsigned char *) firstURL, 0); urls = url; lastURL = url; currURL = url; threadCondSignal(); threadYield(); while (1) { if (allThreadsAreWaiting()) { for (i = 0; i < numberOfSlots; i++) { threadCancel(slots[i]); } break; } if (count >= LIMIT) { threadCondBroadcast(); for (i = 0; i < numberOfSlots; i++) { threadJoin(slots[i]); } threadCancel(statusThread); break; } } printStats(); #ifdef ROBOT_LOG_ATTRIBUTES hashFree(attributeTable); #endif hashFree(contentTypeTable); hashFree(httpCharsetTable); hashFree(httpHeaderTable); hashFree(metaCharsetTable); hashFree(schemeTable); #ifdef ROBOT_LOG_TAGS hashFree(tagTable); #endif hashFree(urlTable); url = urls; while (url) { URL *tmp; tmp = url; url = url->next; urlFree(tmp); } exit(0); return 1; }