/* * The contents of this file are subject to the Mozilla Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is Web Sniffer. * * The Initial Developer of the Original Code is Erik van der Poel. * Portions created by Erik van der Poel are * Copyright (C) 1998,1999,2000 Erik van der Poel. * All Rights Reserved. * * Contributor(s): */ #include #include #include #include "url.h" #include "utils.h" typedef struct StackEntry { unsigned char *str; struct StackEntry *next; struct StackEntry *previous; } StackEntry; typedef struct Stack { StackEntry *bottom; StackEntry *top; } Stack; static URL * urlAlloc(void) { URL *result; result = calloc(sizeof(URL), 1); if (!result) { fprintf(stderr, "cannot calloc URL\n"); exit(0); } result->port = -1; return result; } void urlFree(URL *url) { FREE(url->file); FREE(url->fragment); FREE(url->host); FREE(url->login); FREE(url->net_loc); FREE(url->params); FREE(url->password); FREE(url->path); FREE(url->pathWithoutFile); FREE(url->query); FREE(url->scheme); FREE(url->url); FREE(url); } static void urlEmbellish(URL *url) { unsigned char *at; unsigned char *colon; unsigned char *host; unsigned char *login; unsigned char *p; p = (unsigned char *) strrchr((char *) url->path, '/'); if (p) { FREE(url->pathWithoutFile); url->pathWithoutFile = copySizedString(url->path, p + 1 - url->path); p++; } else { p = url->path; } if (p[0]) { FREE(url->file); url->file = copyString(p); } if (url->net_loc) { at = (unsigned char *) strchr((char *) url->net_loc, '@'); if (at) { login = url->net_loc; colon = (unsigned char *) strchr((char *) login, ':'); if (colon && (colon < at)) { url->password = copySizedString(colon + 1, at - colon - 1); url->login = copySizedString(login, colon - login); } else { url->login = copySizedString(login, at - login); } host = at + 1; } else { host = url->net_loc; } colon = (unsigned char *) strchr((char *) host, ':'); if (colon) { url->host = lowerCase(copySizedString(host, colon - host)); sscanf((char *) colon + 1, "%d", &url->port); } else { FREE(url->host); url->host = lowerCase(copyString(host)); } } } URL * urlParse(const unsigned char *urlStr) { unsigned char c; unsigned char *net_loc; unsigned char *p; unsigned char *path; unsigned char *str; URL *url; if ((!urlStr) || (!*urlStr)) { return NULL; } url = urlAlloc(); url->url = copyString(urlStr); str = copyString(urlStr); p = (unsigned char *) strchr((char *) str, '#'); if (p) { url->fragment = copyString(p); *p = 0; } p = str; c = *p; while ( (('a' <= c) && (c <= 'z')) || (('A' <= c) && (c <= 'Z')) || (('0' <= c) && (c <= '9')) || (c == '+') || (c == '.') || (c == '-') ) { p++; c = *p; } if ((c == ':') && (p > str)) { url->scheme = lowerCase(copySizedString(str, p - str)); p++; } else { p = str; } if ((p[0] == '/') && (p[1] == '/')) { net_loc = p + 2; p = (unsigned char *) strchr((char *) net_loc, '/'); if (p) { if (p > net_loc) { url->net_loc = copySizedString(net_loc, p - net_loc); } } else { if (*net_loc) { url->net_loc = copyString(net_loc); } p = (unsigned char *) strchr((char *) net_loc, 0); } } path = p; p = (unsigned char *) strchr((char *) p, '?'); if (p) { url->query = copyString(p); *p = 0; } p = path; p = (unsigned char *) strchr((char *) p, ';'); if (p) { url->params = copyString(p); *p = 0; } url->path = copyString(path); urlEmbellish(url); free(str); return url; } static unsigned char * pop(Stack *stack) { unsigned char *result; StackEntry *top; if (stack->top) { top = stack->top; result = top->str; stack->top = top->previous; if (stack->top) { stack->top->next = NULL; } else { stack->bottom = NULL; } free(top); } else { result = NULL; } return result; } static void push(Stack *stack, unsigned char *str) { StackEntry *entry; entry = calloc(sizeof(StackEntry), 1); if (!entry) { fprintf(stderr, "cannot calloc StackEntry\n"); exit(0); } entry->str = str; entry->next = NULL; entry->previous = stack->top; if (stack->top) { stack->top->next = entry; } stack->top = entry; if (!stack->bottom) { stack->bottom = entry; } } static unsigned char * bottom(Stack *stack) { StackEntry *bottom; unsigned char *result; bottom = stack->bottom; if (bottom) { result = bottom->str; stack->bottom = bottom->next; if (stack->bottom) { stack->bottom->previous = NULL; } free(bottom); } else { result = NULL; } return result; } static Stack * stackAlloc(void) { Stack *stack; stack = calloc(sizeof(Stack), 1); if (!stack) { fprintf(stderr, "cannot calloc Stack\n"); exit(0); } return stack; } static void stackFree(Stack *stack) { free(stack); } static void urlCanonicalizePath(URL *url) { int absolute; unsigned char *begin; unsigned char *p; unsigned char *slash; Stack *stack; unsigned char *str; p = url->path; if ((!p) || (!*p)) { return; } if (p[0] == '/') { absolute = 1; p++; } else { absolute = 0; } stack = stackAlloc(); while (*p) { begin = p; p = (unsigned char *) strchr((char *) begin, '/'); if (!p) { p = (unsigned char *) strchr((char *) begin, 0); } if (p == begin) { } else if ((p == begin + 1) && (begin[0] == '.')) { } else if ( (p == begin + 2) && (begin[0] == '.') && (begin[1] == '.') ) { slash = pop(stack); str = pop(stack); if (!str) { push(stack, copyString((unsigned char *) "..")); if (*p) { push(stack, copyString( (unsigned char *) "/")); } } else if (!strcmp((char *) str, "..")) { push(stack, str); push(stack, slash); push(stack, copyString((unsigned char *) "..")); if (*p) { push(stack, copyString( (unsigned char *) "/")); } } else { free(slash); free(str); } } else { push(stack, copySizedString(begin, p - begin)); if (*p) { push(stack, copyString((unsigned char *) "/")); } } if (*p) { p++; } } if (absolute) { url->path[0] = '/'; url->path[1] = 0; } else { url->path[0] = 0; } while (1) { p = bottom(stack); if (p) { strcat((char *) url->path, (char *) p); free(p); } else { break; } } stackFree(stack); } URL * urlRelative(const unsigned char *baseURL, const unsigned char *relativeURL) { URL *base; int len; URL *rel; unsigned char *tmp; if ((!baseURL) || (!*baseURL)) { return urlParse(relativeURL); } if ((!relativeURL) || (!*relativeURL)) { return urlParse(baseURL); } rel = urlParse(relativeURL); if (rel->scheme) { return rel; } else { base = urlParse(baseURL); if (base->scheme) { rel->scheme = copyString(base->scheme); } else { /* XXX Base is supposed to have scheme. Oh well. */ return rel; } } if (rel->net_loc) { goto step7; } else { rel->net_loc = copyString(base->net_loc); } if (rel->path && rel->path[0] == '/') { goto step7; } if ((!rel->path) || (!*rel->path)) { FREE(rel->path); rel->path = copyString(base->path); if (rel->params) { goto step7; } rel->params = copyString(base->params); if (rel->query) { goto step7; } rel->query = copyString(base->query); goto step7; } if (base->pathWithoutFile) { tmp = rel->path; rel->path = appendString(base->pathWithoutFile, rel->path); FREE(tmp); } urlCanonicalizePath(rel); step7: len = strlen((char *) rel->scheme); len += 1; /* ":" */ if (rel->net_loc) { len += 2 + strlen((char *) rel->net_loc); /* "//net_loc" */ } if (rel->path) { len += strlen((char *) rel->path); } if (rel->params) { len += strlen((char *) rel->params); } if (rel->query) { len += strlen((char *) rel->query); } if (rel->fragment) { len += strlen((char *) rel->fragment); } FREE(rel->url); rel->url = calloc(len + 1, 1); if (!rel->url) { fprintf(stderr, "cannot calloc url\n"); exit(0); } strcpy((char *) rel->url, (char *) rel->scheme); strcat((char *) rel->url, ":"); if (rel->net_loc) { strcat((char *) rel->url, "//"); strcat((char *) rel->url, (char *) rel->net_loc); } if (rel->path) { strcat((char *) rel->url, (char *) rel->path); } if (rel->params) { strcat((char *) rel->url, (char *) rel->params); } if (rel->query) { strcat((char *) rel->url, (char *) rel->query); } if (rel->fragment) { strcat((char *) rel->url, (char *) rel->fragment); } urlEmbellish(rel); urlFree(base); return rel; } void urlDecode(unsigned char *url) { unsigned char c; unsigned char *in; unsigned char *out; int tmp; in = url; out = url; while (1) { c = *in++; if (!c) { break; } else if (c == '%') { sscanf((char *) in, "%02x", &tmp); if (*in) { in++; if (*in) { in++; } } *out++ = tmp; } else { *out++ = c; } } *out++ = 0; } #ifdef URL_TEST static unsigned char *baseURLTest = "http://a/b/c/d;p?q#f"; static char *relativeURLTests[] = { "g:h", "g:h", "g", "http://a/b/c/g", "./g", "http://a/b/c/g", "g/", "http://a/b/c/g/", "/g", "http://a/g", "//g", "http://g", "?y", "http://a/b/c/d;p?y", "g?y", "http://a/b/c/g?y", "g?y/./x", "http://a/b/c/g?y/./x", "#s", "http://a/b/c/d;p?q#s", "g#s", "http://a/b/c/g#s", "g#s/./x", "http://a/b/c/g#s/./x", "g?y#s", "http://a/b/c/g?y#s", ";x", "http://a/b/c/d;x", "g;x", "http://a/b/c/g;x", "g;x?y#s", "http://a/b/c/g;x?y#s", ".", "http://a/b/c/", "./", "http://a/b/c/", "..", "http://a/b/", "../", "http://a/b/", "../g", "http://a/b/g", "../..", "http://a/", "../../", "http://a/", "../../g", "http://a/g", "", "http://a/b/c/d;p?q#f", "../../../g", "http://a/../g", "../../../../g", "http://a/../../g", "/./g", "http://a/./g", "/../g", "http://a/../g", "g.", "http://a/b/c/g.", ".g", "http://a/b/c/.g", "g..", "http://a/b/c/g..", "..g", "http://a/b/c/..g", "./../g", "http://a/b/g", "./g/.", "http://a/b/c/g/", "g/./h", "http://a/b/c/g/h", "g/../h", "http://a/b/c/h", "http:g", "http:g", "http:", "http:", NULL }; static unsigned char *loginTest = "ftp://user:password@ftp.domain.com:64000/path1/path2/file#fragment"; static void printURL(URL *url) { printf("url %s\n", url->url); printf("scheme %s, ", url->scheme ? url->scheme : "NULL"); printf("login %s, ", url->login ? url->login : "NULL"); printf("password %s, ", url->password ? url->password : "NULL"); printf("host %s, ", url->host ? url->host : "NULL"); printf("port %d, ", url->port); printf("path %s, ", url->path ? url->path : "NULL"); printf("file %s, ", url->file ? url->file : "NULL"); printf("fragment %s\n", url->fragment ? url->fragment : "NULL"); printf("======================================\n"); } int main(int argc, char *argv[]) { int failures; char **p; int total; URL *url; printURL(urlParse(loginTest)); failures = 0; total = 0; p = relativeURLTests; while (p[0]) { total++; url = urlRelative(baseURLTest, p[0]); if (url) { if (strcmp((char *) url->url, p[1])) { failures++; printf("urlRelative failed:\n"); printf("\"%s\" +\n", baseURLTest); printf("\"%s\" =\n", p[0]); printf("\"%s\"\n", url->url); printf("should be:\n"); printf("\"%s\"\n", p[1]); printf("-------------------\n"); } urlFree(url); } else { failures++; printf("urlRelative return NULL for \"%s\"\n", p[0]); printf("----------------------------------\n"); } p += 2; } printf("%d failures out of %d\n", failures, total); return 0; } #endif /* URL_TEST */