From 655d44d139489625e77cf6790d3622f610c2aab9 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Sat, 1 Jun 2024 12:03:34 +0200 Subject: [PATCH] urlapi: add CURLU_NO_GUESS_SCHEME Used for extracting: - when used asking for a scheme, it will return CURLUE_NO_SCHEME if the stored information was a guess - when used asking for a URL, the URL is returned without a scheme, like when previously given to the URL parser when it was asked to guess - as soon as the scheme is set explicitly, it is no longer internally marked as guessed The idea being: 1. allow a user to figure out if a URL's scheme was set as a result of guessing 2. extract the URL without a guessed scheme 3. this makes it work similar to how we already deal with port numbers Extend test 1560 to verify. Closes #13616 --- docs/libcurl/curl_url_get.md | 16 ++++++++++++++++ docs/libcurl/symbols-in-versions | 1 + include/curl/urlapi.h | 1 + lib/urlapi.c | 16 ++++++++++++++-- tests/libtest/lib1560.c | 6 ++++++ 5 files changed, 38 insertions(+), 2 deletions(-) diff --git a/docs/libcurl/curl_url_get.md b/docs/libcurl/curl_url_get.md index 54041180f..d49dd84cf 100644 --- a/docs/libcurl/curl_url_get.md +++ b/docs/libcurl/curl_url_get.md @@ -125,6 +125,22 @@ nothing following the hash sign. (Added in curl 8.8.0) +## CURLU_NO_GUESS_SCHEME + +When this flag is used in curl_url_get(), it treats the scheme as non-existing +if it was set as a result of a previous guess; when CURLU_GUESS_SCHEME was +used parsing a URL. + +Using this flag when getting CURLUPART_SCHEME if the scheme was set as the +result of a guess makes curl_url_get() return CURLUE_NO_SCHEME. + +Using this flag when getting CURLUPART_URL if the scheme was set as the result +of a guess makes curl_url_get() return the full URL without the scheme +component. Such a URL can then only be parsed with curl_url_set() if +CURLU_GUESS_SCHEME is used. 
+ +(Added in curl 8.9.0) + # PARTS ## CURLUPART_URL diff --git a/docs/libcurl/symbols-in-versions b/docs/libcurl/symbols-in-versions index e5531df92..90ffa378b 100644 --- a/docs/libcurl/symbols-in-versions +++ b/docs/libcurl/symbols-in-versions @@ -1068,6 +1068,7 @@ CURLU_GET_EMPTY 8.8.0 CURLU_GUESS_SCHEME 7.62.0 CURLU_NO_AUTHORITY 7.67.0 CURLU_NO_DEFAULT_PORT 7.62.0 +CURLU_NO_GUESS_SCHEME 8.9.0 CURLU_NON_SUPPORT_SCHEME 7.62.0 CURLU_PATH_AS_IS 7.62.0 CURLU_PUNY2IDN 8.3.0 diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h index 19388c3c0..c29631cf4 100644 --- a/include/curl/urlapi.h +++ b/include/curl/urlapi.h @@ -102,6 +102,7 @@ typedef enum { #define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments when extracting the URL or the components */ +#define CURLU_NO_GUESS_SCHEME (1<<15) /* for get, don't accept a guess */ typedef struct Curl_URL CURLU; diff --git a/lib/urlapi.c b/lib/urlapi.c index eb0396687..c46898a23 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -82,6 +82,7 @@ struct Curl_URL { unsigned short portnum; /* the numerical version (if 'port' is set) */ BIT(query_present); /* to support blank */ BIT(fragment_present); /* to support blank */ + BIT(guessed_scheme); /* when a URL without scheme is parsed */ }; #define DEFAULT_SCHEME "https" @@ -1223,6 +1224,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) result = CURLUE_OUT_OF_MEMORY; goto fail; } + u->guessed_scheme = TRUE; } } else if(flags & CURLU_NO_AUTHORITY) { @@ -1437,6 +1439,8 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, ptr = u->scheme; ifmissing = CURLUE_NO_SCHEME; urldecode = FALSE; /* never for schemes */ + if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme) + return CURLUE_NO_SCHEME; break; case CURLUPART_USER: ptr = u->user; @@ -1525,6 +1529,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, return CURLUE_NO_HOST; else { const struct Curl_handler *h = NULL; + char schemebuf[MAX_SCHEME_LEN + 5]; if(u->scheme) scheme = 
u->scheme; else if(flags & CURLU_DEFAULT_SCHEME) @@ -1595,8 +1600,13 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what, } } - url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - scheme, + if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme) + msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme); + else + schemebuf[0] = 0; + + url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + schemebuf, u->user ? u->user : "", u->password ? ":": "", u->password ? u->password : "", @@ -1718,6 +1728,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, break; case CURLUPART_SCHEME: storep = &u->scheme; + u->guessed_scheme = FALSE; break; case CURLUPART_USER: storep = &u->user; @@ -1790,6 +1801,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what, } else return CURLUE_BAD_SCHEME; + u->guessed_scheme = FALSE; break; } case CURLUPART_USER: diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index 934fc78fd..0109d6edd 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -151,6 +151,9 @@ struct clearurlcase { }; static const struct testcase get_parts_list[] ={ + {"curl.se", + "[10] | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]", + CURLU_GUESS_SCHEME, CURLU_NO_GUESS_SCHEME, CURLUE_OK}, {"https://curl.se:0/#", "https | [11] | [12] | [13] | curl.se | 0 | / | [16] | ", 0, CURLU_GET_EMPTY, CURLUE_OK}, @@ -526,6 +529,9 @@ static const struct testcase get_parts_list[] ={ }; static const struct urltestcase get_url_list[] = { + {"example.com", + "example.com/", + CURLU_GUESS_SCHEME, CURLU_NO_GUESS_SCHEME, CURLUE_OK}, {"http://user@example.com?#", "http://user@example.com/?#", 0, CURLU_GET_EMPTY, CURLUE_OK},