urlapi: add CURLU_NO_GUESS_SCHEME

Used for extracting:

- when used asking for a scheme, it will return CURLUE_NO_SCHEME if the
  stored information was a guess

- when used asking for a URL, the URL is returned without a scheme, like
  when previously given to the URL parser when it was asked to guess

- as soon as the scheme is set explicitly, it is no longer internally
  marked as guessed

The idea being:

1. allow a user to figure out if a URL's scheme was set as a result of
  guessing

2. extract the URL without a guessed scheme

3. this makes it work similar to how we already deal with port numbers

Extend test 1560 to verify.

Closes #13616
This commit is contained in:
Daniel Stenberg 2024-06-01 12:03:34 +02:00
Родитель 80aa519545
Коммит 655d44d139
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 5CC908FDB71E12C2
5 изменённых файлов: 38 добавлений и 2 удалений

Просмотреть файл

@ -125,6 +125,22 @@ nothing following the hash sign.
(Added in curl 8.8.0) (Added in curl 8.8.0)
## CURLU_NO_GUESS_SCHEME
When this flag is used in curl_url_get(), it treats the scheme as non-existing
if it was set as a result of a previous guess, that is, when CURLU_GUESS_SCHEME
was used while parsing the URL.
Using this flag when getting CURLUPART_SCHEME if the scheme was set as the
result of a guess makes curl_url_get() return CURLUE_NO_SCHEME.
Using this flag when getting CURLUPART_URL if the scheme was set as the result
of a guess makes curl_url_get() return the full URL without the scheme
component. Such a URL can then only be parsed with curl_url_set() if
CURLU_GUESS_SCHEME is used.
(Added in curl 8.9.0)
# PARTS # PARTS
## CURLUPART_URL ## CURLUPART_URL

Просмотреть файл

@ -1068,6 +1068,7 @@ CURLU_GET_EMPTY 8.8.0
CURLU_GUESS_SCHEME 7.62.0 CURLU_GUESS_SCHEME 7.62.0
CURLU_NO_AUTHORITY 7.67.0 CURLU_NO_AUTHORITY 7.67.0
CURLU_NO_DEFAULT_PORT 7.62.0 CURLU_NO_DEFAULT_PORT 7.62.0
CURLU_NO_GUESS_SCHEME 8.9.0
CURLU_NON_SUPPORT_SCHEME 7.62.0 CURLU_NON_SUPPORT_SCHEME 7.62.0
CURLU_PATH_AS_IS 7.62.0 CURLU_PATH_AS_IS 7.62.0
CURLU_PUNY2IDN 8.3.0 CURLU_PUNY2IDN 8.3.0

Просмотреть файл

@ -102,6 +102,7 @@ typedef enum {
#define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments #define CURLU_GET_EMPTY (1<<14) /* allow empty queries and fragments
when extracting the URL or the when extracting the URL or the
components */ components */
/* Flag for curl_url_get(): treat a scheme that was set as the result of a
   guess as absent. Must occupy its own bit; (1<<14) is already taken by
   CURLU_GET_EMPTY. */
#define CURLU_NO_GUESS_SCHEME (1<<15) /* for get, don't accept a guess */
typedef struct Curl_URL CURLU; typedef struct Curl_URL CURLU;

Просмотреть файл

@ -82,6 +82,7 @@ struct Curl_URL {
unsigned short portnum; /* the numerical version (if 'port' is set) */ unsigned short portnum; /* the numerical version (if 'port' is set) */
BIT(query_present); /* to support blank */ BIT(query_present); /* to support blank */
BIT(fragment_present); /* to support blank */ BIT(fragment_present); /* to support blank */
BIT(guessed_scheme); /* when a URL without scheme is parsed */
}; };
#define DEFAULT_SCHEME "https" #define DEFAULT_SCHEME "https"
@ -1223,6 +1224,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
result = CURLUE_OUT_OF_MEMORY; result = CURLUE_OUT_OF_MEMORY;
goto fail; goto fail;
} }
u->guessed_scheme = TRUE;
} }
} }
else if(flags & CURLU_NO_AUTHORITY) { else if(flags & CURLU_NO_AUTHORITY) {
@ -1437,6 +1439,8 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
ptr = u->scheme; ptr = u->scheme;
ifmissing = CURLUE_NO_SCHEME; ifmissing = CURLUE_NO_SCHEME;
urldecode = FALSE; /* never for schemes */ urldecode = FALSE; /* never for schemes */
if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
return CURLUE_NO_SCHEME;
break; break;
case CURLUPART_USER: case CURLUPART_USER:
ptr = u->user; ptr = u->user;
@ -1525,6 +1529,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
return CURLUE_NO_HOST; return CURLUE_NO_HOST;
else { else {
const struct Curl_handler *h = NULL; const struct Curl_handler *h = NULL;
char schemebuf[MAX_SCHEME_LEN + 5];
if(u->scheme) if(u->scheme)
scheme = u->scheme; scheme = u->scheme;
else if(flags & CURLU_DEFAULT_SCHEME) else if(flags & CURLU_DEFAULT_SCHEME)
@ -1595,8 +1600,13 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
} }
} }
url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s", if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
scheme, msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
else
schemebuf[0] = 0;
url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
schemebuf,
u->user ? u->user : "", u->user ? u->user : "",
u->password ? ":": "", u->password ? ":": "",
u->password ? u->password : "", u->password ? u->password : "",
@ -1718,6 +1728,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
break; break;
case CURLUPART_SCHEME: case CURLUPART_SCHEME:
storep = &u->scheme; storep = &u->scheme;
u->guessed_scheme = FALSE;
break; break;
case CURLUPART_USER: case CURLUPART_USER:
storep = &u->user; storep = &u->user;
@ -1790,6 +1801,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
} }
else else
return CURLUE_BAD_SCHEME; return CURLUE_BAD_SCHEME;
u->guessed_scheme = FALSE;
break; break;
} }
case CURLUPART_USER: case CURLUPART_USER:

Просмотреть файл

@ -151,6 +151,9 @@ struct clearurlcase {
}; };
static const struct testcase get_parts_list[] ={ static const struct testcase get_parts_list[] ={
{"curl.se",
"[10] | [11] | [12] | [13] | curl.se | [15] | / | [16] | [17]",
CURLU_GUESS_SCHEME, CURLU_NO_GUESS_SCHEME, CURLUE_OK},
{"https://curl.se:0/#", {"https://curl.se:0/#",
"https | [11] | [12] | [13] | curl.se | 0 | / | [16] | ", "https | [11] | [12] | [13] | curl.se | 0 | / | [16] | ",
0, CURLU_GET_EMPTY, CURLUE_OK}, 0, CURLU_GET_EMPTY, CURLUE_OK},
@ -526,6 +529,9 @@ static const struct testcase get_parts_list[] ={
}; };
static const struct urltestcase get_url_list[] = { static const struct urltestcase get_url_list[] = {
{"example.com",
"example.com/",
CURLU_GUESS_SCHEME, CURLU_NO_GUESS_SCHEME, CURLUE_OK},
{"http://user@example.com?#", {"http://user@example.com?#",
"http://user@example.com/?#", "http://user@example.com/?#",
0, CURLU_GET_EMPTY, CURLUE_OK}, 0, CURLU_GET_EMPTY, CURLUE_OK},