DENG-2918-UDFs for URL parsing (#5141)
* DENG-2918-UDFs for URL parsing * removing get_host because net.host() exists
This commit is contained in:
Родитель
311aab70ef
Коммит
dc60b040f5
|
@ -0,0 +1,4 @@
|
|||
This UDF extracts the path from a URL string.
|
||||
|
||||
The path is everything after the host and before any query parameters (`?`) or fragment (`#`).
|
||||
This function returns "/" if there is no path.
|
|
@ -0,0 +1,2 @@
|
|||
description: Extract the Path from a URL
|
||||
friendly_name: Get URL Path
|
|
@ -0,0 +1,21 @@
|
|||
/*
Extract the path component of a URL.

Args:
  url: The URL string. The scheme is optional, so "some-url.com/path" and
       "//some-url.com/path" are accepted as well as "https://some-url.com/path".

Returns:
  The path, always starting with "/". The path begins at the first "/" after
  the host and ends before the first "?", "#" or "&". Returns "/" when the URL
  has no path. A NULL url also yields "/", because COALESCE supplies the
  default when the extraction returns NULL.
*/
CREATE OR REPLACE FUNCTION utils.get_url_path(url STRING)
RETURNS STRING AS (
  COALESCE(
    REGEXP_EXTRACT(
      -- Remove only a *leading* "scheme://" (any case, any scheme) or a
      -- protocol-relative "//". Anchoring with ^ keeps URLs embedded later in
      -- the path or query string intact.
      REGEXP_REPLACE(url, r"^(?:[A-Za-z][A-Za-z0-9+.-]*:)?//", ""),
      -- Anchor at the start of the host so that a "/" appearing only inside
      -- the query string or fragment is never mistaken for the path.
      -- "&" is kept as a terminator to tolerate malformed URLs such as
      -- "host/path&utm=1".
      r"^[^/?#]*(/[^&?#]*)"
    ),
    "/"
  )
);

-- Tests
SELECT
  mozfun.assert.equals("/path", utils.get_url_path("https://some-url.com/path")),
  mozfun.assert.equals("/path", utils.get_url_path("http://some-url.com/path?more")),
  mozfun.assert.equals("/path", utils.get_url_path("http://some-url.com/path#more")),
  mozfun.assert.equals("/path", utils.get_url_path("http://some-url.com/path?more&utm=123")),
  mozfun.assert.equals(
    "/path/with/multiple/slashes",
    utils.get_url_path("http://some-url.com/path/with/multiple/slashes?more")
  ),
  mozfun.assert.equals("/", utils.get_url_path("https://some-url.com")),
  mozfun.assert.equals("/", utils.get_url_path("https://some-url.com/")),
  mozfun.assert.equals("/path", utils.get_url_path("some-url.com/path")),
  -- Host with an explicit port
  mozfun.assert.equals("/path", utils.get_url_path("https://some-url.com:8080/path")),
  -- Scheme matching is case-insensitive
  mozfun.assert.equals("/path", utils.get_url_path("HTTPS://some-url.com/path")),
  -- Protocol-relative URL
  mozfun.assert.equals("/path", utils.get_url_path("//some-url.com/path")),
  -- A "/" that only appears in the query string or fragment is not a path
  mozfun.assert.equals("/", utils.get_url_path("https://some-url.com?next=/other")),
  mozfun.assert.equals("/", utils.get_url_path("https://some-url.com#/route")),
  -- A URL embedded in the query string does not affect the path
  mozfun.assert.equals(
    "/path",
    utils.get_url_path("http://some-url.com/path?next=http://other.com/x")
  ),
  -- A URL embedded in the path is preserved verbatim
  mozfun.assert.equals(
    "/redirect/https://other.com/x",
    utils.get_url_path("http://some-url.com/redirect/https://other.com/x")
  )
|
Загрузка…
Ссылка в новой задаче