Webserver: Sanitize string passed to origin param (#14738)

Follow-up of #12459 & #10334

Since https://github.com/python/cpython/pull/24297/files (bpo-42967)
also removed ';' as query argument separator, we remove query arguments
with semicolons.
This commit is contained in:
Kaxil Naik 2021-03-12 09:48:59 +00:00 коммит произвёл GitHub
Родитель d2c2a2285c
Коммит 409c249121
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 20 добавлений и 19 удалений

Просмотреть файл

@ -129,8 +129,18 @@ def get_safe_url(url):
parsed = urlparse(url)
# If the url is relative & it contains semicolon, redirect it to homepage to avoid
# potential XSS. (Similar to https://github.com/python/cpython/pull/24297/files (bpo-42967))
if parsed.netloc == '' and parsed.scheme == '' and ';' in unquote(url):
return url_for('Airflow.index')
query = parse_qsl(parsed.query, keep_blank_values=True)
url = parsed._replace(query=urlencode(query)).geturl()
# Remove all the query elements containing semicolon
# As part of https://github.com/python/cpython/pull/24297/files (bpo-42967)
# semicolon was already removed as a separator for query arguments by default
sanitized_query = [query_arg for query_arg in query if ';' not in query_arg[1]]
url = parsed._replace(query=urlencode(sanitized_query)).geturl()
if parsed.scheme in valid_schemes and parsed.netloc in valid_netlocs:
return url

Просмотреть файл

@ -32,7 +32,7 @@ from datetime import datetime as dt, timedelta
from typing import Any, Dict, Generator, List, NamedTuple
from unittest import mock
from unittest.mock import PropertyMock
from urllib.parse import parse_qsl, quote_plus
from urllib.parse import quote_plus
import jinja2
import pytest
@ -2776,9 +2776,10 @@ class TestTriggerDag(TestBase):
[
("javascript:alert(1)", "/home"),
("http://google.com", "/home"),
("36539'%3balert(1)%2f%2f166", "/home"),
(
"%2Ftree%3Fdag_id%3Dexample_bash_operator';alert(33)//",
"/tree?dag_id=example_bash_operator%27%3Balert%2833%29%2F%2F",
"/home",
),
("%2Ftree%3Fdag_id%3Dexample_bash_operator", "/tree?dag_id=example_bash_operator"),
("%2Fgraph%3Fdag_id%3Dexample_bash_operator", "/graph?dag_id=example_bash_operator"),
@ -2787,13 +2788,6 @@ class TestTriggerDag(TestBase):
def test_trigger_dag_form_origin_url(self, test_origin, expected_origin):
test_dag_id = "example_bash_operator"
# https://github.com/python/cpython/pull/24297/files
# Check if tests are running with a Python version containing the above fix
# where ";" is removed as a separator
if parse_qsl(";a=b") != [(';a', 'b')] and ";" in test_origin:
expected_origin = expected_origin.replace("%3B", "&")
expected_origin += "="
resp = self.client.get(f'trigger?dag_id={test_dag_id}&origin={test_origin}')
self.check_content_in_response(
'<button type="button" class="btn" onclick="location.href = \'{}\'; return false">'.format(
@ -3325,10 +3319,14 @@ class TestHelperFunctions(TestBase):
[
("", "/home"),
("http://google.com", "/home"),
("36539'%3balert(1)%2f%2f166", "/home"),
(
"http://localhost:8080/trigger?dag_id=test&origin=36539%27%3balert(1)%2f%2f166&abc=2",
"http://localhost:8080/trigger?dag_id=test&abc=2",
),
(
"http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag';alert(33)//",
"http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3F"
"dag_id%25test_dag%27%3Balert%2833%29%2F%2F",
"http://localhost:8080/trigger?dag_id=test_dag",
),
(
"http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag",
@ -3338,13 +3336,6 @@ class TestHelperFunctions(TestBase):
)
@mock.patch("airflow.www.views.url_for")
def test_get_safe_url(self, test_url, expected_url, mock_url_for):
# https://github.com/python/cpython/pull/24297/files
# Check if tests are running with a Python version containing the above fix
# where ";" is removed as a separator
if parse_qsl(";a=b") != [(';a', 'b')] and ";" in test_url:
expected_url = expected_url.replace("%3B", "&")
expected_url += "="
mock_url_for.return_value = "/home"
with self.app.test_request_context(base_url="http://localhost:8080"):
assert get_safe_url(test_url) == expected_url