Merge pull request #4369 from RasmusWL/python-ospathjoin-taintstep

Python: Add taint-step for os.path.join
This commit is contained in:
Taus 2020-09-30 13:35:16 +02:00 коммит произвёл GitHub
Родитель b1c826e5c0 1595fed2d6
Коммит d694777894
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 142 добавлений и 6 удалений

Просмотреть файл

@ -101,7 +101,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
nodeFrom.getNode() = object and
method_name in ["partition", "rpartition", "rsplit", "split", "splitlines"]
or
// List[str] -> str
// Iterable[str] -> str
// TODO: check if these should be handled differently in regards to content
method_name = "join" and
nodeFrom.getNode() = call.getArg(0)
@ -130,7 +130,6 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
// f-strings
nodeTo.asExpr().(Fstring).getAValue() = nodeFrom.asExpr()
// TODO: Handle encode/decode from base64/quopri
// TODO: Handle os.path.join
// TODO: Handle functions in https://docs.python.org/3/library/binascii.html
}

Просмотреть файл

@ -5,9 +5,11 @@
private import python
private import experimental.dataflow.DataFlow
private import experimental.dataflow.TaintTracking
private import experimental.dataflow.RemoteFlowSources
private import experimental.semmle.python.Concepts
/** Provides models for the Python standard library. */
private module Stdlib {
/** Gets a reference to the `os` module. */
DataFlow::Node os(DataFlow::TypeTracker t) {
@ -20,6 +22,7 @@ private module Stdlib {
/** Gets a reference to the `os` module. */
DataFlow::Node os() { result = os(DataFlow::TypeTracker::end()) }
/** Provides models for the `os` module. */
module os {
/** Gets a reference to the `os.system` function. */
DataFlow::Node system(DataFlow::TypeTracker t) {
@ -48,6 +51,41 @@ private module Stdlib {
/** Gets a reference to the `os.popen` function. */
DataFlow::Node popen() { result = os::popen(DataFlow::TypeTracker::end()) }
/**
 * Gets a reference to the `os.path` module, as seen by type tracker `t`.
 *
 * A reference is one of:
 * - an import found via `DataFlow::importModule("os.path")` or
 *   `DataFlow::importMember("os", "path")` (start of tracking),
 * - the `path` attribute of a reference to the `os` module, or
 * - a node that an earlier reference is tracked to.
 */
private DataFlow::Node path(DataFlow::TypeTracker t) {
  // Base case: an import that resolves directly to `os.path`.
  (
    result = DataFlow::importModule("os.path")
    or
    result = DataFlow::importMember("os", "path")
  ) and
  t.start()
  or
  // Base case: start from a reference to `os`, tracking its `path` attribute.
  result = os() and
  t.startInAttr("path")
  or
  // Recursive case: follow an already-tracked reference one step further.
  exists(DataFlow::TypeTracker prev | result = path(prev).track(prev, t))
}
/**
 * Gets a reference to the `os.path` module.
 *
 * This is the result of `path(t)` for the end type tracker.
 */
DataFlow::Node path() { result = path(DataFlow::TypeTracker::end()) }
/** Provides models for the `os.path` module. */
module path {
  /**
   * Gets a reference to the `os.path.join` function, as seen by type tracker `t`.
   *
   * A reference is one of:
   * - an import found via `DataFlow::importMember("os.path", "join")`
   *   (start of tracking),
   * - the `join` attribute of a reference to the `os.path` module, or
   * - a node that an earlier reference is tracked to.
   */
  private DataFlow::Node join(DataFlow::TypeTracker t) {
    // Base case: `join` imported as a member of `os.path`.
    result = DataFlow::importMember("os.path", "join") and
    t.start()
    or
    // Base case: start from a reference to `os.path`, tracking its `join` attribute.
    result = os::path() and
    t.startInAttr("join")
    or
    // Recursive case: follow an already-tracked reference one step further.
    exists(DataFlow::TypeTracker prev | result = join(prev).track(prev, t))
  }

  /** Gets a reference to the `os.path.join` function. */
  DataFlow::Node join() { result = join(DataFlow::TypeTracker::end()) }
}
}
/**
@ -73,4 +111,16 @@ private module Stdlib {
result.asCfgNode() = this.asCfgNode().(CallNode).getArg(0)
}
}
/** An additional taint step for calls to `os.path.join`. */
private class OsPathJoinCallAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
  /**
   * Holds if `nodeTo` is a call whose function is a reference to
   * `os.path.join`, and `nodeFrom` is one of that call's arguments
   * (as given by `getAnArg()`).
   */
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(CallNode joinCall |
      joinCall.getFunction() = os::path::join().asCfgNode() and
      nodeFrom.asCfgNode() = joinCall.getAnArg() and
      nodeTo.asCfgNode() = joinCall
    )
    // TODO: Handle pathlib (like we do for os.path.join)
  }
}
}

Просмотреть файл

@ -1,6 +1,18 @@
| test_collections.py:16 | ok | test_access | tainted_list.copy() |
| test_collections.py:24 | ok | list_clear | tainted_list |
| test_collections.py:27 | fail | list_clear | tainted_list |
| test_pathlib.py:26 | fail | test_basic | tainted_path |
| test_pathlib.py:28 | fail | test_basic | tainted_pure_path |
| test_pathlib.py:29 | fail | test_basic | tainted_pure_posix_path |
| test_pathlib.py:30 | fail | test_basic | tainted_pure_windows_path |
| test_pathlib.py:32 | fail | test_basic | BinaryExpr |
| test_pathlib.py:33 | fail | test_basic | BinaryExpr |
| test_pathlib.py:35 | fail | test_basic | tainted_path.joinpath(..) |
| test_pathlib.py:36 | fail | test_basic | pathlib.Path(..).joinpath(..) |
| test_pathlib.py:37 | fail | test_basic | pathlib.Path(..).joinpath(..) |
| test_pathlib.py:39 | fail | test_basic | str(..) |
| test_pathlib.py:49 | fail | test_basic | tainted_posix_path |
| test_pathlib.py:55 | fail | test_basic | tainted_windows_path |
| test_string.py:17 | ok | str_methods | ts.casefold() |
| test_string.py:19 | ok | str_methods | ts.format_map(..) |
| test_string.py:20 | ok | str_methods | "{unsafe}".format_map(..) |

Просмотреть файл

@ -0,0 +1,60 @@
# Add the parent directory to sys.path so taintlib can be imported at runtime without any hassle
import sys; import os; sys.path.append(os.path.dirname(os.path.dirname((__file__))))
from taintlib import *
# This has no runtime impact, but allows autocomplete to work
from typing import Iterable, TYPE_CHECKING
if TYPE_CHECKING:
from ..taintlib import *
# Actual tests
import pathlib
# pathlib was added in 3.4
def test_basic():  # NOTE: expected-output rows are keyed to line numbers in this file; keep comments trailing
print("\n# test_basic")
ts = TAINTED_STRING
tainted_path = pathlib.Path(ts)  # each path object below is constructed directly from the tainted string
tainted_pure_path = pathlib.PurePath(ts)
tainted_pure_posix_path = pathlib.PurePosixPath(ts)
tainted_pure_windows_path = pathlib.PureWindowsPath(ts)
ensure_tainted(
tainted_path,
tainted_pure_path,
tainted_pure_posix_path,
tainted_pure_windows_path,
pathlib.Path("foo") / ts,  # tainted string as the right operand of `/`
ts / pathlib.Path("foo"),  # tainted string as the left operand of `/`
tainted_path.joinpath("foo", "bar"),  # tainted receiver, untainted arguments
pathlib.Path("foo").joinpath(tainted_path, "bar"),  # tainted path as first argument
pathlib.Path("foo").joinpath("bar", tainted_path),  # tainted path as last argument
str(tainted_path),  # conversion of a tainted path back to a string
# TODO: Tainted methods and attributes
# https://docs.python.org/3.8/library/pathlib.html#methods-and-properties
)
if os.name == "posix":  # PosixPath is only exercised when running on a POSIX system
tainted_posix_path = pathlib.PosixPath(ts)
ensure_tainted(
tainted_posix_path,
)
if os.name == "nt":  # WindowsPath is only exercised when running on Windows
tainted_windows_path = pathlib.WindowsPath(ts)
ensure_tainted(
tainted_windows_path,
)
# Make tests runnable
test_basic()

Просмотреть файл

@ -22,7 +22,7 @@ def str_methods():
def binary_decode_encode():
print("\n#percent_fmt")
print("\n# binary_decode_encode")
tb = TAINTED_BYTES
import base64
@ -42,7 +42,7 @@ def binary_decode_encode():
def f_strings():
print("\n#f_strings")
print("\n# f_strings")
ts = TAINTED_STRING
ensure_tainted(f"foo {ts} bar")

Просмотреть файл

@ -137,6 +137,9 @@
| test_string.py:143 | fail | binary_decode_encode | base64.decodestring(..) |
| test_string.py:148 | fail | binary_decode_encode | quopri.encodestring(..) |
| test_string.py:149 | fail | binary_decode_encode | quopri.decodestring(..) |
| test_string.py:158 | ok | test_os_path_join | os.path.join(..) |
| test_string.py:159 | ok | test_os_path_join | os.path.join(..) |
| test_string.py:160 | ok | test_os_path_join | os.path.join(..) |
| test_unpacking.py:16 | ok | unpacking | a |
| test_unpacking.py:16 | ok | unpacking | b |
| test_unpacking.py:16 | ok | unpacking | c |

Просмотреть файл

@ -107,7 +107,7 @@ def non_syntactic():
def percent_fmt():
print("\n#percent_fmt")
print("\n# percent_fmt")
ts = TAINTED_STRING
tainted_fmt = ts + " %s %s"
ensure_tainted(
@ -118,7 +118,7 @@ def percent_fmt():
def binary_decode_encode():
print("\n#percent_fmt")
print("\n# binary_decode_encode")
tb = TAINTED_BYTES
import base64
@ -150,6 +150,17 @@ def binary_decode_encode():
)
def test_os_path_join():  # NOTE: expected-output rows are keyed to line numbers in this file; keep comments trailing
import os
print("\n# test_os_path_join")
ts = TAINTED_STRING
ensure_tainted(
os.path.join(ts, "foo", "bar"),  # tainted string as the first argument
os.path.join(ts),  # tainted string as the only argument
os.path.join("foo", "bar", ts),  # tainted string as the last argument
)
# Make tests runnable
str_operations()
@ -157,3 +168,4 @@ str_methods()
non_syntactic()
percent_fmt()
binary_decode_encode()
test_os_path_join()