Merge pull request #10862 from erik-krogh/unsafeCodeConstruction

Rb: Add an `unsafe-code-construction` query
This commit is contained in:
Erik Krogh Kristensen 2023-01-16 13:22:58 +01:00 коммит произвёл GitHub
Родитель 7f880a24df f2658a0936
Коммит 59a8b21851
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
19 изменённых файлов: 472 добавлений и 1 удалений

Просмотреть файл

@ -0,0 +1,119 @@
/**
* Provides default sources, sinks and sanitizers for reasoning about
* code constructed from library input vulnerabilities, as
* well as extension points for adding your own.
*/
private import ruby
private import codeql.ruby.ApiGraphs
private import codeql.ruby.frameworks.core.Gem::Gem as Gem
private import codeql.ruby.Concepts as Concepts
/**
* Module containing sources, sinks, and sanitizers for code constructed from library input.
*/
module UnsafeCodeConstruction {
/** A source for code constructed from library input vulnerabilities. */
abstract class Source extends DataFlow::Node { }
/** An input parameter to a gem seen as a source. */
private class LibraryInputAsSource extends Source instanceof DataFlow::ParameterNode {
LibraryInputAsSource() {
this = Gem::getALibraryInput() and
not this.getName() = "code"
}
}
/** A sink for code constructed from library input vulnerabilities. */
abstract class Sink extends DataFlow::Node {
/**
* Gets the node where the unsafe code is executed.
*/
abstract DataFlow::Node getCodeSink();
/**
* Gets the type of sink.
*/
string getSinkType() { result = "code construction" }
}
/** Gets a node that is eventually executed as code at `codeExec`. */
DataFlow::Node getANodeExecutedAsCode(Concepts::CodeExecution codeExec) {
result = getANodeExecutedAsCode(TypeTracker::TypeBackTracker::end(), codeExec)
}
import codeql.ruby.typetracking.TypeTracker as TypeTracker
/** Gets a node that is eventually executed as code at `codeExec`, type-tracked with `t`. */
private DataFlow::LocalSourceNode getANodeExecutedAsCode(
TypeTracker::TypeBackTracker t, Concepts::CodeExecution codeExec
) {
t.start() and
result = codeExec.getCode().getALocalSource() and
codeExec.runsArbitraryCode() // methods like `Object.send` is benign here, because of the string-construction the attacker cannot control the entire method name
or
exists(TypeTracker::TypeBackTracker t2 |
result = getANodeExecutedAsCode(t2, codeExec).backtrack(t2, t)
)
}
/**
* A string constructed using a `.join(...)` call, where the resulting string ends up being executed as code.
*/
class ArrayJoin extends Sink {
Concepts::CodeExecution s;
ArrayJoin() {
exists(DataFlow::CallNode call |
call.getMethodName() = "join" and
call.getNumberOfArguments() = 1 and // any string. E.g. ";" or "\n".
call = getANodeExecutedAsCode(s) and
this = call.getReceiver()
)
}
override DataFlow::Node getCodeSink() { result = s }
override string getSinkType() { result = "array" }
}
/**
* A string constructed from a string-literal (e.g. `"foo #{sink}"`),
* where the resulting string ends up being executed as a code.
*/
class StringInterpolationAsSink extends Sink {
Concepts::CodeExecution s;
StringInterpolationAsSink() {
exists(Ast::StringlikeLiteral lit |
any(DataFlow::Node n | n.asExpr().getExpr() = lit) = getANodeExecutedAsCode(s) and
this.asExpr().getExpr() = lit.getComponent(_)
)
}
override DataFlow::Node getCodeSink() { result = s }
override string getSinkType() { result = "string interpolation" }
}
import codeql.ruby.security.TaintedFormatStringSpecific as TaintedFormat
/**
* A string constructed from a printf-style call,
* where the resulting string ends up being executed as a code.
*/
class TaintedFormatStringAsSink extends Sink {
Concepts::CodeExecution s;
TaintedFormatStringAsSink() {
exists(TaintedFormat::PrintfStyleCall call |
call = getANodeExecutedAsCode(s) and
this = [call.getFormatArgument(_), call.getFormatString()]
)
}
override DataFlow::Node getCodeSink() { result = s }
override string getSinkType() { result = "string format" }
}
}

Просмотреть файл

@ -0,0 +1,49 @@
/**
* Provides a taint-tracking configuration for reasoning about code
* constructed from library input vulnerabilities.
*
* Note, for performance reasons: only import this file if `Configuration` is needed,
* otherwise `UnsafeCodeConstructionCustomizations` should be imported instead.
*/
import codeql.ruby.DataFlow
import UnsafeCodeConstructionCustomizations::UnsafeCodeConstruction
private import codeql.ruby.TaintTracking
private import codeql.ruby.dataflow.BarrierGuards
/**
* A taint-tracking configuration for detecting code constructed from library input vulnerabilities.
*/
class Configuration extends TaintTracking::Configuration {
Configuration() { this = "UnsafeShellCommandConstruction" }
override predicate isSource(DataFlow::Node source) { source instanceof Source }
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
override predicate isSanitizer(DataFlow::Node node) {
node instanceof StringConstCompareBarrier or
node instanceof StringConstArrayInclusionCallBarrier
}
// override to require the path doesn't have unmatched return steps
override DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureHasSourceCallContext
}
override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
// if an array element gets tainted, then we treat the entire array as tainted
exists(DataFlow::CallNode call |
call.getMethodName() = ["<<", "push", "append"] and
call.getReceiver() = succ and
pred = call.getArgument(0) and
call.getNumberOfArguments() = 1
)
or
exists(DataFlow::CallNode call |
call.getMethodName() = "[]" and
succ = call and
pred = call.getArgument(_)
)
}
}

Просмотреть файл

@ -67,7 +67,7 @@ module UnsafeShellCommandConstruction {
*/
class StringInterpolationAsSink extends Sink {
Concepts::SystemCommandExecution s;
Ast::StringLiteral lit;
Ast::StringlikeLiteral lit;
StringInterpolationAsSink() {
isUsedAsShellCommand(any(DataFlow::Node n | n.asExpr().getExpr() = lit), s) and

Просмотреть файл

@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `rb/unsafe-code-construction`, to detect libraries that unsafely construct code from their inputs.

Просмотреть файл

@ -0,0 +1,111 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>
<overview>
<p>
When a library function dynamically constructs code in a potentially unsafe way,
it's important to document to clients of the library that the function should only be
used with trusted inputs.
If the function is not documented as being potentially unsafe, then a client may
incorrectly use inputs containing unsafe code fragments, and thereby leave the
client vulnerable to code-injection attacks.
</p>
</overview>
<recommendation>
<p>
Properly document library functions that construct code from unsanitized
inputs, or avoid constructing code in the first place.
</p>
</recommendation>
<example>
<p>
The following example shows two methods implemented using <code>eval</code>: a simple
deserialization routine and a getter method.
If untrusted inputs are used with these methods,
then an attacker might be able to execute arbitrary code on the system.
</p>
<sample src="examples/UnsafeCodeConstruction.rb" />
<p>
To avoid this problem, either properly document that the function is potentially
unsafe, or use an alternative solution such as <code>JSON.parse</code> or another library
that does not allow arbitrary code to be executed.
</p>
<sample src="examples/UnsafeCodeConstructionSafe.rb" />
</example>
<example>
<p>
As another example, consider the below code which dynamically constructs
a class that has a getter method with a custom name.
</p>
<sample src="examples/UnsafeCodeConstruction2.rb" />
<p>
The example dynamically constructs a string which is then executed using <code>module_eval</code>.
This code will break if the specified name is not a valid Ruby identifier, and
if the value is controlled by an attacker, then this could lead to code-injection.
</p>
<p>
A more robust implementation, that is also immune to code-injection,
can be made by using <code>module_eval</code> with a block and using <code>define_method</code>
to define the getter method.
</p>
<sample src="examples/UnsafeCodeConstruction2Safe.rb" />
</example>
<example>
<p>
This example dynamically registers a method on another class which
forwards its arguments to a target object. This approach uses
<code>module_eval</code> and string interpolation to construct class variables
and methods.
</p>
<sample src="examples/UnsafeCodeConstruction3.rb" />
<p>
A safer approach is to use <code>class_variable_set</code> and
<code>class_variable_get</code> along with <code>define_method</code>. String
interpolation is still used to construct the class variable name, but this is
safe because <code>class_variable_set</code> is not susceptible to code-injection.
</p>
<p>
<code>send</code> is used to dynamically call the method specified by <code>name</code>.
This is a more robust alternative than the previous example, because it does not allow
arbitrary code to be executed, but it does still allow for any method to be called
on the target object.
</p>
<sample src="examples/UnsafeCodeConstruction3Safe.rb" />
</example>
<references>
<li>
OWASP:
<a href="https://www.owasp.org/index.php/Code_Injection">Code Injection</a>.
</li>
<li>
Wikipedia: <a href="https://en.wikipedia.org/wiki/Code_injection">Code Injection</a>.
</li>
<li>
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.2/Module.html#method-i-define_method">define_method</a>.
</li>
<li>
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.2/Module.html#method-i-class_variable_set">class_variable_set</a>.
</li>
</references>
</qhelp>

Просмотреть файл

@ -0,0 +1,23 @@
/**
* @name Unsafe code constructed from library input
* @description Using externally controlled strings to construct code may allow a malicious
* user to execute arbitrary code.
* @kind path-problem
* @problem.severity warning
* @security-severity 6.1
* @precision medium
* @id rb/unsafe-code-construction
* @tags security
* external/cwe/cwe-094
* external/cwe/cwe-079
* external/cwe/cwe-116
*/
import codeql.ruby.security.UnsafeCodeConstructionQuery
import DataFlow::PathGraph
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
where cfg.hasFlowPath(source, sink) and sinkNode = sink.getNode()
select sink.getNode(), source, sink,
"This " + sinkNode.getSinkType() + " which depends on $@ is later $@.", source.getNode(),
"library input", sinkNode.getCodeSink(), "interpreted as code"

Просмотреть файл

@ -0,0 +1,10 @@
module MyLib
def unsafeDeserialize(value)
eval("foo = #{value}")
foo
end
def unsafeGetter(obj, path)
eval("obj.#{path}")
end
end

Просмотреть файл

@ -0,0 +1,17 @@
require 'json'
module BadMakeGetter
# Makes a class with a method named `getter_name` that returns `val`
def self.define_getter_class(getter_name, val)
new_class = Class.new
new_class.module_eval <<-END
def #{getter_name}
#{JSON.dump(val)}
end
END
new_class
end
end
one = BadMakeGetter.define_getter_class(:one, "foo")
puts "One is #{one.new.one}"

Просмотреть файл

@ -0,0 +1,13 @@
# Uses `define_method` instead of constructing a string
module GoodMakeGetter
def self.define_getter_class(getter_name, val)
new_class = Class.new
new_class.module_eval do
define_method(getter_name) { val }
end
new_class
end
end
two = GoodMakeGetter.define_getter_class(:two, "bar")
puts "Two is #{two.new.two}"

Просмотреть файл

@ -0,0 +1,11 @@
module Invoker
def attach(klass, name, target)
klass.module_eval <<-CODE
@@#{name} = target
def #{name}(*args)
@@#{name}.#{name}(*args)
end
CODE
end
end

Просмотреть файл

@ -0,0 +1,9 @@
module Invoker
def attach(klass, name, target)
var = :"@@#{name}"
klass.class_variable_set(var, target)
klass.define_method(name) do |*args|
self.class.class_variable_get(var).send(name, *args)
end
end
end

Просмотреть файл

@ -0,0 +1,11 @@
require 'json'
module MyLib
def safeDeserialize(value)
JSON.parse(value)
end
def safeGetter(obj, path)
obj.dig(*path.split("."))
end
end

Просмотреть файл

@ -0,0 +1,35 @@
edges
| impl/unsafeCode.rb:2:12:2:17 | target : | impl/unsafeCode.rb:3:17:3:25 | #{...} |
| impl/unsafeCode.rb:7:12:7:12 | x : | impl/unsafeCode.rb:8:30:8:30 | x |
| impl/unsafeCode.rb:12:12:12:12 | x : | impl/unsafeCode.rb:13:33:13:33 | x |
| impl/unsafeCode.rb:28:17:28:22 | my_arr : | impl/unsafeCode.rb:29:10:29:15 | my_arr |
| impl/unsafeCode.rb:32:21:32:21 | x : | impl/unsafeCode.rb:34:10:34:12 | arr |
| impl/unsafeCode.rb:37:15:37:15 | x : | impl/unsafeCode.rb:40:10:40:12 | arr |
| impl/unsafeCode.rb:37:15:37:15 | x : | impl/unsafeCode.rb:44:10:44:12 | arr |
| impl/unsafeCode.rb:47:15:47:15 | x : | impl/unsafeCode.rb:49:9:49:12 | #{...} |
nodes
| impl/unsafeCode.rb:2:12:2:17 | target : | semmle.label | target : |
| impl/unsafeCode.rb:3:17:3:25 | #{...} | semmle.label | #{...} |
| impl/unsafeCode.rb:7:12:7:12 | x : | semmle.label | x : |
| impl/unsafeCode.rb:8:30:8:30 | x | semmle.label | x |
| impl/unsafeCode.rb:12:12:12:12 | x : | semmle.label | x : |
| impl/unsafeCode.rb:13:33:13:33 | x | semmle.label | x |
| impl/unsafeCode.rb:28:17:28:22 | my_arr : | semmle.label | my_arr : |
| impl/unsafeCode.rb:29:10:29:15 | my_arr | semmle.label | my_arr |
| impl/unsafeCode.rb:32:21:32:21 | x : | semmle.label | x : |
| impl/unsafeCode.rb:34:10:34:12 | arr | semmle.label | arr |
| impl/unsafeCode.rb:37:15:37:15 | x : | semmle.label | x : |
| impl/unsafeCode.rb:40:10:40:12 | arr | semmle.label | arr |
| impl/unsafeCode.rb:44:10:44:12 | arr | semmle.label | arr |
| impl/unsafeCode.rb:47:15:47:15 | x : | semmle.label | x : |
| impl/unsafeCode.rb:49:9:49:12 | #{...} | semmle.label | #{...} |
subpaths
#select
| impl/unsafeCode.rb:3:17:3:25 | #{...} | impl/unsafeCode.rb:2:12:2:17 | target : | impl/unsafeCode.rb:3:17:3:25 | #{...} | This string interpolation which depends on $@ is later $@. | impl/unsafeCode.rb:2:12:2:17 | target | library input | impl/unsafeCode.rb:3:5:3:27 | call to eval | interpreted as code |
| impl/unsafeCode.rb:8:30:8:30 | x | impl/unsafeCode.rb:7:12:7:12 | x : | impl/unsafeCode.rb:8:30:8:30 | x | This string format which depends on $@ is later $@. | impl/unsafeCode.rb:7:12:7:12 | x | library input | impl/unsafeCode.rb:8:5:8:32 | call to eval | interpreted as code |
| impl/unsafeCode.rb:13:33:13:33 | x | impl/unsafeCode.rb:12:12:12:12 | x : | impl/unsafeCode.rb:13:33:13:33 | x | This string format which depends on $@ is later $@. | impl/unsafeCode.rb:12:12:12:12 | x | library input | impl/unsafeCode.rb:13:5:13:35 | call to eval | interpreted as code |
| impl/unsafeCode.rb:29:10:29:15 | my_arr | impl/unsafeCode.rb:28:17:28:22 | my_arr : | impl/unsafeCode.rb:29:10:29:15 | my_arr | This array which depends on $@ is later $@. | impl/unsafeCode.rb:28:17:28:22 | my_arr | library input | impl/unsafeCode.rb:29:5:29:27 | call to eval | interpreted as code |
| impl/unsafeCode.rb:34:10:34:12 | arr | impl/unsafeCode.rb:32:21:32:21 | x : | impl/unsafeCode.rb:34:10:34:12 | arr | This array which depends on $@ is later $@. | impl/unsafeCode.rb:32:21:32:21 | x | library input | impl/unsafeCode.rb:34:5:34:24 | call to eval | interpreted as code |
| impl/unsafeCode.rb:40:10:40:12 | arr | impl/unsafeCode.rb:37:15:37:15 | x : | impl/unsafeCode.rb:40:10:40:12 | arr | This array which depends on $@ is later $@. | impl/unsafeCode.rb:37:15:37:15 | x | library input | impl/unsafeCode.rb:40:5:40:24 | call to eval | interpreted as code |
| impl/unsafeCode.rb:44:10:44:12 | arr | impl/unsafeCode.rb:37:15:37:15 | x : | impl/unsafeCode.rb:44:10:44:12 | arr | This array which depends on $@ is later $@. | impl/unsafeCode.rb:37:15:37:15 | x | library input | impl/unsafeCode.rb:44:5:44:24 | call to eval | interpreted as code |
| impl/unsafeCode.rb:49:9:49:12 | #{...} | impl/unsafeCode.rb:47:15:47:15 | x : | impl/unsafeCode.rb:49:9:49:12 | #{...} | This string interpolation which depends on $@ is later $@. | impl/unsafeCode.rb:47:15:47:15 | x | library input | impl/unsafeCode.rb:51:5:51:13 | call to eval | interpreted as code |

Просмотреть файл

@ -0,0 +1 @@
queries/security/cwe-094/UnsafeCodeConstruction.ql

Просмотреть файл

@ -0,0 +1,53 @@
class Foobar
def foo1(target)
eval("foo = #{target}") # NOT OK
end
# sprintf
def foo2(x)
eval(sprintf("foo = %s", x)) # NOT OK
end
# String#%
def foo3(x)
eval("foo = %{foo}" % {foo: x}) # NOT OK
end
def indirect_eval(x)
eval(x) # OK - no construction.
end
def send_stuff(x)
foo.send("foo_#{x}") # OK - attacker cannot control entire string.
end
def named_code(code)
eval("def \n #{code} \n end") # OK - parameter is named code
end
def joinStuff(my_arr)
eval(my_arr.join("\n")) # NOT OK
end
def joinWithElemt(x)
arr = [x, "foobar"]
eval(arr.join("\n")) # NOT OK
end
def pushArr(x, y)
arr = []
arr.push(x)
eval(arr.join("\n")) # NOT OK
arr2 = []
arr2 << y
eval(arr.join("\n")) # NOT OK
end
def hereDoc(x)
foo = <<~HERE
#{x}
HERE
eval(foo) # NOT OK
end
end

Просмотреть файл

@ -0,0 +1,5 @@
Gem::Specification.new do |s|
s.name = 'unsafe-code'
s.require_path = "impl"
end