Merge branch 'main' into 13332-codeql-model-editor-csharp

This commit is contained in:
Felicity Chapman 2024-02-28 11:43:02 +00:00
Родитель 897786dc99 65ec80967e
Коммит 053e172d94
40 изменённых файлов: 12438 добавлений и 2927 удалений

Просмотреть файл

@ -2359,6 +2359,47 @@ class TranslatedDestructorFieldDestruction extends TranslatedNonConstantExpr, St
private TranslatedExpr getDestructorCall() { result = getTranslatedExpr(expr.getExpr()) }
}
/**
* The IR translation of a vacuous destructor call. That is, an expression that
* looks like a destructor call, but has no effect.
*
* Note that, even though there's no destructor call, we should still evaluate
* the qualifier.
*
* This element declares no instructions of its own and has no result. Its only
* child is the translated qualifier, and its first and last instructions are
* those of that child.
*/
class TranslatedVacuousDestructorCall extends TranslatedNonConstantExpr {
override VacuousDestructorCall expr;
// This element has no instructions (see `hasInstruction`), so there are no
// instruction-level successors to report.
override Instruction getInstructionSuccessorInternal(InstructionTag tag, EdgeKind kind) { none() }
/** Gets the translation of the fully converted qualifier of the call. */
final TranslatedExpr getQualifier() {
result = getTranslatedExpr(expr.getQualifier().getFullyConverted())
}
// Evaluation of this element begins with the first instruction of the qualifier.
override Instruction getFirstInstruction(EdgeKind kind) {
result = this.getQualifier().getFirstInstruction(kind)
}
// Once the qualifier has been evaluated, control continues with whatever the
// parent designates as the successor of this element.
override Instruction getChildSuccessorInternal(TranslatedElement child, EdgeKind kind) {
child = this.getQualifier() and
result = this.getParent().getChildSuccessor(this, kind)
}
// The qualifier is the only child, at index 0.
override TranslatedElement getChildInternal(int id) {
id = 0 and
result = this.getQualifier()
}
// A vacuous destructor call has no result instruction.
override Instruction getResult() { none() }
// No instructions are declared for the call itself.
override predicate hasInstruction(Opcode opcode, InstructionTag tag, CppType resultType) {
none()
}
// The last instructions of this element are the last instructions of the qualifier.
override Instruction getALastInstructionInternal() {
result = this.getQualifier().getALastInstruction()
}
}
/**
* The IR translation of the `?:` operator. This class has the portions of the implementation that
* are shared between the standard three-operand form (`a ? b : c`) and the GCC-extension

Просмотреть файл

@ -11,7 +11,7 @@ private int getTypeSizeWorkaround(Type type) {
exists(Type unspecifiedType |
unspecifiedType = type.getUnspecifiedType() and
(
unspecifiedType instanceof FunctionReferenceType and
(unspecifiedType instanceof FunctionReferenceType or unspecifiedType instanceof RoutineType) and
result = getPointerSize()
or
exists(PointerToMemberType ptmType |
@ -176,7 +176,7 @@ private IRType getIRTypeForPRValue(Type type) {
isPointerIshType(unspecifiedType) and
result.(IRAddressType).getByteSize() = getTypeSize(unspecifiedType)
or
unspecifiedType instanceof FunctionPointerIshType and
(unspecifiedType instanceof FunctionPointerIshType or unspecifiedType instanceof RoutineType) and
result.(IRFunctionAddressType).getByteSize() = getTypeSize(type)
or
unspecifiedType instanceof VoidType and result instanceof IRVoidType

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -2189,6 +2189,26 @@ void static_variable_with_destructor_3() {
static ClassWithDestructor global_class_with_destructor;
namespace vacuous_destructor_call { // Explicit destructor-call syntax applied to a class type and to `int`.
template<typename T>
T& get(T& t) { return t; } // Returns its argument; used as the qualifier expression of the destructor call below.
template<typename T>
void call_destructor(T& t) {
get(t).~T(); // Explicit destructor call on the result of get(); for T = int this is a pseudo-destructor call.
}
void non_vacuous_destructor_call() {
ClassWithDestructor c;
call_destructor(c); // Instantiates call_destructor with T = ClassWithDestructor.
}
void vacuous_destructor_call() {
int i;
call_destructor(i); // Instantiates call_destructor with T = int.
}
}
void TryCatchDestructors(bool b) {
try {
String s;
@ -2296,4 +2316,19 @@ void VoidReturnDestructors() {
return VoidFunc();
}
namespace return_routine_type { // A function that returns a pointer to a member function.
struct HasVoidToIntFunc
{
void VoidToInt(int); // Declared only; its address is taken in GetVoidToIntFunc().
};
typedef void (HasVoidToIntFunc::*VoidToIntMemberFunc)(int); // Pointer to a HasVoidToIntFunc member function taking int and returning void.
static VoidToIntMemberFunc GetVoidToIntFunc()
{
return &HasVoidToIntFunc::VoidToInt; // Returns the address of the member function.
}
}
// semmle-extractor-options: -std=c++20 --clang

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -7,7 +7,6 @@ missingOperandType
duplicateChiOperand
sideEffectWithoutPrimary
instructionWithoutSuccessor
| VacuousDestructorCall.cpp:2:29:2:29 | InitializeIndirection: y | Instruction 'InitializeIndirection: y' has no successors in function '$@'. | VacuousDestructorCall.cpp:2:6:2:6 | void CallDestructor<int>(int, int*) | void CallDestructor<int>(int, int*) |
| ms_try_mix.cpp:35:13:35:19 | ThrowValue: throw ... | Instruction 'ThrowValue: throw ...' has no successors in function '$@'. | ms_try_mix.cpp:29:6:29:19 | void ms_finally_mix(int) | void ms_finally_mix(int) |
| ms_try_mix.cpp:53:5:53:11 | ThrowValue: throw ... | Instruction 'ThrowValue: throw ...' has no successors in function '$@'. | ms_try_mix.cpp:49:6:49:28 | void ms_empty_finally_at_end() | void ms_empty_finally_at_end() |
| statements.cpp:25:5:25:9 | ReThrow: re-throw exception | Instruction 'ReThrow: re-throw exception ' has no successors in function '$@'. | statements.cpp:21:6:21:16 | void early_throw(int) | void early_throw(int) |

Просмотреть файл

@ -8,9 +8,6 @@ missingOperandType
duplicateChiOperand
sideEffectWithoutPrimary
instructionWithoutSuccessor
| VacuousDestructorCall.cpp:2:29:2:29 | InitializeIndirection: y | Instruction 'InitializeIndirection: y' has no successors in function '$@'. | VacuousDestructorCall.cpp:2:6:2:6 | void CallDestructor<int>(int, int*) | void CallDestructor<int>(int, int*) |
| VacuousDestructorCall.cpp:3:3:3:3 | VariableAddress: x | Instruction 'VariableAddress: x' has no successors in function '$@'. | VacuousDestructorCall.cpp:2:6:2:6 | void CallDestructor<int>(int, int*) | void CallDestructor<int>(int, int*) |
| VacuousDestructorCall.cpp:4:3:4:3 | Load: y | Instruction 'Load: y' has no successors in function '$@'. | VacuousDestructorCall.cpp:2:6:2:6 | void CallDestructor<int>(int, int*) | void CallDestructor<int>(int, int*) |
| ms_try_mix.cpp:35:13:35:19 | ThrowValue: throw ... | Instruction 'ThrowValue: throw ...' has no successors in function '$@'. | ms_try_mix.cpp:29:6:29:19 | void ms_finally_mix(int) | void ms_finally_mix(int) |
| ms_try_mix.cpp:53:5:53:11 | ThrowValue: throw ... | Instruction 'ThrowValue: throw ...' has no successors in function '$@'. | ms_try_mix.cpp:49:6:49:28 | void ms_empty_finally_at_end() | void ms_empty_finally_at_end() |
| statements.cpp:25:5:25:9 | ReThrow: re-throw exception | Instruction 'ReThrow: re-throw exception ' has no successors in function '$@'. | statements.cpp:21:6:21:16 | void early_throw(int) | void early_throw(int) |
@ -33,7 +30,6 @@ multipleIRTypes
lostReachability
backEdgeCountMismatch
useNotDominatedByDefinition
| VacuousDestructorCall.cpp:2:29:2:29 | Address | Operand 'Address' is not dominated by its definition in function '$@'. | VacuousDestructorCall.cpp:2:6:2:6 | void CallDestructor<int>(int, int*) | void CallDestructor<int>(int, int*) |
| ms_try_except.cpp:9:19:9:19 | Left | Operand 'Left' is not dominated by its definition in function '$@'. | ms_try_except.cpp:2:6:2:18 | void ms_try_except(int) | void ms_try_except(int) |
| ms_try_except.cpp:9:19:9:19 | Left | Operand 'Left' is not dominated by its definition in function '$@'. | ms_try_except.cpp:2:6:2:18 | void ms_try_except(int) | void ms_try_except(int) |
| ms_try_except.cpp:19:17:19:21 | Left | Operand 'Left' is not dominated by its definition in function '$@'. | ms_try_except.cpp:2:6:2:18 | void ms_try_except(int) | void ms_try_except(int) |

Просмотреть файл

@ -7,7 +7,6 @@ missingOperandType
duplicateChiOperand
sideEffectWithoutPrimary
instructionWithoutSuccessor
| VacuousDestructorCall.cpp:2:29:2:29 | InitializeIndirection: y | Instruction 'InitializeIndirection: y' has no successors in function '$@'. | VacuousDestructorCall.cpp:2:6:2:6 | void CallDestructor<int>(int, int*) | void CallDestructor<int>(int, int*) |
| ms_try_mix.cpp:35:13:35:19 | ThrowValue: throw ... | Instruction 'ThrowValue: throw ...' has no successors in function '$@'. | ms_try_mix.cpp:29:6:29:19 | void ms_finally_mix(int) | void ms_finally_mix(int) |
| ms_try_mix.cpp:53:5:53:11 | ThrowValue: throw ... | Instruction 'ThrowValue: throw ...' has no successors in function '$@'. | ms_try_mix.cpp:49:6:49:28 | void ms_empty_finally_at_end() | void ms_empty_finally_at_end() |
| statements.cpp:25:5:25:9 | ReThrow: re-throw exception | Instruction 'ReThrow: re-throw exception ' has no successors in function '$@'. | statements.cpp:21:6:21:16 | void early_throw(int) | void early_throw(int) |

Просмотреть файл

@ -0,0 +1,7 @@
/** An `@modifiable` entity that has a row in the `compiler_generated` relation. */
class Modifiable extends @modifiable {
Modifiable() { compiler_generated(this) }
/** Has no result; the body is `none()`. */
string toString() { none() }
}
// Emit every such entity, one per output row.
select any(Modifiable m)

Просмотреть файл

@ -0,0 +1,7 @@
/** An `@expr` entity that has a row in the `compiler_generated` relation. */
class Expression extends @expr {
Expression() { compiler_generated(this) }
/** Has no result; the body is `none()`. */
string toString() { none() }
}
// Emit every such entity, one per output row.
select any(Expression e)

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,4 @@
description: Split `compiler_generated` into `expr_compiler_generated` and `compiler_generated`.
compatibility: backwards
compiler_generated.rel: run compiler_generated.qlo
expr_compiler_generated.rel: run expr_compiler_generated.qlo

Просмотреть файл

@ -54,7 +54,7 @@ namespace Semmle.Extraction.CSharp.Entities
}
if (info.IsCompilerGenerated)
trapFile.expr_compiler_generated(this);
trapFile.compiler_generated(this);
if (info.ExprValue is string value)
trapFile.expr_value(this, value);

Просмотреть файл

@ -9,12 +9,14 @@ namespace Semmle.Extraction.CSharp.Entities
{
protected readonly TSyntax Stmt;
private readonly Location location;
private readonly bool isCompilerGenerated;
protected Statement(Context cx, TSyntax stmt, Kinds.StmtKind kind, IStatementParentEntity parent, int child, Location location, bool isCompilerGenerated = false)
: base(cx, kind, parent, child)
{
Stmt = stmt;
this.location = location;
this.isCompilerGenerated = isCompilerGenerated;
if (!isCompilerGenerated)
{
cx.BindComments(this, location.Symbol);
@ -29,6 +31,11 @@ namespace Semmle.Extraction.CSharp.Entities
base.Populate(trapFile);
trapFile.stmt_location(this, location);
if (isCompilerGenerated)
{
trapFile.compiler_generated(this);
}
}
public override Microsoft.CodeAnalysis.Location ReportingLocation => Stmt.GetLocation();

Просмотреть файл

@ -164,9 +164,6 @@ namespace Semmle.Extraction.CSharp
internal static void expr_call(this TextWriter trapFile, Expression expr, Method target) =>
trapFile.WriteTuple("expr_call", expr, target);
internal static void expr_compiler_generated(this TextWriter trapFile, Expression expr) =>
trapFile.WriteTuple("expr_compiler_generated", expr);
internal static void expr_flowstate(this TextWriter trapFile, Expression expr, int flowState) =>
trapFile.WriteTuple("expr_flowstate", expr, flowState);

Просмотреть файл

@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* C#: The table `expr_compiler_generated` has been deleted and its content has been added to `compiler_generated`.

Просмотреть файл

@ -27,7 +27,8 @@ class PrintAstConfiguration extends TPrintAstConfiguration {
}
private predicate shouldPrint(Element e, Location l) {
exists(PrintAstConfiguration config | config.shouldPrint(e, l))
exists(PrintAstConfiguration config | config.shouldPrint(e, l)) and
not e.(Stmt).isCompilerGenerated()
}
private predicate isImplicitExpression(ControlFlowElement element) {

Просмотреть файл

@ -43,6 +43,9 @@ class Stmt extends ControlFlowElement, @stmt {
* For example converts `{ { return x; } }` to `return x;`.
*/
Stmt stripSingletonBlocks() { result = this }
/** Holds if this statement is compiler generated, that is, it has a row in the `compiler_generated` relation. */
predicate isCompilerGenerated() { compiler_generated(this) }
}
/**

Просмотреть файл

@ -65,7 +65,7 @@ class Expr extends DotNet::Expr, ControlFlowElement, @expr {
* Holds if this expression is generated by the compiler and does not appear
* explicitly in the source code.
*/
predicate isImplicit() { expr_compiler_generated(this) }
predicate isImplicit() { compiler_generated(this) }
/**
* Gets an expression that is the result of stripping (recursively) all

Просмотреть файл

@ -682,8 +682,6 @@ has_modifiers(
int id: @modifiable_direct ref,
int mod_id: @modifier ref);
compiler_generated(unique int id: @modifiable ref);
/** MEMBERS **/
@member = @method | @constructor | @destructor | @field | @property | @event | @operator | @indexer | @type;
@ -1271,9 +1269,6 @@ mutator_invocation_mode(
unique int id: @operator_invocation_expr ref,
int mode: int ref /* prefix = 1, postfix = 2*/);
expr_compiler_generated(
unique int id: @expr ref);
expr_value(
unique int id: @expr ref,
string value: string ref);
@ -1316,6 +1311,10 @@ lambda_expr_return_type(
unique int id: @lambda_expr ref,
int type_id: @type_or_ref ref);
/* Compiler generated */
compiler_generated(unique int id: @element ref);
/** CONTROL/DATA FLOW **/
@control_flow_element = @stmt | @expr;

Просмотреть файл

@ -0,0 +1,7 @@
/** An `@element` entity that has a row in `expr_compiler_generated` or in `compiler_generated`. */
class Element extends @element {
Element() { expr_compiler_generated(this) or compiler_generated(this) }
/** Has no result; the body is `none()`. */
string toString() { none() }
}
// Emit the union of both relations, one entity per output row.
select any(Element e)

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,4 @@
description: Merge `expr_compiler_generated` into `compiler_generated` and add support for compiler generated statements.
compatibility: backwards
compiler_generated.rel: run compiler_generated.qlo
expr_compiler_generated.rel: delete

Просмотреть файл

@ -25,7 +25,6 @@ constructors.cs:
# 23| -1: [TypeMention] object
# 23| 1: [Parameter] s
# 23| -1: [TypeMention] string
# 23| 4: [BlockStmt] {...}
# 25| 5: [InstanceConstructor] C1
#-----| 2: (Parameters)
# 25| 0: [Parameter] o
@ -48,4 +47,3 @@ constructors.cs:
# 28| 3: [ConstructorInitializer] call to constructor C1
# 28| 0: [ParameterAccess] access to parameter o
# 28| 1: [ParameterAccess] access to parameter s
# 28| 4: [BlockStmt] {...}

Просмотреть файл

@ -884,7 +884,6 @@ Record.cs:
# 27| -1: [TypeMention] string
# 27| 1: [Parameter] LastName
# 27| -1: [TypeMention] string
# 27| 4: [BlockStmt] {...}
# 27| 16: [Property] FirstName
# 27| 3: [Getter] get_FirstName
# 27| 4: [Setter] set_FirstName
@ -917,7 +916,6 @@ Record.cs:
# 30| 3: [ConstructorInitializer] call to constructor Person1
# 30| 0: [ParameterAccess] access to parameter FirstName
# 30| 1: [ParameterAccess] access to parameter LastName
# 29| 4: [BlockStmt] {...}
# 29| 17: [Property] Subject
# 29| 3: [Getter] get_Subject
# 29| 4: [Setter] set_Subject
@ -945,7 +943,6 @@ Record.cs:
# 33| 3: [ConstructorInitializer] call to constructor Person1
# 33| 0: [ParameterAccess] access to parameter FirstName
# 33| 1: [ParameterAccess] access to parameter LastName
# 32| 4: [BlockStmt] {...}
# 32| 17: [Property] Level
# 32| 3: [Getter] get_Level
# 32| 4: [Setter] set_Level
@ -966,7 +963,6 @@ Record.cs:
#-----| 2: (Parameters)
# 35| 0: [Parameter] Name
# 35| -1: [TypeMention] string
# 35| 4: [BlockStmt] {...}
# 35| 16: [Property] Name
# 35| 3: [Getter] get_Name
# 35| 4: [Setter] set_Name
@ -993,7 +989,6 @@ Record.cs:
# 41| -1: [TypeMention] string
# 41| 3: [ConstructorInitializer] call to constructor Pet
# 41| 0: [ParameterAccess] access to parameter Name
# 41| 4: [BlockStmt] {...}
# 41| 15: [Property] EqualityContract
# 41| 3: [Getter] get_EqualityContract
# 43| 16: [Method] WagTail
@ -1035,7 +1030,6 @@ Record.cs:
#-----| 2: (Parameters)
# 54| 0: [Parameter] A
# 54| -1: [TypeMention] string
# 54| 4: [BlockStmt] {...}
# 54| 16: [Property] A
# 54| 3: [Getter] get_A
# 54| 4: [Setter] set_A
@ -1060,7 +1054,6 @@ Record.cs:
# 56| -1: [TypeMention] string
# 56| 3: [ConstructorInitializer] call to constructor R1
# 56| 0: [ParameterAccess] access to parameter A
# 56| 4: [BlockStmt] {...}
# 56| 17: [Property] B
# 56| 3: [Getter] get_B
# 56| 4: [Setter] set_B

Просмотреть файл

@ -369,7 +369,6 @@ Tuples.cs:
# 95| -1: [TypeMention] string
# 95| 1: [Parameter] j
# 95| -1: [TypeMention] int
# 95| 4: [BlockStmt] {...}
# 95| 16: [Property] i
# 95| 3: [Getter] get_i
# 95| 4: [Setter] set_i

Просмотреть файл

@ -2411,7 +2411,6 @@ expressions.cs:
#-----| 2: (Parameters)
# 518| 0: [Parameter] oc1
# 518| -1: [TypeMention] object
# 518| 4: [BlockStmt] {...}
# 520| 23: [Class] ClassC2
#-----| 3: (Base types)
# 520| 0: [TypeMention] ClassC1
@ -2421,4 +2420,3 @@ expressions.cs:
# 520| -1: [TypeMention] object
# 520| 3: [ConstructorInitializer] call to constructor ClassC1
# 520| 0: [ParameterAccess] access to parameter oc2
# 520| 4: [BlockStmt] {...}

Просмотреть файл

@ -0,0 +1,3 @@
| fixed.cs:3:7:3:11 | {...} | fixed.cs:3:7:3:11 | Fixed |
| statements.cs:7:11:7:15 | {...} | statements.cs:7:11:7:15 | Class |
| statements.cs:243:15:243:25 | {...} | statements.cs:243:15:243:25 | AccountLock |

Просмотреть файл

@ -0,0 +1,5 @@
import csharp
// Lists every compiler-generated statement together with its enclosing callable.
from Stmt stmt
where stmt.isCompilerGenerated()
select stmt, stmt.getEnclosingCallable()

Просмотреть файл

@ -11,6 +11,7 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat
basic-query-for-csharp-code
codeql-library-for-csharp
analyzing-data-flow-in-csharp
customizing-library-models-for-csharp
- :doc:`Basic query for C# code <basic-query-for-csharp-code>`: Learn to write and run a simple CodeQL query.
@ -18,4 +19,5 @@ Experiment and learn how to write effective and efficient queries for CodeQL dat
- :doc:`Analyzing data flow in C# <analyzing-data-flow-in-csharp>`: You can use CodeQL to track the flow of data through a C# program to its use.
- :doc:`Customizing library models for C# <customizing-library-models-for-csharp>`: You can model frameworks and libraries that your codebase depends on using data extensions and publish them as CodeQL model packs.

Просмотреть файл

@ -0,0 +1,342 @@
.. _customizing-library-models-for-csharp:
Customizing library models for C#
=================================
You can model the methods and callables that control data flow in any framework or library. This is especially useful for custom frameworks or niche libraries that are not supported by the standard CodeQL libraries.
.. include:: ../reusables/beta-note-customizing-library-models.rst
About this article
------------------
This article contains reference material about how to define custom models for sources, sinks, and flow summaries for C# dependencies in data extension files.
About data extensions
---------------------
You can customize analysis by defining models (summaries, sinks, and sources) of your code's C#/.NET dependencies in data extension files. Each model defines the behavior of one or more elements of your library or framework, such as methods, properties, and callables. When you run dataflow analysis, these models expand the potential sources and sinks tracked by dataflow analysis and improve the precision of results.
Most of the security queries search for paths from a source of untrusted input to a sink that represents a vulnerability. This is known as taint tracking. Each source is a starting point for dataflow analysis to track tainted data and each sink is an end point.
Taint tracking queries also need to know how data can flow through elements that are not included in the source code. These are modeled as summaries. A summary model enables queries to synthesize the flow behavior through elements in dependency code that is not stored in your repository.
Syntax used to define an element in an extension file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Each model of an element is defined using a data extension where each tuple constitutes a model.
A data extension file to extend the standard C# queries included with CodeQL is a YAML file with the form:
.. code-block:: yaml
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: <name of extensible predicate>
data:
- <tuple1>
- <tuple2>
- ...
Each YAML file may contain one or more top-level extensions.
- ``addsTo`` defines the CodeQL pack name and extensible predicate that the extension is injected into.
- ``data`` defines one or more rows of tuples that are injected as values into the extensible predicate. The number of columns and their types must match the definition of the extensible predicate.
Data extensions use union semantics, which means that the tuples of all extensions for a single extensible predicate are combined, duplicates are removed, and all of the remaining tuples are queryable by referencing the extensible predicate.
Publish data extension files in a CodeQL model pack to share
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You can group one or more data extension files into a CodeQL model pack and publish it to the GitHub Container Registry. This makes it easy for anyone to download the model pack and use it to extend their analysis. For more information, see `Creating a CodeQL model pack <https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack>`__ and `Publishing and using CodeQL packs <https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/publishing-and-using-codeql-packs/>`__ in the CodeQL CLI documentation.
Extensible predicates used to create custom models in C#
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The CodeQL library for C# analysis exposes the following extensible predicates:
- ``sourceModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance)``. This is used to model sources of potentially tainted data.
- ``sinkModel(namespace, type, subtypes, name, signature, ext, input, kind, provenance)``. This is used to model sinks where tainted data may be used in a way that makes the code vulnerable.
- ``summaryModel(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance)``. This is used to model flow through elements.
- ``neutralModel(namespace, type, name, signature, kind, provenance)``. This is similar to a summary model but used to model the flow of values that have only a minor impact on the dataflow analysis. Manual neutral models (those with a provenance such as ``manual`` or ``ai-manual``) can be used to override generated summary models (those with a provenance such as ``df-generated``), so that the summary model will be ignored. Other than that, neutral models have no effect.
The extensible predicates are populated using the models defined in data extension files.
Examples of custom model definitions
------------------------------------
The examples in this section are taken from the standard CodeQL C# query pack published by GitHub. They demonstrate how to add tuples to extend extensible predicates that are used by the standard queries.
Example: Taint sink in the ``System.Data.SqlClient`` namespace
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example shows how the C# query pack models the argument of the ``SqlCommand`` constructor as a SQL injection sink.
This is the constructor of the ``SqlCommand`` class, which is located in the ``System.Data.SqlClient`` namespace.
.. code-block:: csharp
public static void TaintSink(SqlConnection conn, string query) {
SqlCommand command = new SqlCommand(query, conn); // The first argument to this constructor is a SQL injection sink.
...
}
We need to add a tuple to the ``sinkModel``\(namespace, type, subtypes, name, signature, ext, input, kind, provenance) extensible predicate by updating a data extension file.
.. code-block:: yaml
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: sinkModel
data:
- ["System.Data.SqlClient", "SqlCommand", False, "SqlCommand", "(System.String,System.Data.SqlClient.SqlConnection)", "", "Argument[0]", "sql-injection", "manual"]
Since we want to add a new sink, we need to add a tuple to the ``sinkModel`` extensible predicate.
The first five values identify the callable (in this case a method) to be modeled as a sink.
- The first value ``System.Data.SqlClient`` is the namespace name.
- The second value ``SqlCommand`` is the name of the class (type) that contains the method.
- The third value ``False`` is a flag that indicates whether or not the sink also applies to all overrides of the method.
- The fourth value ``SqlCommand`` is the method name. Constructors are named after the class.
- The fifth value ``(System.String,System.Data.SqlClient.SqlConnection)`` is the method input type signature. The type names must be fully qualified.
The sixth value should be left empty and is out of scope for this documentation.
The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the sink.
- The seventh value ``Argument[0]`` is the ``access path`` to the first argument passed to the method, which means that this is the location of the sink.
- The eighth value ``sql-injection`` is the kind of the sink. The sink kind is used to define the queries where the sink is in scope. In this case, these are the SQL injection queries.
- The ninth value ``manual`` is the provenance of the sink, which is used to identify the origin of the sink.
Example: Taint source from the ``System.Net.Sockets`` namespace
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example shows how the C# query pack models the return value from the ``GetStream`` method as a ``remote`` source.
This is the ``GetStream`` method in the ``TcpClient`` class, which is located in the ``System.Net.Sockets`` namespace.
.. code-block:: csharp
public static void Tainted(TcpClient client) {
NetworkStream stream = client.GetStream(); // The return value of this method is a remote source of taint.
...
}
We need to add a tuple to the ``sourceModel``\(namespace, type, subtypes, name, signature, ext, output, kind, provenance) extensible predicate by updating a data extension file.
.. code-block:: yaml
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: sourceModel
data:
- ["System.Net.Sockets", "TcpClient", False, "GetStream", "()", "", "ReturnValue", "remote", "manual"]
Since we are adding a new source, we need to add a tuple to the ``sourceModel`` extensible predicate.
The first five values identify the callable (in this case a method) to be modeled as a source.
- The first value ``System.Net.Sockets`` is the namespace name.
- The second value ``TcpClient`` is the name of the class (type) that contains the source.
- The third value ``False`` is a flag that indicates whether or not the source also applies to all overrides of the method.
- The fourth value ``GetStream`` is the method name.
- The fifth value ``()`` is the method input type signature.
The sixth value should be left empty and is out of scope for this documentation.
The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the source.
- The seventh value ``ReturnValue`` is the access path to the return of the method, which means that it is the return value that should be considered a source of tainted input.
- The eighth value ``remote`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security-related queries as it means a remote source of untrusted data. For example, the SQL injection query uses ``remote`` sources.
- The ninth value ``manual`` is the provenance of the source, which is used to identify the origin of the source.
Example: Add flow through the ``Concat`` method
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example shows how the C# query pack models flow through a method for a simple case.
This pattern covers many of the cases where we need to summarize flow through a method that is stored in a library or framework outside the repository.
.. code-block:: csharp
public static void TaintFlow(string s1, string s2) {
string t = String.Concat(s1, s2); // There is taint flow from s1 and s2 to t.
...
}
We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file:
.. code-block:: yaml
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: summaryModel
data:
- ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[0]", "ReturnValue", "taint", "manual"]
- ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[1]", "ReturnValue", "taint", "manual"]
Since we are adding flow through a method, we need to add tuples to the ``summaryModel`` extensible predicate.
Each tuple defines flow from one argument to the return value.
The first row defines flow from the first argument (``s1`` in the example) to the return value (``t`` in the example) and the second row defines flow from the second argument (``s2`` in the example) to the return value (``t`` in the example).
The first five values identify the callable (in this case a method) to be modeled as a summary.
These are the same for both of the rows above as we are adding two summaries for the same method.
- The first value ``System`` is the namespace name.
- The second value ``String`` is the class (type) name.
- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method.
- The fourth value ``Concat`` is the method name.
- The fifth value ``(System.Object,System.Object)`` is the method input type signature.
The sixth value should be left empty and is out of scope for this documentation.
The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary.
- The seventh value is the access path to the input (where data flows from). ``Argument[0]`` is the access path to the first argument (``s1`` in the example) and ``Argument[1]`` is the access path to the second argument (``s2`` in the example).
- The eighth value ``ReturnValue`` is the access path to the output (where data flows to), in this case ``ReturnValue``, which means that the input flows to the return value.
- The ninth value ``taint`` is the kind of the flow. ``taint`` means that taint is propagated through the call.
- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary.
It would also be possible to merge the two rows into one by using a comma-separated list in the seventh value. This would be useful if the method has many arguments and the flow is the same for all of them.
.. code-block:: yaml
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: summaryModel
data:
- ["System", "String", False, "Concat", "(System.Object,System.Object)", "", "Argument[0,1]", "ReturnValue", "taint", "manual"]
This row defines flow from both the first and the second argument to the return value. The seventh value ``Argument[0,1]`` is shorthand for specifying an access path to both ``Argument[0]`` and ``Argument[1]``.
Example: Add flow through the ``Trim`` method
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example shows how the C# query pack models flow through a method for a simple case.
.. code-block:: csharp
public static void TaintFlow(string s) {
string t = s.Trim(); // There is taint flow from s to t.
...
}
We need to add a tuple to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file:
.. code-block:: yaml
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: summaryModel
data:
- ["System", "String", False, "Trim", "()", "", "Argument[this]", "ReturnValue", "taint", "manual"]
Since we are adding flow through a method, we need to add a tuple to the ``summaryModel`` extensible predicate.
Each tuple defines flow from one argument to the return value.
The row defines flow from the qualifier of the method call (``s`` in the example) to the return value (``t`` in the example).
The first five values identify the callable (in this case a method) to be modeled as a summary.
In this example there is only one row, as we are adding a single summary for the method.
- The first value ``System`` is the namespace name.
- The second value ``String`` is the class (type) name.
- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method.
- The fourth value ``Trim`` is the method name.
- The fifth value ``()`` is the method input type signature.
The sixth value should be left empty and is out of scope for this documentation.
The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary.
- The seventh value is the access path to the input (where data flows from). ``Argument[this]`` is the access path to the qualifier (``s`` in the example).
- The eighth value ``ReturnValue`` is the access path to the output (where data flows to), in this case ``ReturnValue``, which means that the input flows to the return value.
- The ninth value ``taint`` is the kind of the flow. ``taint`` means that taint is propagated through the call.
- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary.
Example: Add flow through the ``Select`` method
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example shows how the C# query pack models a more complex flow through a method.
Here we model flow through higher order methods and collection types, as well as how to handle extension methods and generics.
.. code-block:: csharp
public static void TaintFlow(IEnumerable<string> stream) {
IEnumerable<string> lines = stream.Select(item => item + "\n");
...
}
We need to add tuples to the ``summaryModel``\(namespace, type, subtypes, name, signature, ext, input, output, kind, provenance) extensible predicate by updating a data extension file:
.. code-block:: yaml
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: summaryModel
data:
- ["System.Linq", "Enumerable", False, "Select<TSource,TResult>", "(System.Collections.Generic.IEnumerable<TSource>,System.Func<TSource,TResult>)", "", "Argument[0].Element", "Argument[1].Parameter[0]", "value", "manual"]
- ["System.Linq", "Enumerable", False, "Select<TSource,TResult>", "(System.Collections.Generic.IEnumerable<TSource>,System.Func<TSource,TResult>)", "", "Argument[1].ReturnValue", "ReturnValue.Element", "value", "manual"]
Since we are adding flow through a method, we need to add tuples to the ``summaryModel`` extensible predicate.
Each tuple defines part of the flow that comprises the total flow through the ``Select`` method.
The first five values identify the callable (in this case a method) to be modeled as a summary.
These are the same for both of the rows above as we are adding two summaries for the same method.
- The first value ``System.Linq`` is the namespace name.
- The second value ``Enumerable`` is the class (type) name.
- The third value ``False`` is a flag that indicates whether or not the summary also applies to all overrides of the method.
- The fourth value ``Select<TSource,TResult>`` is the method name, along with the type parameters for the method. The names of the generic type parameters provided in the model must match the names of the generic type parameters in the method signature in the source code.
- The fifth value ``(System.Collections.Generic.IEnumerable<TSource>,System.Func<TSource,TResult>)`` is the method input type signature. The generics in the signature must match the generics in the method signature in the source code.
The sixth value should be left empty and is out of scope for this documentation.
The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the summary definition.
- The seventh value is the access path to the ``input`` (where data flows from).
- The eighth value is the access path to the ``output`` (where data flows to).
For the first row:
- The seventh value is ``Argument[0].Element``, which is the access path to the elements of the qualifier (the elements of the enumerable ``stream`` in the example).
- The eighth value is ``Argument[1].Parameter[0]``, which is the access path to the first parameter of the ``System.Func<TSource,TResult>`` argument of ``Select`` (the lambda parameter ``item`` in the example).
For the second row:
- The seventh value is ``Argument[1].ReturnValue``, which is the access path to the return value of the ``System.Func<TSource,TResult>`` argument of ``Select`` (the return value of the lambda in the example).
- The eighth value is ``ReturnValue.Element``, which is the access path to the elements of the return value of ``Select`` (the elements of the enumerable ``lines`` in the example).
For the remaining values for both rows:
- The ninth value ``value`` is the kind of the flow. ``value`` means that the value is preserved.
- The tenth value ``manual`` is the provenance of the summary, which is used to identify the origin of the summary.
That is, the first row specifies that values can flow from the elements of the qualifier enumerable into the first argument of the function provided to ``Select``. The second row specifies that values can flow from the return value of the function to the elements of the enumerable returned from ``Select``.
Example: Add a ``neutral`` method
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This example shows how we can model a method as being neutral with respect to flow. We will also cover how to model a property by modeling the getter of the ``Now`` property of the ``DateTime`` class as neutral.
A neutral model is used to define that there is no flow through a method.
.. code-block:: csharp
public static void TaintFlow() {
System.DateTime t = System.DateTime.Now; // There is no flow from Now to t.
...
}
We need to add a tuple to the ``neutralModel``\(namespace, type, name, signature, kind, provenance) extensible predicate by updating a data extension file.
.. code-block:: yaml
extensions:
- addsTo:
pack: codeql/csharp-all
extensible: neutralModel
data:
- ["System", "DateTime", "get_Now", "()", "summary", "manual"]
Since we are adding a neutral model, we need to add tuples to the ``neutralModel`` extensible predicate.
The first four values identify the callable (in this case the getter of the ``Now`` property) to be modeled as a neutral; the fifth value is the kind, and the sixth value is the provenance (origin) of the neutral.
- The first value ``System`` is the namespace name.
- The second value ``DateTime`` is the class (type) name.
- The third value ``get_Now`` is the method name. Getter and setter methods are named ``get_<name>`` and ``set_<name>`` respectively.
- The fourth value ``()`` is the method input type signature.
- The fifth value ``summary`` is the kind of the neutral.
- The sixth value ``manual`` is the provenance of the neutral.

Просмотреть файл

@ -7,7 +7,7 @@ You can model the methods and callables that control data flow in any framework
.. include:: ../reusables/kotlin-beta-note.rst
.. include:: ../reusables/beta-note-model-packs-java.rst
.. include:: ../reusables/beta-note-customizing-library-models.rst
About this article
------------------
@ -61,10 +61,10 @@ Extensible predicates used to create custom models in Java and Kotlin
The CodeQL library for Java and Kotlin analysis exposes the following extensible predicates:
- ``sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance)``. This is used to model sources of potentially tainted data. The ``kind`` of the sources defined using this predicate determine which threat model they are associated with. Different threat models can be used to customize the sources used in an analysis. For more information, see ":ref:`Threat models <threat-models>`."
- ``sourceModel(package, type, subtypes, name, signature, ext, output, kind, provenance)``. This is used to model sources of potentially tainted data. The ``kind`` of the sources defined using this predicate determines which threat model they are associated with. Different threat models can be used to customize the sources used in an analysis. For more information, see ":ref:`Threat models <threat-models-java>`."
- ``sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance)``. This is used to model sinks where tainted data may be used in a way that makes the code vulnerable.
- ``summaryModel(package, type, subtypes, name, signature, ext, input, output, kind, provenance)``. This is used to model flow through elements.
- ``neutralModel(package, type, name, signature, kind, provenance)``. This is similar to a summary model but used to model the flow of values that have only a minor impact on the dataflow analysis.
- ``neutralModel(package, type, name, signature, kind, provenance)``. This is similar to a summary model but used to model the flow of values that have only a minor impact on the dataflow analysis. Manual neutral models (those with a provenance such as ``manual`` or ``ai-manual``) override generated summary models (those with a provenance such as ``df-generated``) so that the summary will be ignored. Other than that, neutral models have a slight impact on the dataflow dispatch logic, which is out of scope for this documentation.
The extensible predicates are populated using the models defined in data extension files.
@ -151,7 +151,7 @@ The sixth value should be left empty and is out of scope for this documentation.
The remaining values are used to define the ``access path``, the ``kind``, and the ``provenance`` (origin) of the source.
- The seventh value ``ReturnValue`` is the access path to the return of the method, which means that it is the return value that should be considered a source of tainted input.
- The eighth value ``remote`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security related queries as it means a remote source of untrusted data. As an example the SQL injection query uses ``remote`` sources. For more information, see ":ref:`Threat models <threat-models>`."
- The eighth value ``remote`` is the kind of the source. The source kind is used to define the threat model where the source is in scope. ``remote`` applies to many of the security-related queries as it means a remote source of untrusted data. As an example, the SQL injection query uses ``remote`` sources. For more information, see ":ref:`Threat models <threat-models-java>`."
- The ninth value ``manual`` is the provenance of the source, which is used to identify the origin of the source.
Example: Add flow through the ``concat`` method
@ -292,7 +292,7 @@ The first four values identify the callable (in this case a method) to be modele
- The fifth value ``summary`` is the kind of the neutral.
- The sixth value ``manual`` is the provenance of the neutral.
.. _threat-models:
.. _threat-models-java:
Threat models
-------------

Просмотреть файл

@ -8,7 +8,7 @@ Extensible predicates and their interaction with data extensions
You can use data extensions to model the methods and callables that control dataflow in any framework or library. This is especially useful for custom frameworks or niche libraries that are not supported by the standard CodeQL libraries.
.. include:: ../reusables/beta-note-model-packs-java.rst
.. include:: ../reusables/beta-note-customizing-library-models.rst
About this article
------------------

4
ql/Cargo.lock сгенерированный
Просмотреть файл

@ -679,9 +679,9 @@ dependencies = [
[[package]]
name = "rayon"
version = "1.8.1"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051"
checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd"
dependencies = [
"either",
"rayon-core",

Просмотреть файл

@ -8,5 +8,5 @@ edition = "2018"
[dependencies]
lazy_static = "1.4.0"
chrono = "0.4.34"
rayon = "1.8.1"
rayon = "1.9.0"
regex = "1.10.3"

Просмотреть файл

@ -15,6 +15,6 @@ tree-sitter-json = {git = "https://github.com/tausbn/tree-sitter-json.git", rev
clap = { version = "4.2", features = ["derive"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
rayon = "1.8.1"
rayon = "1.9.0"
regex = "1.10.3"
codeql-extractor = { path = "../../shared/tree-sitter-extractor" }