Merge pull request #15820 from MathiasVP/add-type-confusion-query

C++: Add a new query for detecting type confusion vulnerabilities
This commit is contained in:
Mathias Vorreiter Pedersen 2024-03-12 10:29:22 +00:00 коммит произвёл GitHub
Родитель 9c51514bd9 7b0df57d7a
Коммит 1a42e55095
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
9 изменённых файлов: 665 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,50 @@
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
<qhelp>
<overview>
<p>
Certain casts in C and C++ place no restrictions on the target type. For
example, C style casts such as <code>(MyClass*)p</code> allows the programmer
to cast any pointer <code>p</code> to an expression of type <code>MyClass*</code>.
If the runtime type of <code>p</code> turns out to be a type that's incompatible
with <code>MyClass</code>, this results in undefined behavior.
</p>
</overview>
<recommendation>
<p>
If possible, use <code>dynamic_cast</code> to safely cast between polymorphic types.
If <code>dynamic_cast</code> is not an option, use <code>static_cast</code> to restrict
the kinds of conversions that the compiler is allowed to perform. If C++ style casts is
not an option, carefully check that all casts are safe.
</p>
</recommendation>
<example>
<p>
Consider the following class hierachy where we define a base class <code>Shape</code> and two
derived classes <code>Circle</code> and <code>Square</code> that are mutually incompatible:
</p>
<sample src="TypeConfusionCommon.cpp"/>
<p>
The following code demonstrates a type confusion vulnerability where the programmer
assumes that the runtime type of <code>p</code> is always a <code>Square</code>.
However, if <code>p</code> is a <code>Circle</code>, the cast will result in undefined behavior.
</p>
<sample src="TypeConfusionBad.cpp"/>
<p>
The following code fixes the vulnerability by using <code>dynamic_cast</code> to
safely cast between polymorphic types. If the cast fails, <code>dynamic_cast</code>
returns a null pointer, which can be checked for and handled appropriately.
</p>
<sample src="TypeConfusionGood.cpp"/>
</example>
<references>
<li>
Microsoft Learn: <a href="https://learn.microsoft.com/en-us/cpp/cpp/type-conversions-and-type-safety-modern-cpp">Type conversions and type safety</a>.
</li>
</references>
</qhelp>

Просмотреть файл

@ -0,0 +1,263 @@
/**
* @name Type confusion
* @description Casting a value to an incompatible type can lead to undefined behavior.
* @kind path-problem
* @problem.severity warning
* @security-severity 9.3
* @precision medium
* @id cpp/type-confusion
* @tags security
* external/cwe/cwe-843
*/
import cpp
import semmle.code.cpp.dataflow.new.DataFlow
import Flow::PathGraph
/**
* Holds if `f` is a field located at byte offset `offset` in `c`.
*
* Note that predicate is recursive, so that given the following:
* ```cpp
* struct S1 {
* int a;
* void* b;
* };
*
* struct S2 {
* S1 s1;
* char c;
* };
* ```
* both `hasAFieldWithOffset(S2, s1, 0)` and `hasAFieldWithOffset(S2, a, 0)`
* holds.
*/
predicate hasAFieldWithOffset(Class c, Field f, int offset) {
// Base case: `f` is a field in `c`.
f = c.getAField() and
offset = f.getByteOffset() and
not f.getUnspecifiedType().(Class).hasDefinition()
or
// Otherwise, we find the struct that is a field of `c` which then has
// the field `f` as a member.
exists(Field g |
g = c.getAField() and
// Find the field with the largest offset that's less than or equal to
// offset. That's the struct we need to search recursively.
g =
max(Field cand, int candOffset |
cand = c.getAField() and
candOffset = cand.getByteOffset() and
offset >= candOffset
|
cand order by candOffset
) and
hasAFieldWithOffset(g.getUnspecifiedType(), f, offset - g.getByteOffset())
)
}
/** Holds if `f` is the last field of its declaring class. */
predicate lastField(Field f) {
exists(Class c | c = f.getDeclaringType() |
f =
max(Field cand, int byteOffset |
cand.getDeclaringType() = c and byteOffset = f.getByteOffset()
|
cand order by byteOffset
)
)
}
/**
* Holds if there exists a field in `c2` at offset `offset` that's compatible
* with `f1`.
*/
bindingset[f1, offset, c2]
pragma[inline_late]
predicate hasCompatibleFieldAtOffset(Field f1, int offset, Class c2) {
exists(Field f2 | hasAFieldWithOffset(c2, f2, offset) |
// Let's not deal with bit-fields for now.
f2 instanceof BitField
or
f1.getUnspecifiedType().getSize() = f2.getUnspecifiedType().getSize()
or
lastField(f1) and
f1.getUnspecifiedType().getSize() <= f2.getUnspecifiedType().getSize()
)
}
/**
* Holds if `c1` is a prefix of `c2`.
*/
bindingset[c1, c2]
pragma[inline_late]
predicate prefix(Class c1, Class c2) {
not c1.isPolymorphic() and
not c2.isPolymorphic() and
if c1 instanceof Union
then
// If it's a union we just verify that one of it's variants is compatible with the other class
exists(Field f1, int offset |
// Let's not deal with bit-fields for now.
not f1 instanceof BitField and
hasAFieldWithOffset(c1, f1, offset)
|
hasCompatibleFieldAtOffset(f1, offset, c2)
)
else
forall(Field f1, int offset |
// Let's not deal with bit-fields for now.
not f1 instanceof BitField and
hasAFieldWithOffset(c1, f1, offset)
|
hasCompatibleFieldAtOffset(f1, offset, c2)
)
}
/**
* An unsafe cast is any explicit cast that is not
* a `dynamic_cast`.
*/
class UnsafeCast extends Cast {
private Class toType;
UnsafeCast() {
(
this instanceof CStyleCast
or
this instanceof StaticCast
or
this instanceof ReinterpretCast
) and
toType = this.getExplicitlyConverted().getUnspecifiedType().stripType() and
not this.isImplicit() and
exists(TypeDeclarationEntry tde |
tde = toType.getDefinition() and
not tde.isFromUninstantiatedTemplate(_)
)
}
Class getConvertedType() { result = toType }
/**
* Holds if the result of this cast can safely be interpreted as a value of
* type `t`.
*
* The compatibility rules are as follows:
*
* 1. the result of `(T)x` is compatible with the type `T` for any `T`
* 2. the result of `(T)x` is compatible with the type `U` for any `U` such
* that `U` is a subtype of `T`, or `T` is a subtype of `U`.
* 3. the result of `(T)x` is compatible with the type `U` if the list
* of fields of `T` is a prefix of the list of fields of `U`.
* For example, if `U` is `struct { unsigned char x; int y; };`
* and `T` is `struct { unsigned char uc; };`.
* 4. the result of `(T)x` is compatible with the type `U` if the list
* of fields of `U` is a prefix of the list of fields of `T`.
*
* Condition 4 is a bit controversial, since it assumes that the additional
* fields in `T` won't be accessed. This may result in some FNs.
*/
bindingset[this, t]
pragma[inline_late]
predicate compatibleWith(Type t) {
// Conition 1
t.stripType() = this.getConvertedType()
or
// Condition 3
prefix(this.getConvertedType(), t.stripType())
or
// Condition 4
prefix(t.stripType(), this.getConvertedType())
or
// Condition 2 (a)
t.stripType().(Class).getABaseClass+() = this.getConvertedType()
or
// Condition 2 (b)
t.stripType() = this.getConvertedType().getABaseClass+()
}
}
/**
* Holds if `source` is an allocation that allocates a value of type `type`.
*/
predicate isSourceImpl(DataFlow::Node source, Class type) {
exists(AllocationExpr alloc |
alloc = source.asExpr() and
type = alloc.getAllocatedElementType().stripType() and
not exists(
alloc
.(NewOrNewArrayExpr)
.getAllocator()
.(OperatorNewAllocationFunction)
.getPlacementArgument()
)
) and
exists(TypeDeclarationEntry tde |
tde = type.getDefinition() and
not tde.isFromUninstantiatedTemplate(_)
)
}
/** A configuration describing flow from an allocation to a potentially unsafe cast. */
module Config implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { isSourceImpl(source, _) }
predicate isBarrier(DataFlow::Node node) {
// We disable flow through global variables to reduce FPs from infeasible paths
node instanceof DataFlow::VariableNode
or
exists(Class c | c = node.getType().stripType() |
not c.hasDefinition()
or
exists(TypeDeclarationEntry tde |
tde = c.getDefinition() and
tde.isFromUninstantiatedTemplate(_)
)
)
}
predicate isSink(DataFlow::Node sink) { sink.asExpr() = any(UnsafeCast cast).getUnconverted() }
int fieldFlowBranchLimit() { result = 0 }
}
module Flow = DataFlow::Global<Config>;
predicate relevantType(DataFlow::Node sink, Class allocatedType) {
exists(DataFlow::Node source |
Flow::flow(source, sink) and
isSourceImpl(source, allocatedType)
)
}
predicate isSinkImpl(
DataFlow::Node sink, Class allocatedType, Type convertedType, boolean compatible
) {
exists(UnsafeCast cast |
relevantType(sink, allocatedType) and
sink.asExpr() = cast.getUnconverted() and
convertedType = cast.getConvertedType()
|
if cast.compatibleWith(allocatedType) then compatible = true else compatible = false
)
}
from
Flow::PathNode source, Flow::PathNode sink, Type badSourceType, Type sinkType,
DataFlow::Node sinkNode
where
Flow::flowPath(source, sink) and
sinkNode = sink.getNode() and
isSourceImpl(source.getNode(), badSourceType) and
isSinkImpl(sinkNode, badSourceType, sinkType, false) and
// If there is any flow that would result in a valid cast then we don't
// report an alert here. This reduces the number of FPs from infeasible paths
// significantly.
not exists(DataFlow::Node goodSource, Type goodSourceType |
isSourceImpl(goodSource, goodSourceType) and
isSinkImpl(sinkNode, goodSourceType, sinkType, true) and
Flow::flow(goodSource, sinkNode)
)
select sinkNode, source, sink, "Conversion from $@ to $@ is invalid.", badSourceType,
badSourceType.toString(), sinkType, sinkType.toString()

Просмотреть файл

@ -0,0 +1,7 @@
void allocate_and_draw_bad() {
Shape* shape = new Circle;
// ...
// BAD: Assumes that shape is always a Square
Square* square = static_cast<Square*>(shape);
int length = square->getLength();
}

Просмотреть файл

@ -0,0 +1,25 @@
struct Shape {
virtual ~Shape();
virtual void draw() = 0;
};
struct Circle : public Shape {
Circle();
void draw() override {
/* ... */
}
int getRadius();
};
struct Square : public Shape {
Square();
void draw() override {
/* ... */
}
int getLength();
};

Просмотреть файл

@ -0,0 +1,11 @@
void allocate_and_draw_good() {
Shape* shape = new Circle;
// ...
// GOOD: Dynamically checks if shape is a Square
Square* square = dynamic_cast<Square*>(shape);
if(square) {
int length = square->getLength();
} else {
// handle error
}
}

Просмотреть файл

@ -0,0 +1,4 @@
---
category: newQuery
---
* Added a new query, `cpp/type-confusion`, to detect casts to invalid types.

Просмотреть файл

@ -0,0 +1,74 @@
edges
| test.cpp:17:13:17:18 | new | test.cpp:18:21:18:47 | p | provenance | |
| test.cpp:22:13:22:26 | new | test.cpp:23:12:23:30 | p | provenance | |
| test.cpp:27:13:27:18 | new | test.cpp:28:25:28:55 | p | provenance | |
| test.cpp:32:13:32:30 | new | test.cpp:33:12:33:30 | p | provenance | |
| test.cpp:47:21:47:36 | new | test.cpp:48:22:48:55 | p | provenance | |
| test.cpp:66:15:66:21 | new | test.cpp:67:12:67:31 | a | provenance | |
| test.cpp:76:15:76:21 | new | test.cpp:77:12:77:31 | a | provenance | |
| test.cpp:83:9:83:15 | new | test.cpp:88:14:88:33 | a | provenance | |
| test.cpp:85:9:85:15 | new | test.cpp:88:14:88:33 | a | provenance | |
| test.cpp:115:12:115:17 | new | test.cpp:116:20:116:51 | s2 | provenance | |
| test.cpp:127:12:127:17 | new | test.cpp:128:24:128:59 | s2 | provenance | |
| test.cpp:140:12:140:17 | new | test.cpp:141:23:141:57 | s1 | provenance | |
| test.cpp:143:14:143:19 | new | test.cpp:145:28:145:68 | s1_2 | provenance | |
| test.cpp:153:9:153:15 | new | test.cpp:159:14:159:33 | a | provenance | |
| test.cpp:166:9:166:15 | new | test.cpp:171:14:171:33 | a | provenance | |
| test.cpp:168:9:168:15 | new | test.cpp:171:14:171:33 | a | provenance | |
| test.cpp:179:15:179:24 | new | test.cpp:181:15:181:25 | u64 | provenance | |
| test.cpp:187:15:187:24 | new | test.cpp:189:25:189:45 | u64 | provenance | |
| test.cpp:207:14:207:26 | new | test.cpp:209:17:209:28 | si | provenance | |
| test.cpp:217:13:217:18 | new | test.cpp:218:30:218:65 | p | provenance | |
| test.cpp:226:13:226:18 | new | test.cpp:227:29:227:63 | p | provenance | |
nodes
| test.cpp:17:13:17:18 | new | semmle.label | new |
| test.cpp:18:21:18:47 | p | semmle.label | p |
| test.cpp:22:13:22:26 | new | semmle.label | new |
| test.cpp:23:12:23:30 | p | semmle.label | p |
| test.cpp:27:13:27:18 | new | semmle.label | new |
| test.cpp:28:25:28:55 | p | semmle.label | p |
| test.cpp:32:13:32:30 | new | semmle.label | new |
| test.cpp:33:12:33:30 | p | semmle.label | p |
| test.cpp:47:21:47:36 | new | semmle.label | new |
| test.cpp:48:22:48:55 | p | semmle.label | p |
| test.cpp:66:15:66:21 | new | semmle.label | new |
| test.cpp:67:12:67:31 | a | semmle.label | a |
| test.cpp:76:15:76:21 | new | semmle.label | new |
| test.cpp:77:12:77:31 | a | semmle.label | a |
| test.cpp:83:9:83:15 | new | semmle.label | new |
| test.cpp:85:9:85:15 | new | semmle.label | new |
| test.cpp:88:14:88:33 | a | semmle.label | a |
| test.cpp:115:12:115:17 | new | semmle.label | new |
| test.cpp:116:20:116:51 | s2 | semmle.label | s2 |
| test.cpp:127:12:127:17 | new | semmle.label | new |
| test.cpp:128:24:128:59 | s2 | semmle.label | s2 |
| test.cpp:140:12:140:17 | new | semmle.label | new |
| test.cpp:141:23:141:57 | s1 | semmle.label | s1 |
| test.cpp:143:14:143:19 | new | semmle.label | new |
| test.cpp:145:28:145:68 | s1_2 | semmle.label | s1_2 |
| test.cpp:153:9:153:15 | new | semmle.label | new |
| test.cpp:159:14:159:33 | a | semmle.label | a |
| test.cpp:166:9:166:15 | new | semmle.label | new |
| test.cpp:168:9:168:15 | new | semmle.label | new |
| test.cpp:171:14:171:33 | a | semmle.label | a |
| test.cpp:179:15:179:24 | new | semmle.label | new |
| test.cpp:181:15:181:25 | u64 | semmle.label | u64 |
| test.cpp:187:15:187:24 | new | semmle.label | new |
| test.cpp:189:25:189:45 | u64 | semmle.label | u64 |
| test.cpp:207:14:207:26 | new | semmle.label | new |
| test.cpp:209:17:209:28 | si | semmle.label | si |
| test.cpp:217:13:217:18 | new | semmle.label | new |
| test.cpp:218:30:218:65 | p | semmle.label | p |
| test.cpp:226:13:226:18 | new | semmle.label | new |
| test.cpp:227:29:227:63 | p | semmle.label | p |
subpaths
#select
| test.cpp:28:25:28:55 | p | test.cpp:27:13:27:18 | new | test.cpp:28:25:28:55 | p | Conversion from $@ to $@ is invalid. | test.cpp:1:8:1:9 | S1 | S1 | test.cpp:11:8:11:21 | Not_S1_wrapper | Not_S1_wrapper |
| test.cpp:33:12:33:30 | p | test.cpp:32:13:32:30 | new | test.cpp:33:12:33:30 | p | Conversion from $@ to $@ is invalid. | test.cpp:11:8:11:21 | Not_S1_wrapper | Not_S1_wrapper | test.cpp:1:8:1:9 | S1 | S1 |
| test.cpp:67:12:67:31 | a | test.cpp:66:15:66:21 | new | test.cpp:67:12:67:31 | a | Conversion from $@ to $@ is invalid. | test.cpp:55:8:55:10 | Cat | Cat | test.cpp:60:8:60:10 | Dog | Dog |
| test.cpp:128:24:128:59 | s2 | test.cpp:127:12:127:17 | new | test.cpp:128:24:128:59 | s2 | Conversion from $@ to $@ is invalid. | test.cpp:102:8:102:9 | S2 | S2 | test.cpp:119:8:119:20 | Not_S2_prefix | Not_S2_prefix |
| test.cpp:145:28:145:68 | s1_2 | test.cpp:143:14:143:19 | new | test.cpp:145:28:145:68 | s1_2 | Conversion from $@ to $@ is invalid. | test.cpp:1:8:1:9 | S1 | S1 | test.cpp:131:8:131:23 | HasSomeBitFields | HasSomeBitFields |
| test.cpp:159:14:159:33 | a | test.cpp:153:9:153:15 | new | test.cpp:159:14:159:33 | a | Conversion from $@ to $@ is invalid. | test.cpp:60:8:60:10 | Dog | Dog | test.cpp:55:8:55:10 | Cat | Cat |
| test.cpp:189:25:189:45 | u64 | test.cpp:187:15:187:24 | new | test.cpp:189:25:189:45 | u64 | Conversion from $@ to $@ is invalid. | test.cpp:175:8:175:13 | UInt64 | UInt64 | test.cpp:184:8:184:22 | UInt8_with_more | UInt8_with_more |
| test.cpp:218:30:218:65 | p | test.cpp:217:13:217:18 | new | test.cpp:218:30:218:65 | p | Conversion from $@ to $@ is invalid. | test.cpp:1:8:1:9 | S1 | S1 | test.cpp:212:8:212:26 | UnrelatedStructSize | UnrelatedStructSize |
| test.cpp:227:29:227:63 | p | test.cpp:226:13:226:18 | new | test.cpp:227:29:227:63 | p | Conversion from $@ to $@ is invalid. | test.cpp:1:8:1:9 | S1 | S1 | test.cpp:221:8:221:25 | TooLargeBufferSize | TooLargeBufferSize |

Просмотреть файл

@ -0,0 +1 @@
Security/CWE/CWE-843/TypeConfusion.ql

Просмотреть файл

@ -0,0 +1,230 @@
struct S1 {
int a;
void* b;
unsigned char c;
};
struct S1_wrapper {
S1 s1;
};
struct Not_S1_wrapper {
unsigned char x;
S1 s1;
};
void test1() {
void* p = new S1;
S1_wrapper* s1w = static_cast<S1_wrapper*>(p); // GOOD
}
void test2() {
void* p = new S1_wrapper;
S1* s1 = static_cast<S1*>(p); // GOOD
}
void test3() {
void* p = new S1;
Not_S1_wrapper* s1w = static_cast<Not_S1_wrapper*>(p); // BAD
}
void test4() {
void* p = new Not_S1_wrapper;
S1* s1 = static_cast<S1*>(p); // BAD
}
struct HasBitFields {
int x : 16;
int y : 16;
int z : 32;
};
struct BufferStruct {
unsigned char buffer[sizeof(HasBitFields)];
};
void test5() {
HasBitFields* p = new HasBitFields;
BufferStruct* bs = reinterpret_cast<BufferStruct*>(p); // GOOD
}
struct Animal {
virtual ~Animal();
};
struct Cat : public Animal {
Cat();
~Cat();
};
struct Dog : public Animal {
Dog();
~Dog();
};
void test6() {
Animal* a = new Cat;
Dog* d = static_cast<Dog*>(a); // BAD
}
void test7() {
Animal* a = new Cat;
Dog* d = dynamic_cast<Dog*>(a); // GOOD
}
void test8() {
Animal* a = new Cat;
Cat* d = static_cast<Cat*>(a); // GOOD
}
void test9(bool b) {
Animal* a;
if(b) {
a = new Cat;
} else {
a = new Dog;
}
if(b) {
Cat* d = static_cast<Cat*>(a); // GOOD
}
}
/**
* The layout of S2 is:
* 0: int
* 8: void*
* 16: unsigned char
* 16 + pad: unsigned char
* 32 + pad: int
* 40 + pad: void*
* 48 + pad: unsigned char
*/
struct S2 {
S1 s1;
unsigned char buffer[16];
S1 s1_2;
};
struct S2_prefix {
int a;
void* p;
unsigned char c;
};
void test10() {
S2* s2 = new S2;
S2_prefix* s2p = reinterpret_cast<S2_prefix*>(s2); // GOOD
}
struct Not_S2_prefix {
int a;
void* p;
void* p2;
unsigned char c;
};
void test11() {
S2* s2 = new S2;
Not_S2_prefix* s2p = reinterpret_cast<Not_S2_prefix*>(s2); // BAD
}
struct HasSomeBitFields {
int x : 16;
int y;
int z : 32;
};
void test12() {
// This has doesn't have any non-bitfield member, so we don't detect
// the problem here since the query currently ignores bitfields.
S1* s1 = new S1;
HasBitFields* hbf = reinterpret_cast<HasBitFields*>(s1); // BAD [NOT DETECTED]
S1* s1_2 = new S1;
// This one has a non-bitfield members. So we detect the problem
HasSomeBitFields* hbf2 = reinterpret_cast<HasSomeBitFields*>(s1_2); // BAD
}
void test13(bool b, Cat* c) {
Animal* a;
if(b) {
a = c;
} else {
a = new Dog;
}
// This FP happens despite the `not GoodFlow::flowTo(sinkNode)` condition in the query
// because we don't find a flow path from `a = c` to `static_cast<Cat*>(a)` because
// the "source" (i.e., `a = c`) doesn't have an allocation.
if(b) {
Cat* d = static_cast<Cat*>(a); // GOOD [FALSE POSITIVE]
}
}
void test14(bool b) {
Animal* a;
if(b) {
a = new Cat;
} else {
a = new Dog;
}
if(!b) {
Cat* d = static_cast<Cat*>(a); // BAD [NOT DETECTED]
}
}
struct UInt64 { unsigned long u64; };
struct UInt8 { unsigned char u8; };
void test14() {
void* u64 = new UInt64;
// ...
UInt8* u8 = (UInt8*)u64; // GOOD
}
struct UInt8_with_more { UInt8 u8; void* p; };
void test15() {
void* u64 = new UInt64;
// ...
UInt8_with_more* u8 = (UInt8_with_more*)u64; // BAD
}
struct SingleInt {
int i;
} __attribute__((packed));;
struct PairInts {
int x, y;
} __attribute__((packed));;
union MyUnion
{
PairInts p;
unsigned long long foo;
} __attribute__((packed));
void test16() {
void* si = new SingleInt;
// ...
MyUnion* mu = (MyUnion*)si; // BAD [NOT DETECTED]
}
struct UnrelatedStructSize {
unsigned char buffer[1024];
};
void test17() {
void* p = new S1;
UnrelatedStructSize* uss = static_cast<UnrelatedStructSize*>(p); // BAD
}
struct TooLargeBufferSize {
unsigned char buffer[sizeof(S1) + 1];
};
void test18() {
void* p = new S1;
TooLargeBufferSize* uss = static_cast<TooLargeBufferSize*>(p); // BAD
}
// semmle-extractor-options: --gcc -std=c++11