Java: Use the shared model generator implementation.

This commit is contained in:
Michael Nebel 2024-09-18 13:57:46 +02:00
Родитель 1f3b28a555
Коммит 2033818e39
6 изменённых файлов: 268 добавлений и 992 удалений

Просмотреть файл

@ -760,7 +760,7 @@ ContentApprox getContentApprox(Content c) {
/**
* Holds if the content `c` is a container.
*/
predicate containerContent(Content c) {
predicate containerContent(ContentSet c) {
c instanceof ArrayContent or
c instanceof CollectionContent or
c instanceof MapKeyContent or

Просмотреть файл

@ -1,635 +1,292 @@
/**
* Provides classes and predicates related to capturing summary, source,
* and sink models of the Standard or a 3rd party library.
* Provides predicates related to capturing summary models of the Standard or a 3rd party library.
*/
private import CaptureModelsSpecific
private import CaptureModelsPrinting
private import java as J
private import semmle.code.java.dataflow.DataFlow
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow
private import semmle.code.java.dataflow.internal.DataFlowDispatch
private import semmle.code.java.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
private import semmle.code.java.dataflow.internal.DataFlowImplSpecific
private import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate
private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.java.dataflow.internal.ModelExclusions
private import semmle.code.java.dataflow.internal.TaintTrackingImplSpecific
private import semmle.code.java.dataflow.SSA as Ssa
private import semmle.code.java.dataflow.TaintTracking
private import codeql.mad.modelgenerator.ModelGeneratorImpl
/**
* A node from which flow can return to the caller. This is either a regular
* `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter.
* Holds if the type `t` is a primitive type used for bulk data.
*/
private class ReturnNodeExt extends DataFlow::Node {
private DataFlowImplCommon::ReturnKindExt kind;
/** Holds if `t` has one of the names `byte`, `char`, `Byte` or `Character`. */
predicate isPrimitiveTypeUsedForBulkData(J::Type t) {
  exists(string bulkName | bulkName = ["byte", "char", "Byte", "Character"] | t.hasName(bulkName))
}
ReturnNodeExt() {
kind = DataFlowImplCommon::getValueReturnPosition(this).getKind() or
kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind()
module ModelGeneratorInput implements ModelGeneratorInputSig<Location, JavaDataFlow> {
class Type = J::Type;
class Parameter = J::Parameter;
class Callable = J::Callable;
class NodeExtended extends DataFlow::Node {
Callable getAsExprEnclosingCallable() { result = this.asExpr().getEnclosingCallable() }
}
/** Holds if the package name of `cu` starts with `javax.swing` or `java.awt`. */
private predicate isInfrequentlyUsed(J::CompilationUnit cu) {
  cu.getPackage().getName().matches(["javax.swing%", "java.awt%"])
}
/**
 * Holds if `api` is a public callable, declared in a public type and defined in source,
 * for which neither `isUninterestingForModels` holds nor `isInfrequentlyUsed` holds
 * for its compilation unit.
 */
private predicate relevant(Callable api) {
  api.fromSource() and
  api.isPublic() and
  api.getDeclaringType().isPublic() and
  not (
    isUninterestingForModels(api)
    or
    isInfrequentlyUsed(api.getCompilationUnit())
  )
}
/**
 * Gets a relevant method among the results of `m.getAnOverride()`,
 * provided that `m` is not a `toString` method.
 */
private J::Method getARelevantOverride(J::Method m) {
  // Other exclusions for overrides.
  not m instanceof J::ToStringMethod and
  exists(J::Method candidate | candidate = m.getAnOverride() |
    relevant(candidate) and
    result = candidate
  )
}
/**
* Gets the kind of the return node.
* Gets the super implementation of `m` if it is relevant.
* If no such super implementation exists, returns `m` if it is relevant.
*/
DataFlowImplCommon::ReturnKindExt getKind() { result = kind }
}
bindingset[c]
private signature string printCallableParamSig(Callable c, ParameterPosition p);
private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> {
string getOutput(ReturnNodeExt node) {
node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and
result = "ReturnValue"
or
exists(ParameterPosition pos |
pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and
result = printCallableParam(returnNodeEnclosingCallable(node), pos)
)
}
}
string getOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node)
}
string getContentOutput(ReturnNodeExt node) {
result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node)
}
/**
* A summary target API for which `isUninterestingForDataFlowModels` does not hold.
*/
class DataFlowSummaryTargetApi extends SummaryTargetApi {
DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) }
}
class DataFlowSourceTargetApi = SourceTargetApi;
class DataFlowSinkTargetApi = SinkTargetApi;
/**
* The input to the model printing module: it uses `DataFlowSummaryTargetApi` as the
* summary API class, `SourceOrSinkTargetApi` as the source/sink API class and
* `df-generated` as the provenance string.
*/
private module ModelPrintingInput implements ModelPrintingSig {
class SummaryApi = DataFlowSummaryTargetApi;
class SourceOrSinkApi = SourceOrSinkTargetApi;
string getProvenance() { result = "df-generated" }
}
module Printing = ModelPrinting<ModelPrintingInput>;
/**
 * Holds if the content `c` has an underlying content type and that type is relevant.
 */
private predicate isRelevantTypeInContent(DataFlow::ContentSet c) {
  exists(Type underlying |
    underlying = getUnderlyingContentType(c) and
    isRelevantType(underlying)
  )
}
/**
 * Holds if data can flow from `node1` to `node2` either via a read of some content
 * (whose underlying type, if any, must be relevant) or via a store into container content.
 */
private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
  exists(DataFlow::ContentSet c |
    DataFlowPrivate::readStep(node1, c, node2) and
    // Partially restrict the content types used for intermediate steps.
    (isRelevantTypeInContent(c) or not exists(getUnderlyingContentType(c)))
    or
    DataFlowPrivate::storeStep(node1, c, node2) and
    containerContent(c)
  )
}
/**
 * Holds if content `c` is container-like content, or content (such as a field or
 * synthetic field) whose underlying type is relevant.
 */
pragma[nomagic]
private predicate isRelevantContent0(DataFlow::ContentSet c) {
  containerContent(c)
  or
  isRelevantTypeInContent(c)
}
/**
 * Gets the MaD string representation of the parameter node `p`:
 * the qualifier string for an instance parameter node, and otherwise
 * the access string of the underlying parameter.
 */
string parameterNodeAsInput(DataFlow::ParameterNode p) {
  p instanceof InstanceParameterNode and
  result = qualifierString()
  or
  result = parameterAccess(p.asParameter())
}
/**
 * Gets the MaD string representation of the parameter node `p`
 * when used in content flow: the qualifier string for an instance parameter node,
 * and otherwise the content access string of the underlying parameter.
 */
string parameterNodeAsContentInput(DataFlow::ParameterNode p) {
  p instanceof InstanceParameterNode and
  result = qualifierString()
  or
  result = parameterContentAccess(p.asParameter())
}
/**
* Gets the MaD input string representation of `source`.
* This simply delegates to `asInputArgumentSpecific`.
*/
string asInputArgument(DataFlow::Node source) { result = asInputArgumentSpecific(source) }
/**
 * Gets the summary model of `api`, if it follows the `fluent` programming pattern,
 * that is, if some return node of `api` is an access to its own instance (`this`).
 */
string captureQualifierFlow(DataFlowSummaryTargetApi api) {
  result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue") and
  exists(ReturnNodeExt thisReturn |
    isOwnInstanceAccessNode(thisReturn) and
    returnNodeEnclosingCallable(thisReturn) = api
  )
}
/** Gets the upper bound (2) on the step number carried by a taint state. */
private int accessPathLimit0() { result = 2 }
/**
* The flow states: one read state for each step number in `[0 .. accessPathLimit0()]`
* and one store state for each step number in `[1 .. accessPathLimit0()]`.
*/
private newtype TTaintState =
TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or
TTaintStore(int n) { n in [1 .. accessPathLimit0()] }
/** A flow state with a string representation. */
abstract private class TaintState extends TTaintState {
abstract string toString();
}
/**
 * A FlowState representing a tainted read.
 */
private class TaintRead extends TaintState, TTaintRead {
  private int readStep;

  TaintRead() { TTaintRead(readStep) = this }

  /**
   * Gets the step number stored in this flow state.
   */
  int getStep() { readStep = result }

  override string toString() { result = "TaintRead(" + readStep + ")" }
}
/**
 * A FlowState representing a tainted write.
 */
private class TaintStore extends TaintState, TTaintStore {
  private int storeStep;

  TaintStore() { TTaintStore(storeStep) = this }

  /**
   * Gets the step number stored in this flow state.
   */
  int getStep() { storeStep = result }

  override string toString() { result = "TaintStore(" + storeStep + ")" }
}
/**
* A data-flow configuration for tracking flow through APIs.
* The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters.
*
* This can be used to generate Flow summaries for APIs from parameter to return.
*
* The flow state counts the reads and stores of relevant content performed along a path;
* the counters are bounded by the ranges of `TTaintRead` and `TTaintStore`.
*/
module PropagateFlowConfig implements DataFlow::StateConfigSig {
class FlowState = TaintState;
// Sources: parameter nodes of summary target APIs, starting in the read state with step 0.
predicate isSource(DataFlow::Node source, FlowState state) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi and
state.(TaintRead).getStep() = 0
}
// Sinks: extended return nodes that are not own-instance accesses, in callables
// for which no qualifier flow model is captured.
predicate isSink(DataFlow::Node sink, FlowState state) {
sink instanceof ReturnNodeExt and
not isOwnInstanceAccessNode(sink) and
not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and
(state instanceof TaintRead or state instanceof TaintStore)
}
predicate isAdditionalFlowStep(
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
) {
// A store of relevant content: any read state moves to store step 1,
// and a store state moves to the next store step.
exists(DataFlow::ContentSet c |
DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and
isRelevantContent0(c) and
(
state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1
or
state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep()
)
)
or
// A read of relevant content: a read state moves to the next read step.
// There is no transition from a store state to a read state.
exists(DataFlow::ContentSet c |
DataFlowPrivate::readStep(node1, c, node2) and
isRelevantContent0(c) and
state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep()
)
}
// Flow is blocked at nodes that have a type that is not relevant.
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
}
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>;
/**
 * Gets the lifted taint model of `api` describing flow from the parameter node `p`
 * to the return node `returnNodeExt`, both of which must belong to `api`.
 * There is no result if the input and output strings coincide.
 *
 * Note that this predicate does not itself check that flow exists.
 */
string captureThroughFlow0(
  DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt
) {
  p.getEnclosingCallable() = api and
  returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and
  exists(string input, string output |
    input = parameterNodeAsInput(p) and
    output = getOutput(returnNodeExt) and
    input != output
  |
    result = Printing::asLiftedTaintModel(api, input, output)
  )
}
/**
 * Gets the summary model(s) of `api`, if `PropagateFlow` finds flow from a parameter
 * to a return value or parameter.
 */
string captureThroughFlow(DataFlowSummaryTargetApi api) {
  exists(DataFlow::ParameterNode param, ReturnNodeExt ret |
    result = captureThroughFlow0(api, param, ret) and
    PropagateFlow::flow(param, ret)
  )
}
/**
* A content data-flow configuration for tracking flow through APIs.
* The sources are the parameter nodes and the sinks are the extended return nodes
* of summary target APIs.
*/
private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
source instanceof DataFlow::ParameterNode and
source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi
}
predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2;
// Flow is blocked at nodes that have a type that is not relevant.
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
}
int accessPathLimit() { result = 2 }
predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) }
DataFlow::FlowFeature getAFeature() {
result instanceof DataFlow::FeatureEqualSourceSinkCallContext
}
}
private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>;
/**
* Gets the printed representation, prefixed with `.`, of the content at index `i`
* of the access path `ap`, where index 0 is the head of `ap`.
*/
private string getContent(PropagateContentFlow::AccessPath ap, int i) {
exists(ContentSet head, PropagateContentFlow::AccessPath tail |
head = ap.getHead() and
tail = ap.getTail()
|
i = 0 and
result = "." + printContent(head)
or
// Index `i` of `ap` is index `i - 1` of its tail.
i > 0 and result = getContent(tail, i - 1)
)
}
/**
* Gets the MaD string representation of a store step access path.
* The contents are concatenated in ascending index order, that is, head first.
*/
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
result = concat(int i | | getContent(ap, i), "" order by i)
}
/**
* Gets the MaD string representation of a read step access path.
* The contents are concatenated in descending index order, that is, head last.
*/
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
result = concat(int i | | getContent(ap, i), "" order by i desc)
}
/**
 * Holds if some content of the access path `ap` is a field
 * (as determined by `isField`).
 */
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
  exists(ContentSet first, PropagateContentFlow::AccessPath rest |
    first = ap.getHead() and
    rest = ap.getTail()
  |
    isField(first)
    or
    mentionsField(rest)
  )
}
/**
 * Holds if `api` has content flow from `reads` on its parameter node `p`
 * to `stores` on its return node `returnNodeExt`.
 */
private predicate apiFlow(
  DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
  ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
  p.getEnclosingCallable() = api and
  returnNodeExt.getEnclosingCallable() = api and
  PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue)
}
/**
* A class of APIs relevant for modeling using content flow.
* The following heuristic is applied:
* Content flow is only relevant for an API, if
* #content flow <= 2 * #parameters + 3
* where #content flow is the number of distinct (input, output) string pairs.
* If an API produces more content flow, it is likely that
* 1. Types are not sufficiently constrained leading to a combinatorial
* explosion in dispatch and thus in the generated summaries.
* 2. It is a reasonable approximation to use the non-content based flow
* detection instead, as reads and stores would use a significant
* part of an object's internal state.
*/
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
ContentDataFlowSummaryTargetApi() {
count(string input, string output |
exists(
DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores
|
apiFlow(this, p, reads, returnNodeExt, stores, _) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores)
)
) <= 2 * this.getNumberOfParameters() + 3
}
}
/**
 * Holds if the content flow target `api` has content flow from `reads` on its
 * parameter node `p` to `stores` on its return node `returnNodeExt`.
 */
pragma[nomagic]
private predicate apiContentFlow(
  ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
  PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
  PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
  p.getEnclosingCallable() = api and
  returnNodeExt.getEnclosingCallable() = api and
  PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue)
}
/**
 * Holds if some content set in `path` has a synthetic name,
 * that is, translates into a synthetic field.
 */
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
  exists(ContentSet first, PropagateContentFlow::AccessPath rest |
    first = path.getHead() and
    rest = path.getTail()
  |
    hasSyntheticContent(rest)
    or
    exists(getSyntheticName(first))
  )
}
/**
* A module containing predicates for validating access paths containing content sets
* that translate into synthetic fields, when used for generated summary models.
*/
private module AccessPathSyntheticValidation {
/**
 * Holds if some API has content flow from `read` on a parameter of type `t1`
 * to `store` on a return node of type `t2`.
 */
private predicate step(
  Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
  exists(DataFlow::ParameterNode param, ReturnNodeExt ret |
    apiContentFlow(_, param, read, ret, store, _) and
    t1 = param.getType() and
    t2 = ret.getType()
  )
}
/**
 * Holds if there is a step from `read` (on type `t1`) to `store` (on type `t2`),
 * where only `store` has synthetic content.
 *
 * Step A -> Synth.
 */
private predicate synthPathEntry(
  Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
  step(t1, read, t2, store) and
  hasSyntheticContent(store) and
  not hasSyntheticContent(read)
}
/**
 * Holds if there is a step from `read` (on type `t1`) to `store` (on type `t2`),
 * where only `read` has synthetic content.
 *
 * Step Synth -> A.
 */
private predicate synthPathExit(
  Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
  step(t1, read, t2, store) and
  hasSyntheticContent(read) and
  not hasSyntheticContent(store)
}
/**
* Holds if there exists a path of steps from `read` (on type `t`) to an exit.
*
* read ->* Synth -> A
*/
private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
synthPathExit(t, read, _, _)
or
hasSyntheticContent(read) and
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(t, read, midType, mid) and
// `mid` is the store path of this step; reversed, it is the read path of the next step.
reachesSynthExit(midType, mid.reverse())
)
}
/**
* Holds if there exists a path of steps from an entry to `store` (on type `t`).
*
* A -> Synth ->* store
*/
private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
synthPathEntry(_, _, t, store)
or
hasSyntheticContent(store) and
exists(PropagateContentFlow::AccessPath mid, Type midType |
hasSyntheticContent(mid) and
step(midType, mid, t, store) and
// `mid` is the read path of this step; reversed, it is the store path of the previous step.
synthEntryReaches(midType, mid.reverse())
)
}
/**
* Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`)
* contain content that will be translated into a synthetic field, when being used in
* a MaD summary model, and if there is a range of APIs, such that
* when chaining their flow access paths, there exists access paths `A` and `B` where
* A ->* read -> store ->* B and where `A` and `B` do not contain content that will
* be translated into a synthetic field.
*
* This is needed because we don't want to include summaries that reads from or
* stores into a "dead" synthetic field.
*
* Example:
* Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and
* `setX`, which gets and sets a private field `X` on `t`.
* This would lead to the following content flows
* getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
* setX : Argument[0] -> Argument[this].SyntheticField[t.X]
* As the reads and stores are on synthetic fields we should only make summaries
* if both of these methods exist.
*/
pragma[nomagic]
predicate acceptReadStore(
Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
) {
synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse())
or
exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read |
synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store)
private J::Callable liftedImpl(J::Callable m) {
(
result = getARelevantOverride(m)
or
synthEntryReaches(t1, store0) and
step(t1, read, t2, store) and
reachesSynthExit(t2, store.reverse())
)
result = m and relevant(m)
) and
not exists(getARelevantOverride(result))
}
}
/**
 * Holds if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`.
 * Flow is considered relevant,
 * 1. If neither `read` nor `store` contains a content set that translates into a synthetic field.
 * 2. If `AccessPathSyntheticValidation::acceptReadStore` accepts the pair `read` and `store`
 *    on the types of `p` and `returnNodeExt`.
 */
private predicate apiRelevantContentFlow(
  ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
  PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
  PropagateContentFlow::AccessPath store, boolean preservesValue
) {
  apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
  (
    AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store)
    or
    not (hasSyntheticContent(read) or hasSyntheticContent(store))
  )
}
/**
 * Holds if `api` is the callable of a summarized callable that applies a manual model,
 * or of a neutral summary callable that has a manual model.
 */
private predicate hasManualSummaryModel(Callable api) {
  exists(FlowSummaryImpl::Public::SummarizedCallable summarized |
    summarized.applyManualModel() and
    api = summarized.asCallable()
  )
  or
  exists(FlowSummaryImpl::Public::NeutralSummaryCallable neutral |
    neutral.hasManualModel() and
    api = neutral.asCallable()
  )
}
/**
* Holds if `api` has relevant content flow from the MaD input `input` to the MaD output
* `output`, where `input` differs from `output`. `preservesValue` is the value reported
* by the content flow, and `lift` is `false` if the read or store access path mentions
* a field and `true` otherwise.
*/
pragma[nomagic]
private predicate captureContentFlow0(
ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue,
boolean lift
) {
exists(
DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads,
PropagateContentFlow::AccessPath stores
|
apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and
input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and
output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and
input != output and
(if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true)
)
}
/**
 * Holds if `api` is a source callable that has a manual model,
 * or the callable of a neutral source callable that has a manual model.
 */
private predicate hasManualSourceModel(Callable api) {
  exists(ExternalFlow::SourceCallable src | src.hasManualModel() and api = src)
  or
  exists(FlowSummaryImpl::Public::NeutralSourceCallable neutral |
    neutral.hasManualModel() and
    api = neutral.asCallable()
  )
}
/**
* Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to
* the return value or a parameter).
*
* Models are lifted to the best type in case the read and store access paths do not
* contain a field or synthetic field access.
*
* For each (input, output, lift) combination only one model is produced, using the
* maximum `preservesValue` found for that combination.
*/
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
exists(string input, string output, boolean lift, boolean preservesValue |
captureContentFlow0(api, input, output, _, lift) and
// NOTE(review): presumably `true` is the maximum, so value flow wins over taint flow - confirm.
preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and
result = Printing::asModel(api, input, output, preservesValue, lift)
)
}
/**
 * Holds if `api` is a sink callable that has a manual model,
 * or the callable of a neutral sink callable that has a manual model.
 */
private predicate hasManualSinkModel(Callable api) {
  exists(ExternalFlow::SinkCallable snk | snk.hasManualModel() and api = snk)
  or
  exists(FlowSummaryImpl::Public::NeutralSinkCallable neutral |
    neutral.hasManualModel() and
    api = neutral.asCallable()
  )
}
/**
* A dataflow configuration used for finding new sources.
* The sources are the already known existing sources and the sinks are the API return nodes.
*
* This can be used to generate Source summaries for an API, if the API exposes an already known source
* via its return (then the API itself becomes a source).
*/
module PropagateFromSourceConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
exists(string kind |
isRelevantSourceKind(kind) and
ExternalFlow::sourceNode(source, kind)
/** Holds if `api` is declared in an interface and has no body. */
predicate isUninterestingForDataFlowModels(Callable api) {
  not exists(api.getBody()) and
  api.getDeclaringType() instanceof J::Interface
}
/** A relevant callable, considered as a candidate for source or sink models. */
class SourceOrSinkTargetApi extends Callable {
SourceOrSinkTargetApi() { relevant(this) }
}
/** A source-or-sink target API that does not have a manual sink model. */
class SinkTargetApi extends SourceOrSinkTargetApi {
SinkTargetApi() { not hasManualSinkModel(this) }
}
/** A source-or-sink target API that does not have a manual source model. */
class SourceTargetApi extends SourceOrSinkTargetApi {
SourceTargetApi() { not hasManualSourceModel(this) }
}
/**
* A callable that has a lifted implementation (see `liftedImpl`) without
* a manual summary model.
*/
class SummaryTargetApi extends Callable {
private Callable lift;
SummaryTargetApi() {
lift = liftedImpl(this) and
not hasManualSummaryModel(lift)
}
/** Gets the lifted implementation of this callable. */
Callable lift() { result = lift }
/** Holds if this callable itself is relevant. */
predicate isRelevant() { relevant(this) }
}
/**
 * Gets the string `"false"` if the declaring type of `c` is final,
 * and the string `"true"` otherwise.
 */
private string isExtensible(Callable c) {
  c.getDeclaringType().isFinal() and result = "false"
  or
  not c.getDeclaringType().isFinal() and result = "true"
}
/**
 * Holds if the declaring type of the callable `c` is in the package named `package`
 * and the erasure of that type has the nested name `type`.
 */
private predicate qualifiedName(Callable c, string package, string type) {
  exists(RefType declaring | declaring = c.getDeclaringType() |
    type = declaring.getErasure().(J::RefType).getNestedName() and
    package = declaring.getCompilationUnit().getPackage().getName()
  )
}
predicate isSink(DataFlow::Node sink) {
sink instanceof ReturnNodeExt and
sink.getEnclosingCallable() instanceof DataFlowSourceTargetApi
/**
* Holds if the type `t` is relevant. A type is not relevant if it is
* - a `TypeClass`, an enum type, a primitive type or a boxed type,
* - a reference type with `java.lang.Number` or `java.nio.charset.Charset` as an ancestor,
* - an array of primitive or boxed elements, or a collection of boxed elements,
*   unless the element type is used for bulk data.
*/
predicate isRelevantType(Type t) {
not t instanceof J::TypeClass and
not t instanceof J::EnumType and
not t instanceof J::PrimitiveType and
not t instanceof J::BoxedType and
not t.(J::RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and
not t.(J::RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and
// Arrays with primitive elements are only relevant for bulk data element types.
(
not t.(J::Array).getElementType() instanceof J::PrimitiveType or
isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType())
) and
// Likewise for arrays with boxed elements.
(
not t.(J::Array).getElementType() instanceof J::BoxedType or
isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType())
) and
// Likewise for collections with boxed elements.
(
not t.(ContainerFlow::CollectionType).getElementType() instanceof J::BoxedType or
isPrimitiveTypeUsedForBulkData(t.(ContainerFlow::CollectionType).getElementType())
)
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext }
predicate isBarrier(DataFlow::Node n) {
exists(Type t | t = n.getType() and not isRelevantType(t))
/**
 * Gets the type of the field of `c`, if `c` is field content
 * or synthetic field content.
 */
Type getUnderlyingContentType(DataFlow::ContentSet c) {
  result = c.(DataFlow::SyntheticFieldContent).getField().getType()
  or
  result = c.(DataFlow::FieldContent).getField().getType()
}
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
}
}
/** Gets the MaD string representation of the qualifier, `Argument[this]`. */
string qualifierString() { result = "Argument[this]" }
private module PropagateFromSource = TaintTracking::Global<PropagateFromSourceConfig>;
/**
 * Gets the source model(s) of `api`, if there is flow from an existing known source
 * to a return node of `api`.
 */
string captureSource(DataFlowSourceTargetApi api) {
  exists(DataFlow::Node source, ReturnNodeExt sink, string kind |
    api = sink.getEnclosingCallable() and
    ExternalFlow::sourceNode(source, kind) and
    PropagateFromSource::flow(source, sink) and
    not irrelevantSourceSinkApi(source.getEnclosingCallable(), api)
  |
    result = Printing::asSourceModel(api, getOutput(sink), kind)
  )
}
/**
* A dataflow configuration used for finding new sinks.
* The sources are the parameters of the API and the fields of the enclosing type.
*
* This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field)
* into an existing known sink (then the API itself becomes a sink).
*/
module PropagateToSinkConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) {
apiSource(source) and source.getEnclosingCallable() instanceof DataFlowSinkTargetApi
/**
* Gets the MaD access string of the parameter `p`:
* - `Argument[i].ArrayElement` if `p` is an array whose element type is not used for bulk data,
* - otherwise `Argument[i].Element` if `p` has a container type,
* - otherwise `Argument[i]`,
* where `i` is the position of `p`.
*/
string parameterAccess(J::Parameter p) {
if
p.getType() instanceof J::Array and
not isPrimitiveTypeUsedForBulkData(p.getType().(J::Array).getElementType())
then result = "Argument[" + p.getPosition() + "].ArrayElement"
else
if p.getType() instanceof ContainerFlow::ContainerType
then result = "Argument[" + p.getPosition() + "].Element"
else result = "Argument[" + p.getPosition() + "]"
}
predicate isSink(DataFlow::Node sink) {
exists(string kind | isRelevantSinkKind(kind) and ExternalFlow::sinkNode(sink, kind))
}
/** Gets the MaD access string `Argument[i]` of the parameter `p`, where `i` is its position. */
string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
predicate isBarrier(DataFlow::Node node) {
exists(Type t | t = node.getType() and not isRelevantType(t))
class InstanceParameterNode = DataFlow::InstanceParameterNode;
bindingset[c]
string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) {
result = parameterAccess(c.getParameter(pos))
or
sinkModelSanitizer(node)
result = qualifierString() and pos = -1
}
DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext }
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
isRelevantTaintStep(node1, node2)
/**
 * Gets the MaD output string, when used in content flow, for the parameter of `c`
 * at position `pos`: the qualifier string for position -1, and the content
 * access string of the parameter at `pos` if `c` has one.
 */
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
  pos = -1 and result = qualifierString()
  or
  result = parameterContentAccess(c.getParameter(pos))
}
/** Gets the callable that encloses the node `ret`, as a `Callable`. */
Callable returnNodeEnclosingCallable(DataFlow::Node ret) {
result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable()
}
/** Holds if the expression of the return node `node` is a `this` access to its own instance. */
predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) {
node.asExpr().(J::ThisAccess).isOwnInstanceAccess()
}
/**
* Holds if `node` is a first use of an implicitly initialized SSA variable
* for which `captures` holds.
*/
predicate sinkModelSanitizer(DataFlow::Node node) {
// exclude variable capture jump steps
exists(Ssa::SsaImplicitInit closure |
closure.captures(_) and
node.asExpr() = closure.getAFirstUse()
)
}
/**
 * Holds if `source` is a parameter node or an access to a field of its own instance,
 * and the declaring type of its enclosing callable has a public ancestor
 * that is not `TypeObject`.
 */
predicate apiSource(DataFlow::Node source) {
  exists(J::RefType ancestor |
    ancestor.isPublic() and
    not ancestor instanceof J::TypeObject and
    ancestor = source.getEnclosingCallable().getDeclaringType().getAnAncestor()
  ) and
  (
    source instanceof DataFlow::ParameterNode
    or
    source.asExpr().(J::FieldAccess).isOwnFieldAccess()
  )
}
predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() }
/**
* Gets the MaD input string representation of `source`:
* `Argument[pos]` for a parameter node at a non-negative position `pos`, and the
* qualifier string for a parameter node at a negative position or for a field access.
*/
string getInputArgument(DataFlow::Node source) {
exists(int pos |
source.(DataFlow::ParameterNode).isParameterOf(_, pos) and
if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString()
)
or
source.asExpr() instanceof J::FieldAccess and
result = qualifierString()
}
/**
 * Holds if `kind` is a relevant sink kind, that is, it is neither `log-injection`
 * nor `file-content-store`, and it does not start with `regex-use`.
 */
bindingset[kind]
predicate isRelevantSinkKind(string kind) {
  not (
    kind = "log-injection"
    or
    kind.matches("regex-use%")
    or
    kind = "file-content-store"
  )
}
bindingset[kind]
predicate isRelevantSourceKind(string kind) { any() }
predicate containerContent = DataFlowPrivate::containerContent/1;
/**
* Holds if there is a default additional taint step from `node1` to `node2`
* that is not also a read step of container content.
*/
predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and
not exists(DataFlow::Content f |
DataFlowPrivate::readStep(node1, f, node2) and containerContent(f)
)
}
/** Holds if `c` is field content or synthetic field content. */
predicate isField(DataFlow::ContentSet c) {
  c instanceof DataFlowUtil::SyntheticFieldContent
  or
  c instanceof DataFlowUtil::FieldContent
}
/**
* Gets the synthetic name for the content `c`: the qualified name of the field
* if `c` is content of a non-public field, or the field of `c` if `c` is
* synthetic field content.
*/
string getSyntheticName(DataFlow::ContentSet c) {
exists(Field f |
not f.isPublic() and
f = c.(DataFlowUtil::FieldContent).getField() and
result = f.getQualifiedName()
)
or
result = c.(DataFlowUtil::SyntheticFieldContent).getField()
}
/**
* Gets the MaD string representation of the content `c`:
* `Field[...]` for a public field, `SyntheticField[...]` for content with a synthetic
* name, and `Element`, `ArrayElement`, `MapValue` or `MapKey` for collection, array,
* map value and map key content respectively.
*/
string printContent(DataFlow::ContentSet c) {
exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() |
result = "Field[" + f.getQualifiedName() + "]"
)
or
result = "SyntheticField[" + getSyntheticName(c) + "]"
or
c instanceof DataFlowUtil::CollectionContent and result = "Element"
or
c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement"
or
c instanceof DataFlowUtil::MapValueContent and result = "MapValue"
or
c instanceof DataFlowUtil::MapKeyContent and result = "MapKey"
}
/**
 * Holds if `api` has the name `name` and the parameter string `parameters`,
 * is declared in `type` in `package`, and `extensible` is the string
 * given by `isExtensible` for `api`.
 */
predicate partialModel(
  Callable api, string package, string type, string extensible, string name, string parameters
) {
  name = api.getName() and
  parameters = ExternalFlow::paramsString(api) and
  extensible = isExtensible(api) and
  qualifiedName(api, package, type)
}
predicate sourceNode = ExternalFlow::sourceNode/2;
predicate sinkNode = ExternalFlow::sinkNode/2;
}
private module PropagateToSink = TaintTracking::Global<PropagateToSinkConfig>;
/**
 * Gets the sink model(s) of `api`, if there is flow from a source node in `api`
 * to an existing known sink.
 */
string captureSink(DataFlowSinkTargetApi api) {
  exists(DataFlow::Node src, DataFlow::Node sink, string kind |
    api = src.getEnclosingCallable() and
    ExternalFlow::sinkNode(sink, kind) and
    PropagateToSink::flow(src, sink)
  |
    result = Printing::asSinkModel(api, asInputArgument(src), kind)
  )
}
import MakeModelGenerator<Location, JavaDataFlow, JavaTaintTracking, ModelGeneratorInput>

Просмотреть файл

@ -1,11 +1,11 @@
private import java as J
private import codeql.mad.modelgenerator.ModelPrinting
private import CaptureModelsSpecific as Specific
private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput
private module ModelPrintingLang implements ModelPrintingLangSig {
class Callable = J::Callable;
predicate partialModel = Specific::partialModel/6;
predicate partialModel = ModelGeneratorInput::partialModel/6;
}
import ModelPrintingImpl<ModelPrintingLang>

Просмотреть файл

@ -1,380 +0,0 @@
/**
* Provides predicates related to capturing summary models of the Standard or a 3rd party library.
*/
private import java as J
private import semmle.code.java.dataflow.internal.DataFlowPrivate
private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil
private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.java.dataflow.internal.ModelExclusions
private import semmle.code.java.dataflow.DataFlow as Df
private import semmle.code.java.dataflow.internal.ContentDataFlow as Cdf
private import semmle.code.java.dataflow.SSA as Ssa
private import semmle.code.java.dataflow.TaintTracking as Tt
import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
import semmle.code.java.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon
import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate
import semmle.code.java.dataflow.internal.DataFlowDispatch as DataFlowDispatch
module DataFlow = Df::DataFlow;
module ContentDataFlow = Cdf::ContentDataFlow;
module TaintTracking = Tt::TaintTracking;
class Type = J::Type;
class Unit = J::Unit;
class Callable = J::Callable;
class ContentSet = DataFlowUtil::ContentSet;
/**
 * Holds if the package of compilation unit `cu` has a name that starts with
 * `javax.swing` or `java.awt`.
 */
private predicate isInfrequentlyUsed(J::CompilationUnit cu) {
  cu.getPackage().getName().matches(["javax.swing%", "java.awt%"])
}
/**
 * Holds if `api` is a candidate for model generation: `api.fromSource()` holds,
 * both `api` and its declaring type are public, and `api` is neither excluded by
 * `isUninterestingForModels` nor located in an infrequently used compilation unit.
 */
private predicate relevant(Callable api) {
  api.fromSource() and
  api.isPublic() and
  api.getDeclaringType().isPublic() and
  not (
    isUninterestingForModels(api)
    or
    isInfrequentlyUsed(api.getCompilationUnit())
  )
}
/**
* Gets a result of `m.getAnOverride()` for which `relevant` holds, provided
* that `m` is not a `J::ToStringMethod`.
*/
private J::Method getARelevantOverride(J::Method m) {
result = m.getAnOverride() and
relevant(result) and
// Other exclusions for overrides.
not m instanceof J::ToStringMethod
}
/**
* Gets the super implementation of `m` if it is relevant.
* If such a super implementation does not exist, returns `m` if it is relevant.
* In both cases the result must not itself have a relevant override, so only
* the top-most relevant candidates are returned.
*/
private J::Callable liftedImpl(J::Callable m) {
(
result = getARelevantOverride(m)
or
result = m and relevant(m)
) and
// Reject candidates that could be lifted further.
not exists(getARelevantOverride(result))
}
/**
* Holds if `api` is the callable of a summarized callable that applies a manual model,
* or the callable of a neutral summary callable that has a manual model.
*/
private predicate hasManualSummaryModel(Callable api) {
api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or
api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable()
}
/**
* Holds if `api` is a source callable that has a manual model, or the callable
* of a neutral source callable that has a manual model.
*/
private predicate hasManualSourceModel(Callable api) {
api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()).asCallable()
}
/**
* Holds if `api` is a sink callable that has a manual model, or the callable
* of a neutral sink callable that has a manual model.
*/
private predicate hasManualSinkModel(Callable api) {
api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or
api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()).asCallable()
}
/**
* Holds if it is irrelevant to generate models for `api` based on data flow analysis.
* This is the case when `api` is declared in an interface and has no body.
*
* This serves as an extra filter for the `relevant` predicate.
*/
predicate isUninterestingForDataFlowModels(Callable api) {
api.getDeclaringType() instanceof J::Interface and not exists(api.getBody())
}
/**
* A class of callables that are potentially relevant for generating source or
* sink models, that is, callables for which `relevant` holds.
*/
class SourceOrSinkTargetApi extends Callable {
SourceOrSinkTargetApi() { relevant(this) }
}
/**
* A class of callables that are potentially relevant for generating sink models:
* source-or-sink target APIs that do not already have a manual sink model.
*/
class SinkTargetApi extends SourceOrSinkTargetApi {
SinkTargetApi() { not hasManualSinkModel(this) }
}
/**
* A class of callables that are potentially relevant for generating source models:
* source-or-sink target APIs that do not already have a manual source model.
*/
class SourceTargetApi extends SourceOrSinkTargetApi {
SourceTargetApi() { not hasManualSourceModel(this) }
}
/**
* Holds if it is irrelevant to generate models for `api` based on type-based analysis.
*
* This serves as an extra filter for the `relevant` predicate.
* This implementation is `none()`, so it never holds and filters out nothing.
*/
predicate isUninterestingForTypeBasedFlowModels(Callable api) { none() }
/**
* A class of callables that are potentially relevant for generating summary or
* neutral models.
*
* In the Standard library and 3rd party libraries it is the callables (or callables that have a
* super implementation) that can be called from outside the library itself.
*/
class SummaryTargetApi extends Callable {
// The callable that models for this callable are lifted to, as computed by `liftedImpl`.
private Callable lift;
SummaryTargetApi() {
lift = liftedImpl(this) and
// Callables whose lifted target already has a manual summary model are excluded.
not hasManualSummaryModel(lift)
}
/**
* Gets the callable that a model will be lifted to.
*/
Callable lift() { result = lift }
/**
* Holds if this callable is relevant in terms of generating models,
* that is, `relevant` holds for this callable itself (not only for its lifted target).
*/
predicate isRelevant() { relevant(this) }
}
/**
 * Gets the string `"false"` if the declaring type of `c` is final,
 * and the string `"true"` otherwise.
 */
private string isExtensible(Callable c) {
  c.getDeclaringType().isFinal() and result = "false"
  or
  not c.getDeclaringType().isFinal() and result = "true"
}
/**
 * Holds if the callable `c` is in package `package`
 * and is a member of `type`.
 *
 * `package` is the package name of the compilation unit of the declaring type of `c`,
 * and `type` is the nested name of the erasure of that declaring type.
 */
private predicate qualifiedName(Callable c, string package, string type) {
  // `java` is imported privately as `J`; qualify `RefType` like the cast below does,
  // instead of relying on an unqualified name being re-exported by another import.
  exists(J::RefType t | t = c.getDeclaringType() |
    package = t.getCompilationUnit().getPackage().getName() and
    type = t.getErasure().(J::RefType).getNestedName()
  )
}
/**
* Holds if `package`, `type`, `extensible`, `name` and `parameters` describe `api`:
* its package and declaring type (from `qualifiedName`), whether it is extensible
* (from `isExtensible`), its name, and its parameter string
* (from `ExternalFlow::paramsString`).
*/
predicate partialModel(
Callable api, string package, string type, string extensible, string name, string parameters
) {
qualifiedName(api, package, type) and
extensible = isExtensible(api) and
name = api.getName() and
parameters = ExternalFlow::paramsString(api)
}
/**
* Holds if `t` has one of the names `byte`, `char`, `Byte` or `Character`.
*
* NOTE(review): presumably these are the element types whose arrays/collections
* carry bulk data (for example byte buffers) - confirm against the callers.
*/
predicate isPrimitiveTypeUsedForBulkData(J::Type t) {
t.hasName(["byte", "char", "Byte", "Character"])
}
/**
* Holds for type `t` for fields that are relevant as an intermediate
* read or write step in the data flow analysis.
*
* Class types (`J::TypeClass`), enums, primitive and boxed types, and subtypes of
* `java.lang.Number` or `java.nio.charset.Charset` are excluded. Arrays and collections
* of primitive or boxed elements are excluded as well, unless the element type
* satisfies `isPrimitiveTypeUsedForBulkData`.
*/
predicate isRelevantType(J::Type t) {
not t instanceof J::TypeClass and
not t instanceof J::EnumType and
not t instanceof J::PrimitiveType and
not t instanceof J::BoxedType and
not t.(J::RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and
not t.(J::RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and
// Arrays with primitive elements are only kept for bulk data element types.
(
not t.(J::Array).getElementType() instanceof J::PrimitiveType or
isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType())
) and
// Arrays with boxed elements are only kept for bulk data element types.
(
not t.(J::Array).getElementType() instanceof J::BoxedType or
isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType())
) and
// Collections with boxed elements are only kept for bulk data element types.
(
not t.(ContainerFlow::CollectionType).getElementType() instanceof J::BoxedType or
isPrimitiveTypeUsedForBulkData(t.(ContainerFlow::CollectionType).getElementType())
)
}
/**
* Gets the underlying type of the content `c`, that is, the type of the field
* of a field content or of a synthetic field content.
* There is no result for other kinds of content.
*/
J::Type getUnderlyingContentType(DataFlow::Content c) {
result = c.(DataFlow::FieldContent).getField().getType() or
result = c.(DataFlow::SyntheticFieldContent).getField().getType()
}
/**
* Gets the MaD string representation of the qualifier, which is `Argument[this]`.
*/
string qualifierString() { result = "Argument[this]" }
/**
 * Gets the MaD string representation of the parameter `p`.
 *
 * The result is `Argument[i]` for position `i`, extended with `.ArrayElement` for
 * arrays whose element type is not a bulk data type, or with `.Element` for
 * other container types.
 */
string parameterAccess(J::Parameter p) {
  exists(string arg | arg = "Argument[" + p.getPosition() + "]" |
    if
      p.getType() instanceof J::Array and
      not isPrimitiveTypeUsedForBulkData(p.getType().(J::Array).getElementType())
    then result = arg + ".ArrayElement"
    else
      if p.getType() instanceof ContainerFlow::ContainerType
      then result = arg + ".Element"
      else result = arg
  )
}
/**
* Gets the MaD string representation of the parameter `p`
* when used in content flow. Unlike `parameterAccess`, the result is always
* the plain `Argument[i]` form without an `.ArrayElement` or `.Element` suffix.
*/
string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" }
// Aliases exposing data flow classes under unqualified names.
class InstanceParameterNode = DataFlow::InstanceParameterNode;
class ParameterPosition = DataFlowDispatch::ParameterPosition;
/**
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c`. Position `-1` is rendered as the qualifier string.
*/
bindingset[c]
string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) {
result = parameterAccess(c.getParameter(pos))
or
// Position -1 stands for the qualifier rather than a declared parameter.
result = qualifierString() and pos = -1
}
/**
* Gets the MaD string representation of return through parameter at position
* `pos` of callable `c` for content flow. Position `-1` is rendered as the
* qualifier string.
*/
bindingset[c]
string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) {
result = parameterContentAccess(c.getParameter(pos))
or
// Position -1 stands for the qualifier rather than a declared parameter.
result = qualifierString() and pos = -1
}
/**
* Gets the enclosing callable of `ret`, obtained from
* `DataFlowImplCommon::getNodeEnclosingCallable` and converted with `asCallable()`.
*/
Callable returnNodeEnclosingCallable(DataFlow::Node ret) {
result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable()
}
/**
* Holds if `node` is an own instance access, that is, the expression of `node`
* is a `this` access for which `isOwnInstanceAccess()` holds.
*/
predicate isOwnInstanceAccessNode(ReturnNode node) {
node.asExpr().(J::ThisAccess).isOwnInstanceAccess()
}
/**
* Holds if the expression of `node` is a first use of an implicitly initialized
* SSA variable that captures another variable.
*
* NOTE(review): judging by the name, this is used as a barrier when generating
* sink models - confirm against the caller.
*/
predicate sinkModelSanitizer(DataFlow::Node node) {
// exclude variable capture jump steps
exists(Ssa::SsaImplicitInit closure |
closure.captures(_) and
node.asExpr() = closure.getAFirstUse()
)
}
/**
* Holds if `source` is an api entrypoint relevant for creating sink models.
*
* An entrypoint is either an own field access or a parameter node, located in a
* callable whose declaring type has a public ancestor other than `Object`.
*/
predicate apiSource(DataFlow::Node source) {
(
source.asExpr().(J::FieldAccess).isOwnFieldAccess() or
source instanceof DataFlow::ParameterNode
) and
// Require some public ancestor of the declaring type, ignoring `Object`.
exists(J::RefType t |
t = source.getEnclosingCallable().getDeclaringType().getAnAncestor() and
not t instanceof J::TypeObject and
t.isPublic()
)
}
/**
* Holds if it is not relevant to generate a source model for `api`, even
* if flow is detected from a node within `source` to a sink within `api`.
*
* This implementation is `none()`, so it never holds.
*/
predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() }
/**
 * Gets the MaD input string representation of `source`.
 *
 * Parameter nodes at a non-negative position `i` are rendered as `Argument[i]`;
 * parameter nodes at a negative position and field accesses are rendered as
 * the qualifier string.
 */
string asInputArgumentSpecific(DataFlow::Node source) {
  exists(int pos | source.(DataFlow::ParameterNode).isParameterOf(_, pos) |
    pos >= 0 and result = "Argument[" + pos + "]"
    or
    pos < 0 and result = qualifierString()
  )
  or
  source.asExpr() instanceof J::FieldAccess and
  result = qualifierString()
}
/**
 * Holds if `kind` is a relevant sink kind for creating sink models.
 *
 * The kinds `log-injection` and `file-content-store`, and every kind that
 * starts with `regex-use`, are not relevant.
 */
bindingset[kind]
predicate isRelevantSinkKind(string kind) {
  not kind = ["log-injection", "file-content-store"] and
  not kind.matches("regex-use%")
}
/**
* Holds if `kind` is a relevant source kind for creating source models.
*
* This implementation is `any()`, so every bound `kind` is relevant.
*/
bindingset[kind]
predicate isRelevantSourceKind(string kind) { any() }
// Alias for the unary predicate `DataFlowPrivate::containerContent`.
predicate containerContent = DataFlowPrivate::containerContent/1;
/**
* Holds if there is a taint step from `node1` to `node2` in content flow.
*
* These are the default additional taint steps, except for those that coincide
* with a read step of container content from `node1` to `node2`.
*/
predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and
// Exclude taint steps that are also reads of container content.
not exists(DataFlow::Content f |
DataFlowPrivate::readStep(node1, f, node2) and containerContent(f)
)
}
/**
 * Holds if the content set `c` is a synthetic field content or a field content.
 */
predicate isField(ContentSet c) {
  c instanceof DataFlowUtil::SyntheticFieldContent
  or
  c instanceof DataFlowUtil::FieldContent
}
/**
 * Gets the MaD synthetic name string representation for the content set `c`, if any.
 *
 * For a field content whose field is not public, this is the qualified name of the
 * field; for a synthetic field content it is the synthetic field itself.
 * Public fields have no synthetic name.
 */
string getSyntheticName(DataFlow::ContentSet c) {
  // `java` is imported privately as `J`; qualify `Field` like the other Java
  // types in this file instead of relying on an unqualified name being in scope.
  exists(J::Field f |
    not f.isPublic() and
    f = c.(DataFlowUtil::FieldContent).getField() and
    result = f.getQualifiedName()
  )
  or
  result = c.(DataFlowUtil::SyntheticFieldContent).getField()
}
/**
 * Gets the MaD string representation of the content set `c`.
 *
 * Public fields are printed as `Field[...]`, contents with a synthetic name
 * (see `getSyntheticName`) as `SyntheticField[...]`, and collection, array,
 * map value and map key contents as `Element`, `ArrayElement`, `MapValue`
 * and `MapKey` respectively.
 */
string printContent(ContentSet c) {
  // `java` is imported privately as `J`; qualify `Field` like the other Java
  // types in this file instead of relying on an unqualified name being in scope.
  exists(J::Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() |
    result = "Field[" + f.getQualifiedName() + "]"
  )
  or
  result = "SyntheticField[" + getSyntheticName(c) + "]"
  or
  c instanceof DataFlowUtil::CollectionContent and result = "Element"
  or
  c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement"
  or
  c instanceof DataFlowUtil::MapValueContent and result = "MapValue"
  or
  c instanceof DataFlowUtil::MapKeyContent and result = "MapKey"
}

Просмотреть файл

@ -80,5 +80,5 @@ string captureFlow(DataFlowSummaryTargetApi api) {
string captureNoFlow(DataFlowSummaryTargetApi api) {
not exists(DataFlowSummaryTargetApi api0 | exists(captureFlow(api0)) and api0.lift() = api.lift()) and
api.isRelevant() and
result = Printing::asNeutralSummaryModel(api)
result = ModelPrinting::asNeutralSummaryModel(api)
}

Просмотреть файл

@ -1,7 +1,8 @@
private import java
private import semmle.code.java.Collections
private import semmle.code.java.dataflow.internal.ContainerFlow
private import CaptureModelsSpecific as Specific
private import CaptureModels as CaptureModels
private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput
private import CaptureModelsPrinting
/**
@ -81,7 +82,7 @@ private predicate localTypeParameter(Callable callable, TypeVariable tv) {
private string getAccessPath(Type t) {
if
t instanceof Array and
not Specific::isPrimitiveTypeUsedForBulkData(t.(Array).getElementType())
not CaptureModels::isPrimitiveTypeUsedForBulkData(t.(Array).getElementType())
then result = ".ArrayElement"
else
if t instanceof ContainerType or t instanceof IterableClass
@ -134,7 +135,7 @@ private string implicit(Callable callable, TypeVariable tv) {
then access = getAccessPath(decl)
else access = getSyntheticField(tv)
|
result = Specific::qualifierString() + access
result = ModelGeneratorInput::qualifierString() + access
)
}
@ -286,7 +287,7 @@ private predicate output(Callable callable, TypeVariable tv, string output) {
module ModelPrintingInput implements ModelPrintingSig {
class SummaryApi = TypeBasedFlowTargetApi;
class SourceOrSinkApi = Specific::SourceOrSinkTargetApi;
class SourceOrSinkApi = ModelGeneratorInput::SourceOrSinkTargetApi;
string getProvenance() { result = "tb-generated" }
}
@ -297,9 +298,7 @@ private module Printing = ModelPrinting<ModelPrintingInput>;
* A class of callables that are relevant for generating summaries based
* on the Theorems for Free approach.
*/
class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi {
TypeBasedFlowTargetApi() { not Specific::isUninterestingForTypeBasedFlowModels(this) }
class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi {
/**
* Gets the string representation of all type based summaries for `this`
* inspired by the Theorems for Free approach.