This commit is contained in:
Stephan Brandauer 2024-11-19 10:40:04 +01:00
Родитель aea7c3fc81
Коммит 4208f031e3
36 изменённых файлов: 0 добавлений и 2994 удалений

Просмотреть файл

@ -1,197 +0,0 @@
#!/bin/bash
# Release helper for the automodel query pack. This first section defines the
# usage text, parses the command-line flags into OVERRIDE_RELEASE / DRY_RUN
# (each 0 or 1) and announces what the rest of the script is going to do.
set -e

help="Usage: ./publish [--override-release] [--dry-run]
Publish the automodel query pack.
If no arguments are provided, publish the version of the codeql repo specified by the latest official release of the codeml-automodel repo.
If the --override-release argument is provided, your current local HEAD is used (for unofficial releases or patching).
If the --dry-run argument is provided, the release is not published (for testing purposes)."

# Echo the help message
if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  echo "$help"
  exit 0
fi

# Only two flags exist, so more than two arguments can never be valid.
if (( $# > 2 )); then
  echo "Error: Invalid arguments provided"
  echo "$help"
  exit 1
fi

OVERRIDE_RELEASE=0
DRY_RUN=0

# Consume the positional parameters one by one. The `shift` at the bottom is
# what advances the loop, so every argument is inspected exactly once.
while (( $# > 0 )); do
  arg=$1
  case "$arg" in
    --override-release)
      OVERRIDE_RELEASE=1
      ;;
    --dry-run)
      DRY_RUN=1
      ;;
    *)
      echo "Error: Invalid argument provided: $arg"
      echo "$help"
      exit 1
      ;;
  esac
  shift
done

# Describe what we're about to do based on the command-line arguments
if [[ "$OVERRIDE_RELEASE" == 1 ]]; then
  echo "Publishing the current HEAD of the automodel repo"
else
  echo "Publishing the version of the automodel repo specified by the latest official release of the codeml-automodel repo"
fi

if [[ "$DRY_RUN" == 1 ]]; then
  echo "Dry run: we will step through the process but we won't publish the query pack"
else
  echo "Not a dry run! Publishing the query pack"
fi
# If we're publishing the codeml-automodel release then we will checkout the sha specified in the release.
# So we need to check that there are no uncommitted changes in the local branch.
# And, if we're publishing the current HEAD, it's cleaner to ensure that there are no uncommitted changes.
# `git diff --quiet` alone only compares the working tree with the index, so
# staged-but-uncommitted changes are checked separately with `--cached`.
if ! git diff --quiet || ! git diff --cached --quiet; then
  echo "Error: Uncommitted changes exist. Please commit or stash your changes before publishing."
  exit 1
fi

# Check that the tokens required by the later publishing steps are set:
# GITHUB_TOKEN and GH_TOKEN (the error messages state the permissions each needs).
if [ -z "${GITHUB_TOKEN:-}" ]; then
  echo "Error: GITHUB_TOKEN environment variable not set. Please set this to a token with package:write permissions to codeql."
  exit 1
fi
if [ -z "${GH_TOKEN:-}" ]; then
  echo "Error: GH_TOKEN environment variable not set. Please set this to a token with repo permissions to github/codeml-automodel."
  exit 1
fi
# Get the sha of the previous release, i.e. the last commit to the main branch that updated the query pack version
PREVIOUS_RELEASE_SHA=$(git rev-list -n 1 main -- ./src/qlpack.yml)
if [ -z "$PREVIOUS_RELEASE_SHA" ]; then
  echo "Error: Could not get the sha of the previous release of codeml-automodel query pack"
  exit 1
else
  echo "Previous query-pack release sha: $PREVIOUS_RELEASE_SHA"
fi

# Remember where we started; CURRENT_BRANCH is checked out again once the
# release has been prepared.
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
CURRENT_SHA=$(git rev-parse HEAD)

if [[ "$OVERRIDE_RELEASE" == 1 ]]; then
  # Check that the current HEAD is downstream from PREVIOUS_RELEASE_SHA
  if ! git merge-base --is-ancestor "$PREVIOUS_RELEASE_SHA" "$CURRENT_SHA"; then
    echo "Error: The current HEAD is not downstream from the previous release"
    exit 1
  fi
else
  # Get the latest release of codeml-automodel.
  # `// empty` makes jq print nothing (instead of the literal string "null")
  # when the field is absent, so the emptiness check below actually triggers.
  TAG_NAME=$(gh api -H 'Accept: application/vnd.github+json' -H 'X-GitHub-Api-Version: 2022-11-28' /repos/github/codeml-automodel/releases/latest | jq -r '.tag_name // empty')
  # Check TAG_NAME is not empty
  if [ -z "$TAG_NAME" ]; then
    echo "Error: Could not get latest release of codeml-automodel"
    exit 1
  fi
  echo "Updating to latest automodel release: $TAG_NAME"

  # Before downloading, delete any existing release.zip (-f: no error if absent)
  rm -f release.zip
  gh release download "$TAG_NAME" -A zip -O release.zip --repo 'https://github.com/github/codeml-automodel'

  # Before unzipping, delete any existing release directory (-f: no error if absent)
  rm -rf release
  unzip -o release.zip -d release

  # The glob is intentionally unquoted: the extracted top-level directory name
  # is only known up to its `codeml-automodel` prefix.
  REVISION=$(jq -r '.["codeql-sha"] // empty' release/codeml-automodel*/codeml-automodel-release.json)
  if [ -z "$REVISION" ]; then
    echo "Error: Could not read the codeql sha from the latest codeml-automodel release"
    exit 1
  fi
  echo "The latest codeml-automodel release specifies the codeql sha $REVISION"

  # Check that REVISION is downstream from PREVIOUS_RELEASE_SHA
  if ! git merge-base --is-ancestor "$PREVIOUS_RELEASE_SHA" "$REVISION"; then
    echo "Error: The codeql version $REVISION is not downstream of the query-pack version $PREVIOUS_RELEASE_SHA"
    exit 1
  fi

  # Get the version of the codeql code specified by the codeml-automodel release
  git checkout "$REVISION"
fi
# Get the absolute path of the automodel repo.
# "$0" is quoted so that a checkout path containing spaces still resolves.
AUTOMODEL_ROOT="$(readlink -f "$(dirname "$0")")"
# Get the path of the workspace root (three levels above the automodel directory)
WORKSPACE_ROOT="$AUTOMODEL_ROOT/../../.."
# Specify the groups of queries to test and publish
GRPS="automodel,-test"

# Install the codeql gh extension
gh extensions install github/gh-codeql

# Run the query tests from inside the automodel directory.
pushd "$AUTOMODEL_ROOT"
echo Testing automodel queries
gh codeql test run test
popd

# The pack commands are run from the workspace root.
pushd "$WORKSPACE_ROOT"
echo "Preparing the release"
gh codeql pack release --groups "$GRPS" -v
if [[ "$DRY_RUN" == 1 ]]; then
  echo "Dry run: not publishing the query pack"
  gh codeql pack publish --groups "$GRPS" --dry-run -v
else
  echo "Not a dry run! Publishing the query pack"
  gh codeql pack publish --groups "$GRPS" -v
fi
echo "Bumping versions"
gh codeql pack post-release --groups "$GRPS" -v
popd
# The above commands update
# ./src/CHANGELOG.md
# ./src/codeql-pack.release.yml
# ./src/qlpack.yml
# and add a new file
# ./src/change-notes/released/<version>.md

# Find the most recently modified file matching ./src/change-notes/released/*.md.
# This will be the file for the new release. A glob plus `-nt` comparison is
# used instead of parsing `ls` output, and a missing file is reported
# explicitly rather than surfacing later as a failing `mv`.
NEW_CHANGE_NOTES_FILE=""
for candidate in ./src/change-notes/released/*.md; do
  # With no match the glob stays literal; skip that non-existent path.
  [ -e "$candidate" ] || continue
  if [ -z "$NEW_CHANGE_NOTES_FILE" ] || [ "$candidate" -nt "$NEW_CHANGE_NOTES_FILE" ]; then
    NEW_CHANGE_NOTES_FILE=$candidate
  fi
done
if [ -z "$NEW_CHANGE_NOTES_FILE" ]; then
  echo "Error: Could not find any change notes in ./src/change-notes/released/"
  exit 1
fi

# Set the modified files aside under a .dry-run suffix so they survive the
# checkout below.
mv ./src/CHANGELOG.md ./src/CHANGELOG.md.dry-run
mv ./src/codeql-pack.release.yml ./src/codeql-pack.release.yml.dry-run
mv ./src/qlpack.yml ./src/qlpack.yml.dry-run
mv "$NEW_CHANGE_NOTES_FILE" ./src/change-notes/released.md.dry-run

if [[ "$OVERRIDE_RELEASE" == 1 ]]; then
  # Restore the original files
  git checkout ./src/CHANGELOG.md
  git checkout ./src/codeql-pack.release.yml
  git checkout ./src/qlpack.yml
else
  # Restore the original files by returning to the branch we started on
  git checkout "$CURRENT_BRANCH" --force
fi

if [[ "$DRY_RUN" == 1 ]]; then
  echo "Inspect the updated dry-run version files:"
  ls -l ./src/*.dry-run
  ls -l ./src/change-notes/*.dry-run
else
  # Add the updated files to the current branch
  echo "Adding the version changes"
  mv -f ./src/CHANGELOG.md.dry-run ./src/CHANGELOG.md
  mv -f ./src/codeql-pack.release.yml.dry-run ./src/codeql-pack.release.yml
  mv -f ./src/qlpack.yml.dry-run ./src/qlpack.yml
  mv -f ./src/change-notes/released.md.dry-run "$NEW_CHANGE_NOTES_FILE"
  git add ./src/CHANGELOG.md
  git add ./src/codeql-pack.release.yml
  git add ./src/qlpack.yml
  git add "$NEW_CHANGE_NOTES_FILE"
  echo "Added the following updated version files to the current branch:"
  git status -s
  echo "To complete the release, please commit these files and merge to the main branch"
fi
echo "Done"

Просмотреть файл

@ -1,183 +0,0 @@
private import java
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
private import semmle.code.java.dataflow.TaintTracking
private import semmle.code.java.security.RequestForgeryConfig
private import semmle.code.java.security.CommandLineQuery
private import semmle.code.java.security.SqlConcatenatedQuery
private import semmle.code.java.security.SqlInjectionQuery
private import semmle.code.java.security.UrlRedirectQuery
private import semmle.code.java.security.TaintedPathQuery
private import semmle.code.java.security.SqlInjectionQuery
private import AutomodelJavaUtil
/**
* The algebraic type underlying `SinkModel`: one value for every tuple of
* the first nine columns for which `ExternalFlow::sinkModel` holds. The final
* column of `ExternalFlow::sinkModel` is ignored.
*/
private newtype TSinkModel =
MkSinkModel(
string package, string type, boolean subtypes, string name, string signature, string ext,
string input, string kind, string provenance
) {
ExternalFlow::sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance,
_)
}
/**
 * A sink model, identified by the columns of the `MkSinkModel` branch it wraps.
 * Each column is exposed through a getter of the same name.
 */
class SinkModel extends TSinkModel {
  string package;
  string type;
  boolean subtypes;
  string name;
  string signature;
  string ext;
  string input;
  string kind;
  string provenance;

  SinkModel() {
    MkSinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance) = this
  }

  /** Gets the package column of this sink model. */
  string getPackage() { package = result }

  /** Gets the type column of this sink model. */
  string getType() { type = result }

  /** Gets the flag saying whether this sink model considers subtypes. */
  boolean getSubtypes() { subtypes = result }

  /** Gets the name column of this sink model. */
  string getName() { name = result }

  /** Gets the signature column of this sink model. */
  string getSignature() { signature = result }

  /** Gets the input column of this sink model. */
  string getInput() { input = result }

  /** Gets the extension column of this sink model. */
  string getExt() { ext = result }

  /** Gets the kind column of this sink model. */
  string getKind() { kind = result }

  /** Gets the provenance column of this sink model. */
  string getProvenance() { provenance = result }

  /** Gets the number of `PotentialSinkModelExpr`s whose sink model is this one. */
  int getInstanceCount() {
    result = count(PotentialSinkModelExpr candidate | this = candidate.getSinkModel())
  }

  /** Gets a human-readable rendering of all columns of this sink model. */
  string toString() {
    exists(string columns |
      columns =
        package + ", " + type + ", " + subtypes + ", " + name + ", " + signature + ", " + ext + ", " +
          input + ", " + kind + ", " + provenance
    |
      result = "SinkModel(" + columns + ")"
    )
  }

  /**
   * Gets the row this sink model would occupy in a Models-as-Data file: every
   * string column double-quoted, the `subtypes` flag rendered via `pyBool`.
   */
  string getRepr() {
    exists(string head, string tail |
      head = "\"" + package + "\", \"" + type + "\", " + pyBool(subtypes) and
      tail =
        ", \"" + name + "\", \"" + signature + "\", \"" + ext + "\", \"" + input + "\", \"" + kind +
          "\", \"" + provenance + "\""
    |
      result = head + tail
    )
  }
}
/** An expression that may correspond to a sink model. */
class PotentialSinkModelExpr extends Expr {
  /**
   * Holds if this expression is an argument, or the qualifier, of a call whose
   * callee's source declaration is described by `package`, `type`, `subtypes`,
   * `name` and `signature`, with `input` naming the position this expression
   * occupies (`Argument[this]` for the qualifier, `Argument[i]` otherwise).
   *
   * Together these columns are enough to look up a corresponding sink model,
   * if one exists.
   */
  pragma[nomagic]
  predicate hasSignature(
    string package, string type, boolean subtypes, string name, string signature, string input
  ) {
    exists(Call call, Callable callable, int argIdx |
      callable = call.getCallee().getSourceDeclaration() and
      // position of this expression in the call; -1 stands for the qualifier
      (
        call.getQualifier() = this and argIdx = -1
        or
        call.getArgument(argIdx) = this
      ) and
      (
        argIdx = -1 and input = "Argument[this]"
        or
        argIdx != -1 and input = "Argument[" + argIdx + "]"
      ) and
      name = callable.getName() and
      signature = ExternalFlow::paramsString(callable) and
      subtypes = considerSubtypes(callable) and
      type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and
      package = callable.getDeclaringType().getPackage().getName()
    )
  }

  /** Gets a sink model whose columns agree with a signature of this expression. */
  SinkModel getSinkModel() {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string input
    |
      this.hasSignature(package, type, subtypes, name, signature, input)
    |
      result.getPackage() = package and
      result.getType() = type and
      result.getSubtypes() = subtypes and
      result.getName() = name and
      result.getSignature() = signature and
      result.getInput() = input
    )
  }
}
/** Gets the capitalised spelling of `b`: `True` for `true`, `False` for `false`. */
private string pyBool(boolean b) {
  result = "True" and b = true
  or
  result = "False" and b = false
}
/**
 * Gets a string representation of an existing `ai-generated` sink model at the
 * expression `e`, in the format in which it would appear in a Models-as-Data file.
 *
 * The provenance check and the representation refer to the same sink model:
 * an expression can correspond to several sink models, and only those whose
 * provenance is `ai-generated` contribute a result.
 */
string getSinkModelRepr(PotentialSinkModelExpr e) {
  exists(SinkModel model |
    model = e.getSinkModel() and
    model.getProvenance() = "ai-generated" and
    result = model.getRepr()
  )
}
/**
 * Gets the text to append to an alert message for the sink model at `e`:
 * a newline, the label `sinkModel: `, and the result of `getSinkModelRepr(e)`.
 */
string getSinkModelQueryRepr(PotentialSinkModelExpr e) {
  exists(string repr | repr = getSinkModelRepr(e) | result = "\nsinkModel: " + repr)
}
/**
 * A parameterised module that instantiates global taint tracking for the given
 * data flow configuration and exposes a predicate counting, per sink model, the
 * flow-path sinks that correspond to it.
 */
private module SinkTallier<DataFlow::ConfigSig Config> {
  module ConfigFlow = TaintTracking::Global<Config>;

  /**
   * Holds if `s` is the sink model of some path node of this configuration and
   * `c` is the (non-zero) number of path nodes that end a flow path and whose
   * expression corresponds to `s`.
   */
  predicate getSinkModelCount(int c, SinkModel s) {
    exists(ConfigFlow::PathNode node |
      s = node.getNode().asExpr().(PotentialSinkModelExpr).getSinkModel()
    ) and
    c =
      strictcount(ConfigFlow::PathNode sink |
        s = sink.getNode().asExpr().(PotentialSinkModelExpr).getSinkModel() and
        ConfigFlow::flowPath(_, sink)
      )
  }
}
/**
 * Holds if `alertCount` is the count that `SinkTallier` reports for `sinkModel`
 * under the taint-tracking configuration named `configName`.
 *
 * Every configuration is listed exactly once, so this relation is the right
 * basis for totals across configurations.
 */
private predicate sinkModelTallyPerConfig(string configName, int alertCount, SinkModel sinkModel) {
  configName = "RequestForgeryConfig" and
  SinkTallier<RequestForgeryConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  configName = "InputToArgumentToExecFlowConfig" and
  SinkTallier<InputToArgumentToExecFlowConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  configName = "UncontrolledStringBuilderSourceFlowConfig" and
  SinkTallier<UncontrolledStringBuilderSourceFlowConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  configName = "TaintedPathConfig" and
  SinkTallier<TaintedPathConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  configName = "UrlRedirectConfig" and
  SinkTallier<UrlRedirectConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  configName = "QueryInjectionFlowConfig" and
  SinkTallier<QueryInjectionFlowConfig>::getSinkModelCount(alertCount, sinkModel)
}

/**
 * Gets the name of the configuration (as used by `sinkModelTallyPerConfig`)
 * whose results are reported under the query identifier `queryName`.
 *
 * Two identifiers, `java/request-forgery` and `java/ssrf`, share one configuration.
 */
private string getConfigNameForQuery(string queryName) {
  queryName = ["java/request-forgery", "java/ssrf"] and result = "RequestForgeryConfig"
  or
  queryName = "java/command-line-injection" and result = "InputToArgumentToExecFlowConfig"
  or
  queryName = "java/concatenated-sql-query" and
  result = "UncontrolledStringBuilderSourceFlowConfig"
  or
  queryName = "java/path-injection" and result = "TaintedPathConfig"
  or
  queryName = "java/unvalidated-url-redirection" and result = "UrlRedirectConfig"
  or
  queryName = "java/sql-injection" and result = "QueryInjectionFlowConfig"
}

/**
 * Holds if the query identified by `queryName` has `alertCount` flow-path sinks
 * that correspond to `sinkModel`.
 */
predicate sinkModelTallyPerQuery(string queryName, int alertCount, SinkModel sinkModel) {
  sinkModelTallyPerConfig(getConfigNameForQuery(queryName), alertCount, sinkModel)
}

/**
 * Holds if `alertCount` is the total, over all tallied configurations, of the
 * counts for `sinkModel`.
 *
 * The aggregate ranges over `(configName, c)` pairs rather than over `c` alone:
 * an aggregate over `c` alone would visit each distinct count only once and
 * silently drop configurations that happen to report the same number. Summing
 * per configuration (not per query identifier) also keeps the configuration
 * shared by two query identifiers from being counted twice.
 */
predicate sinkModelTally(int alertCount, SinkModel sinkModel) {
  sinkModelTallyPerConfig(_, _, sinkModel) and
  alertCount =
    sum(string configName, int c | sinkModelTallyPerConfig(configName, c, sinkModel) | c)
}

Просмотреть файл

@ -1,16 +0,0 @@
/**
* @name Number of alerts per sink model
* @description Counts the number of alerts using `ai-generated` sink models.
* @kind table
* @id java/ml/metrics-count-alerts-per-sink-model
* @tags internal automodel metrics
*/
private import java
private import AutomodelAlertSinkUtil
// One row per `ai-generated` sink model with its total tally, largest tally first.
from SinkModel s, int alertCount
where
  s.getProvenance() = "ai-generated" and
  sinkModelTally(alertCount, s)
select alertCount, s.getPackage() as package, s.getType() as type, s.getSubtypes() as subtypes,
  s.getName() as name, s.getSignature() as signature, s.getInput() as input, s.getExt() as ext,
  s.getKind() as kind, s.getProvenance() as provenance order by alertCount desc

Просмотреть файл

@ -1,19 +0,0 @@
/**
* @name Number of alerts per sink model and query
* @description Counts the number of alerts per query using `ai-generated` sink models.
* @kind table
* @id java/ml/metrics-count-alerts-per-sink-model-and-query
* @tags internal automodel metrics
*/
private import java
private import AutomodelAlertSinkUtil
// One row per (query, `ai-generated` sink model) pair with its tally, grouped by
// query and sorted by descending tally within each query.
from SinkModel s, string queryId, int alertCount
where
  s.getProvenance() = "ai-generated" and
  sinkModelTallyPerQuery(queryId, alertCount, s)
select queryId, alertCount, s.getPackage() as package, s.getType() as type,
  s.getSubtypes() as subtypes, s.getName() as name, s.getSignature() as signature,
  s.getInput() as input, s.getExt() as ext, s.getKind() as kind, s.getProvenance() as provenance
  order by queryId, alertCount desc

Просмотреть файл

@ -1,677 +0,0 @@
/**
* For internal use only.
*/
private import java
private import semmle.code.Location as Location
private import semmle.code.java.dataflow.DataFlow
private import semmle.code.java.dataflow.TaintTracking
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
private import semmle.code.java.Expr as Expr
private import semmle.code.java.security.QueryInjection
private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
private import AutomodelJavaUtil as AutomodelJavaUtil
private import semmle.code.java.security.PathSanitizer as PathSanitizer
import AutomodelSharedCharacteristics as SharedCharacteristics
import AutomodelEndpointTypes as AutomodelEndpointTypes
/**
* The kinds of related location that can be attached to an endpoint: the
* surrounding call (`CallContext`), the Javadoc of the modeled callable
* (`MethodDoc`) and the Javadoc of its declaring type (`ClassDoc`).
*/
newtype JavaRelatedLocationType =
CallContext() or
MethodDoc() or
ClassDoc()
/**
* The algebraic type of application-mode endpoints. Every branch requires its
* call or parameter to satisfy `AutomodelJavaUtil::isFromSource`, and every
* branch except `TOverriddenParameter` excludes values whose type satisfies
* `AutomodelJavaUtil::isUnexploitableType`.
*/
newtype TApplicationModeEndpoint =
// an explicit, non-varargs argument expression of `call`
TExplicitArgument(Call call, DataFlow::Node arg) {
AutomodelJavaUtil::isFromSource(call) and
exists(Argument argExpr |
arg.asExpr() = argExpr and call = argExpr.getCall() and not argExpr.isVararg()
) and
not AutomodelJavaUtil::isUnexploitableType(arg.getType())
} or
// the instance argument of `call`; constructor calls are excluded
TInstanceArgument(Call call, DataFlow::Node arg) {
AutomodelJavaUtil::isFromSource(call) and
arg = DataFlow::getInstanceArgument(call) and
not call instanceof ConstructorCall and
not AutomodelJavaUtil::isUnexploitableType(arg.getType())
} or
// the implicit varargs array of `call`; `idx` is the callee's varargs parameter index
TImplicitVarargsArray(Call call, DataFlow::ImplicitVarargsArray arg, int idx) {
AutomodelJavaUtil::isFromSource(call) and
call = arg.getCall() and
idx = call.getCallee().getVaragsParameterIndex() and
not AutomodelJavaUtil::isUnexploitableType(arg.getType())
} or
// the value returned by the method call `call`
TMethodReturnValue(MethodCall call) {
AutomodelJavaUtil::isFromSource(call) and
not AutomodelJavaUtil::isUnexploitableType(call.getType())
} or
// a parameter `p` of a method that overrides `overriddenMethod`
TOverriddenParameter(Parameter p, Method overriddenMethod) {
AutomodelJavaUtil::isFromSource(p) and
p.getCallable().(Method).overrides(overriddenMethod)
}
/**
 * An endpoint is a node that is a candidate for modeling.
 */
abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint {
  /**
   * Gets the callable to be modeled that this endpoint represents.
   */
  abstract Callable getCallable();

  /**
   * Gets the Models-as-Data input (if any) for this endpoint, e.g. `Argument[0]`.
   *
   * For endpoints that are source candidates, this will be `none()`.
   */
  abstract string getMaDInput();

  /**
   * Gets the Models-as-Data output (if any) for this endpoint, e.g. `ReturnValue`.
   *
   * For endpoints that are sink candidates, this will be `none()`.
   */
  abstract string getMaDOutput();

  /** Gets the AST-level element that stands for this endpoint. */
  abstract Top asTop();

  /**
   * Converts the endpoint to a node that can be used in a data flow graph.
   */
  abstract DataFlow::Node asNode();

  /**
   * Gets the extensible predicate this endpoint would be modeled in:
   * `sourceModel` when it has an output but no input, `sinkModel` when it has
   * an input but no output. There is no result otherwise; an endpoint with both
   * would be a summary model, which is not yet supported.
   */
  string getExtensibleType() {
    // the two cases are mutually exclusive, so at most one disjunct applies
    exists(this.getMaDOutput()) and
    not exists(this.getMaDInput()) and
    result = "sourceModel"
    or
    exists(this.getMaDInput()) and
    not exists(this.getMaDOutput()) and
    result = "sinkModel"
  }

  /** Gets a textual representation of this endpoint. */
  abstract string toString();
}
/** The union of the endpoint branches that stand for some argument of a call. */
class TCallArgument = TExplicitArgument or TInstanceArgument or TImplicitVarargsArray;

/**
 * An endpoint that represents an "argument" to a call in a broad sense, including
 * both explicit arguments and the instance argument.
 *
 * Such endpoints have a Models-as-Data input (defined by the subclasses) and
 * never an output.
 */
abstract class CallArgument extends ApplicationModeEndpoint, TCallArgument {
  Call call;
  DataFlow::Node arg;

  /** Gets the call this endpoint is an argument of. */
  Call getCall() { call = result }

  override DataFlow::Node asNode() { arg = result }

  override Callable getCallable() {
    exists(Callable callee | callee = call.getCallee() | result = callee.getSourceDeclaration())
  }

  override string getMaDOutput() { none() }

  override string toString() { arg.toString() = result }
}
/**
* An endpoint that represents an explicit argument to a call.
*
* Its Models-as-Data input is `Argument[i]`, where `i` is the position of the
* argument expression in the call.
*/
class ExplicitArgument extends CallArgument, TExplicitArgument {
ExplicitArgument() { this = TExplicitArgument(call, arg) }
/** Gets the index at which this endpoint's expression occurs among the call's arguments. */
private int getArgIndex() { this.asTop() = call.getArgument(result) }
override string getMaDInput() { result = "Argument[" + this.getArgIndex() + "]" }
override Top asTop() { result = arg.asExpr() }
}
/**
 * An endpoint that represents the instance argument to a call. Its
 * Models-as-Data input is always `Argument[this]`.
 */
class InstanceArgument extends CallArgument, TInstanceArgument {
  InstanceArgument() { this = TInstanceArgument(call, arg) }

  override string getMaDInput() { result = "Argument[this]" }

  /**
   * Gets the expression of the instance argument when the node has one, and
   * the call itself when it does not.
   */
  override Top asTop() {
    result = arg.asExpr()
    or
    not exists(arg.asExpr()) and result = call
  }

  override string toString() { arg.toString() = result }
}
/**
* An endpoint that represents an implicit varargs array.
* We choose to represent the varargs array as a single endpoint, rather than as multiple endpoints.
*
* This avoids the problem of having to deal with redundant endpoints downstream.
*
* In order to be able to distinguish between varargs endpoints and regular endpoints, we export the `isVarargsArray`
* metadata field in the extraction queries.
*/
class ImplicitVarargsArray extends CallArgument, TImplicitVarargsArray {
// index of the callee's varargs parameter, as bound by the `TImplicitVarargsArray` branch
int idx;
ImplicitVarargsArray() { this = TImplicitVarargsArray(call, arg, idx) }
override string getMaDInput() { result = "Argument[" + idx + "]" }
// the endpoint is represented at the AST level by the call itself
override Top asTop() { result = call }
}
/**
* An endpoint that represents a method call. The `ReturnValue` of a method call
* may be a source.
*
* Such endpoints have the Models-as-Data output `ReturnValue` and no input.
*/
class MethodReturnValue extends ApplicationModeEndpoint, TMethodReturnValue {
MethodCall call;
MethodReturnValue() { this = TMethodReturnValue(call) }
// the modeled callable is the source declaration of the callee
override Callable getCallable() { result = call.getCallee().getSourceDeclaration() }
override string getMaDInput() { none() }
override string getMaDOutput() { result = "ReturnValue" }
override Top asTop() { result = call }
// the data flow node whose expression is the call itself
override DataFlow::Node asNode() { result.asExpr() = call }
override string toString() { result = call.toString() }
}
/**
* An endpoint that represents a parameter of an overridden method that may be
* a source.
*
* Such endpoints have the Models-as-Data output `Parameter[i]` and no input.
*/
class OverriddenParameter extends ApplicationModeEndpoint, TOverriddenParameter {
Parameter p;
Method overriddenMethod;
OverriddenParameter() { this = TOverriddenParameter(p, overriddenMethod) }
override Callable getCallable() {
// NB: we're returning the overridden callable here. This means that the
// candidate model will be about the overridden method, not the overriding
// method. This is a more general model, that also applies to other
// subclasses of the overridden class.
result = overriddenMethod.getSourceDeclaration()
}
/** Gets the position of `p` among the parameters of its own (overriding) callable. */
private int getArgIndex() { p.getCallable().getParameter(result) = p }
override string getMaDInput() { none() }
override string getMaDOutput() { result = "Parameter[" + this.getArgIndex() + "]" }
override Top asTop() { result = p }
override DataFlow::Node asNode() { result.(DataFlow::ParameterNode).asParameter() = p }
override string toString() { result = p.toString() }
}
/**
* A candidates implementation.
*
* Some important notes:
* - This mode is using arguments as endpoints.
* - We use the `CallContext` (the surrounding call expression) as related location.
*/
module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig {
// for documentation of the implementations here, see the QLDoc in the CandidateSig signature module.
class Endpoint = ApplicationModeEndpoint;
class EndpointType = AutomodelEndpointTypes::EndpointType;
class SinkType = AutomodelEndpointTypes::SinkType;
class SourceType = AutomodelEndpointTypes::SourceType;
class RelatedLocation = Location::Top;
class RelatedLocationType = JavaRelatedLocationType;
// Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact.
// An endpoint is treated as a sanitizer for every endpoint type when its type is
// unexploitable, and for path-injection sinks when its node is a path-injection sanitizer.
predicate isSanitizer(Endpoint e, EndpointType t) {
exists(t) and
AutomodelJavaUtil::isUnexploitableType([
// for most endpoints, we can get the type from the node
e.asNode().getType(),
// but not for calls to void methods, where we need to go via the AST
e.asTop().(Expr).getType()
])
or
t instanceof AutomodelEndpointTypes::PathInjectionSinkType and
e.asNode() instanceof PathSanitizer::PathInjectionSanitizer
}
RelatedLocation asLocation(Endpoint e) { result = e.asTop() }
predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2;
// An endpoint is a sink either through a matching MaD sink model (the model may
// give the exact signature or leave it empty) or through `isCustomSink`, in
// which case the provenance is reported as `custom-sink`.
predicate isSink(Endpoint e, string kind, string provenance) {
exists(
string package, string type, boolean subtypes, string name, string signature, string ext,
string input
|
sinkSpec(e, package, type, subtypes, name, signature, ext, input) and
ExternalFlow::sinkModel(package, type, subtypes, name, [signature, ""], ext, input, kind,
provenance, _)
)
or
isCustomSink(e, kind) and provenance = "custom-sink"
}
// An endpoint is a source through a matching MaD source model (exact or empty signature).
predicate isSource(Endpoint e, string kind, string provenance) {
exists(
string package, string type, boolean subtypes, string name, string signature, string ext,
string output
|
sourceSpec(e, package, type, subtypes, name, signature, ext, output) and
ExternalFlow::sourceModel(package, type, subtypes, name, [signature, ""], ext, output, kind,
provenance, _)
)
}
// An endpoint is neutral when a neutral model of the matching kind ("sink" for
// endpoints with a sink spec, "source" for endpoints with a source spec) exists
// for its callable.
predicate isNeutral(Endpoint e) {
exists(string package, string type, string name, string signature, string endpointType |
sinkSpec(e, package, type, _, name, signature, _, _) and
endpointType = "sink"
or
sourceSpec(e, package, type, _, name, signature, _, _) and
endpointType = "source"
|
ExternalFlow::neutralModel(package, type, name, [signature, ""], endpointType, _)
)
}
/**
* Holds if the endpoint concerns a callable with the given package, type, name and signature.
*
* If `subtypes` is `false`, only the exact callable is considered. If `true`, the callable and
* all its overrides are considered.
*/
additional predicate endpointCallable(
Endpoint e, string package, string type, boolean subtypes, string name, string signature
) {
exists(Callable c |
// the endpoint's own callable matches for either value of `subtypes`
c = e.getCallable() and subtypes in [true, false]
or
// a (transitively) overridden method matches only when subtypes are considered
e.getCallable().(Method).getSourceDeclaration().overrides+(c) and subtypes = true
|
c.hasQualifiedName(package, type, name) and
signature = ExternalFlow::paramsString(c)
)
}
/**
* Holds if the given columns describe `e` as a potential sink: the callable
* columns come from `endpointCallable`, `ext` is empty and `input` is the
* endpoint's MaD input.
*/
additional predicate sinkSpec(
Endpoint e, string package, string type, boolean subtypes, string name, string signature,
string ext, string input
) {
endpointCallable(e, package, type, subtypes, name, signature) and
ext = "" and
input = e.getMaDInput()
}
/**
* Holds if the given columns describe `e` as a potential source: the callable
* columns come from `endpointCallable`, `ext` is empty and `output` is the
* endpoint's MaD output.
*/
additional predicate sourceSpec(
Endpoint e, string package, string type, boolean subtypes, string name, string signature,
string ext, string output
) {
endpointCallable(e, package, type, subtypes, name, signature) and
ext = "" and
output = e.getMaDOutput()
}
/**
* Gets the related location of the given type for the given endpoint.
*
* - `CallContext`: the call expression of which the endpoint is either an
* argument or the qualifier (only available for `CallArgument` endpoints).
* - `MethodDoc`: the Javadoc of the endpoint's callable.
* - `ClassDoc`: the Javadoc of the declaring type of the endpoint's callable.
*/
RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
type = CallContext() and
result = e.(CallArgument).getCall()
or
type = MethodDoc() and
result = e.getCallable().(Documentable).getJavadoc()
or
type = ClassDoc() and
result = e.getCallable().getDeclaringType().(Documentable).getJavadoc()
}
}
/**
 * Holds if `e` is a sink of the given `kind` that is defined in QL code rather
 * than as a MaD model. Currently this covers query-injection sinks, which are
 * reported with kind `sql`. Ideally this predicate should be empty.
 */
private predicate isCustomSink(Endpoint e, string kind) {
  kind = "sql" and
  exists(DataFlow::Node node | node = e.asNode() | node instanceof QueryInjectionSink)
}
/** The shared characteristics, instantiated with the application-mode candidates implementation. */
module CharacteristicsImpl =
SharedCharacteristics::SharedCharacteristics<ApplicationCandidatesImpl>;
/** An endpoint characteristic of the application-mode instantiation. */
class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;
/** An application-mode endpoint. */
class Endpoint = ApplicationCandidatesImpl::Endpoint;
/*
* Predicates that are used to surface prompt examples and candidates for classification with an ML model.
*/
/**
* A MetadataExtractor that extracts metadata for application mode.
*
* This is a singleton class: its only value is the string
* `ApplicationModeMetadataExtractor`.
*/
class ApplicationModeMetadataExtractor extends string {
ApplicationModeMetadataExtractor() { this = "ApplicationModeMetadataExtractor" }
/**
* Holds if the given columns describe the endpoint `e`.
*
* `package`, `type`, `subtypes`, `name` and `signature` describe the endpoint's
* callable; `input` and `output` are the endpoint's MaD input and output, or the
* empty string where the endpoint has none; `isVarargsArray` is `"true"` or
* `"false"`; `extensibleType` is the endpoint's extensible type.
* `alreadyAiModeled` is the empty string when `CharacteristicsImpl::isModeled`
* does not hold for `e` and `extensibleType`, and otherwise the last column of
* `isModeled` (presumably the provenance of the existing model - TODO confirm).
*/
predicate hasMetadata(
Endpoint e, string package, string type, string subtypes, string name, string signature,
string input, string output, string isVarargsArray, string alreadyAiModeled,
string extensibleType
) {
exists(Callable callable | e.getCallable() = callable |
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
package = callable.getDeclaringType().getPackage().getName() and
// we're using the erased types because the MaD convention is to not specify type parameters.
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
name = callable.getName() and
signature = ExternalFlow::paramsString(callable) and
(
if e instanceof ImplicitVarargsArray
then isVarargsArray = "true"
else isVarargsArray = "false"
) and
extensibleType = e.getExtensibleType()
) and
(
not CharacteristicsImpl::isModeled(e, _, extensibleType, _) and alreadyAiModeled = ""
or
CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled)
)
}
}
/**
* Holds if the given `endpoint` should be considered a candidate for the `extensibleType`.
*
* The other parameters record various other properties of interest; they are
* the columns produced by `ApplicationModeMetadataExtractor.hasMetadata`.
*/
predicate isCandidate(
Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
) {
CharacteristicsImpl::isCandidate(endpoint, _) and
// no "uninteresting to model" characteristic may apply to the endpoint
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
// Concretely: keep the endpoint only if `alreadyAiModeled` is empty (not modeled) or contains `ai-`.
alreadyAiModeled.matches(["", "%ai-%"]) and
AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
}
/**
* Holds if the given `endpoint` is a negative example for the `extensibleType`
* because of the `characteristic`.
*
* `confidence` is the lowest confidence with which `characteristic` rules out
* any of the endpoint's potential types. The other parameters record various
* other properties of interest; they are the columns produced by
* `ApplicationModeMetadataExtractor.hasMetadata`.
*/
predicate isNegativeExample(
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
string type, string subtypes, string name, string signature, string input, string output,
string isVarargsArray, string extensibleType
) {
characteristic.appliesToEndpoint(endpoint) and
// the node is known not to be an endpoint of any appropriate type
forall(AutomodelEndpointTypes::EndpointType tp |
tp = CharacteristicsImpl::getAPotentialType(endpoint)
|
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
// That is: no other characteristic may positively imply a potential type with maximal confidence.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
)
}
/**
* Holds if the given `endpoint` is a positive example for the `endpointType`.
*
* The other parameters record various other properties of interest; they are
* the columns produced by `ApplicationModeMetadataExtractor.hasMetadata`.
*/
predicate isPositiveExample(
Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
string signature, string input, string output, string isVarargsArray, string extensibleType
) {
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
// only endpoints for which a call-context related location (or candidate) exists are used
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
}
/*
* EndpointCharacteristic classes that are specific to Automodel for Java.
*/
/**
* A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks.
*
* A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
* type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
* the dangerous/interesting thing, so we want the latter to be modeled as the sink.
*
* Endpoints that are already known sinks are exempt from this characteristic.
*
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
*/
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
UnexploitableIsCharacteristic() { this = "argument of is-style boolean method" }
override predicate appliesToEndpoint(Endpoint e) {
// NOTE(review): `is%` matches any name beginning with "is", not only camel-case `isXyz` names
e.getCallable().getName().matches("is%") and
e.getCallable().getReturnType() instanceof BooleanType and
not ApplicationCandidatesImpl::isSink(e, _, _)
}
}
/**
 * A negative characteristic stating that arguments of an existence-checking boolean method
 * are not sinks.
 *
 * An endpoint is covered when its callable returns a boolean and is named `exists` or
 * `notExists` (compared case-insensitively). Such calls usually only perform a check that
 * precedes the call doing the dangerous/interesting work, and it is that later call which
 * should be modeled as the sink.
 */
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableExistsCharacteristic() { this = "argument of existence-checking boolean method" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable checker, string loweredName |
      checker = e.getCallable() and
      loweredName = checker.getName().toLowerCase()
    |
      checker.getReturnType() instanceof BooleanType and
      (loweredName = "exists" or loweredName = "notexists")
    )
  }
}
/**
 * A negative characteristic for callables declared on `Throwable` types: their arguments are
 * not considered sinks, and their return values are not considered sources.
 */
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
  ExceptionCharacteristic() { this = "argument/result of exception-related method" }

  override predicate appliesToEndpoint(Endpoint e) {
    (
      // source candidates: only return values that are not already modeled as sources
      e.getMaDOutput() = "ReturnValue" and
      e.getExtensibleType() = "sourceModel" and
      not ApplicationCandidatesImpl::isSource(e, _, _)
      or
      // sink candidates: anything that is not already modeled as a sink
      e.getExtensibleType() = "sinkModel" and
      not ApplicationCandidatesImpl::isSink(e, _, _)
    ) and
    // the declaring type of the callable is (a subtype of) a throwable type
    exists(TypeThrowable throwable |
      throwable = e.getCallable().getDeclaringType().getASupertype*()
    )
  }
}
/**
 * A negative characteristic for endpoints that are the first argument of one of the flow-summary
 * step relations (getter, setter, through-value or through-taint).
 *
 * MaD modeled taint steps are global, so they are not sinks for any query. Non-MaD taint steps
 * might be specific to a particular query, so those are not filtered out.
 */
private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  IsMaDTaintStepCharacteristic() { this = "taint step" }

  override predicate appliesToEndpoint(Endpoint e) {
    FlowSummaryImpl::Private::Steps::summaryGetterStep(e.asNode(), _, _, _)
    or
    FlowSummaryImpl::Private::Steps::summarySetterStep(e.asNode(), _, _, _)
    or
    FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e.asNode(), _, _)
    or
    FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e.asNode(), _, _)
  }
}
/**
 * A characteristic that excludes call arguments and method return values whose callable is
 * defined in source from the set of candidates to model.
 *
 * The reasoning is that data/taint flow into the method implementation is expected to uncover
 * any sinks that are present there.
 */
private class LocalCall extends CharacteristicsImpl::UninterestingToModelCharacteristic {
  LocalCall() { this = "local call" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable target | target.fromSource() |
      target = e.(MethodReturnValue).getCallable()
      or
      target = e.(CallArgument).getCallable()
    )
  }
}
/**
 * A characteristic that marks an endpoint as uninteresting to model whenever the Java
 * `ModelExclusions` module considers its callable uninteresting for models.
 */
private class ExcludedFromModeling extends CharacteristicsImpl::UninterestingToModelCharacteristic {
  ExcludedFromModeling() { this = "excluded from modeling" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable target | target = e.getCallable() |
      ModelExclusions::isUninterestingForModels(target)
    )
  }
}
/**
 * A negative characteristic that filters out endpoints whose callable is not public.
 * Such callables are not interesting to include in the standard Java modeling.
 */
private class NonPublicMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
{
  NonPublicMethodCharacteristic() { this = "non-public method" }

  override predicate appliesToEndpoint(Endpoint e) {
    // the endpoint must have a callable, and that callable must lack the `public` modifier
    exists(Callable target |
      not target.isPublic() and
      target = e.getCallable()
    )
  }
}
/**
 * A negative characteristic for a non-sink argument of a call that has another argument modeled
 * as a sink with provenance `"manual"`. Only manual sinks are used, because only the manual
 * modeling process carries the expectation that all sinks of a method have been considered.
 *
 * WARNING: These endpoints should not be used as negative samples for training, because some
 * sinks may have been missed when the method was modeled. Specifically, as ATM is used to merge
 * in new declarations, there is less certainty that a method with one argument modeled as a MaD
 * sink has also had its remaining arguments manually reviewed. The ML model might have predicted
 * argument 0 of some method to be a sink but not argument 1, when in fact argument 1 is also a
 * sink.
 */
private class OtherArgumentToModeledMethodCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic
{
  OtherArgumentToModeledMethodCharacteristic() {
    this = "other argument to a method that has already been modeled manually"
  }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(CallArgument manualSink |
      manualSink != e and
      manualSink.getCall() = e.(CallArgument).getCall() and
      ApplicationCandidatesImpl::isSink(manualSink, _, "manual")
    ) and
    // the endpoint itself must not be a known sink
    not ApplicationCandidatesImpl::isSink(e, _, _)
  }
}
/**
 * Holds if the erased type of expression `e` has an associated annotation whose type is
 * `java.lang.FunctionalInterface`.
 */
predicate hasFunctionalInterfaceType(Expr e) {
  exists(RefType annotationType |
    annotationType = e.getType().getErasure().(RefType).getAnAssociatedAnnotation().getType() and
    annotationType.hasQualifiedName("java.lang", "FunctionalInterface")
  )
}
/**
 * A characteristic that marks functional expressions, and expressions of a functional interface
 * type, as likely not sinks.
 *
 * Such expressions may well _contain_ sinks, but are rarely sinks themselves.
 */
private class FunctionValueCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic {
  FunctionValueCharacteristic() { this = "function value" }

  override predicate appliesToEndpoint(Endpoint e) {
    e.asNode().asExpr() instanceof FunctionalExpr
    or
    hasFunctionalInterfaceType(e.asNode().asExpr())
  }
}
/**
 * A negative characteristic for endpoints that are not the `to` node of any known taint step.
 * Such a node cannot be tainted, because taint can't flow into it.
 *
 * WARNING: These endpoints should not be used as negative samples for training, because they may
 * include sinks for which the taint tracking modeling is incomplete.
 */
private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic
{
  CannotBeTaintedCharacteristic() { this = "cannot be tainted" }

  override predicate appliesToEndpoint(Endpoint e) { not this.isKnownOutNodeForStep(e) }

  /**
   * Holds if the node of endpoint `e` is known as the target of a modeled flow step, or is a
   * call expression (for which flow is simply assumed).
   */
  private predicate isKnownOutNodeForStep(Endpoint e) {
    FlowSummaryImpl::Private::Steps::summaryGetterStep(_, _, e.asNode(), _)
    or
    FlowSummaryImpl::Private::Steps::summarySetterStep(_, _, e.asNode(), _)
    or
    FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(_, e.asNode(), _)
    or
    FlowSummaryImpl::Private::Steps::summaryThroughStepValue(_, e.asNode(), _)
    or
    TaintTracking::localTaintStep(_, e.asNode())
    or
    // we just assume flow in that case
    e.asNode().asExpr() instanceof Call
  }
}

Просмотреть файл

@ -1,81 +0,0 @@
/**
* Surfaces the endpoints that are not already known to be sinks, and are therefore used as candidates for
* classification with an ML model.
*
* Note: This query does not actually classify the endpoints using the model.
*
* @name Automodel candidates (application mode)
* @description A query to extract automodel candidates in application mode.
* @kind problem
* @problem.severity recommendation
* @id java/ml/extract-automodel-application-candidates
* @tags internal extract automodel application-mode candidates
*/
import java
private import AutomodelApplicationModeCharacteristics
private import AutomodelJavaUtil
/**
 * Gets a sample of endpoints (of at most `limit` samples) with the given method signature.
 *
 * The main purpose of this helper predicate is to avoid selecting too many candidates, as this may
 * cause the SARIF file to exceed the maximum size limit.
 *
 * If fewer than `limit` endpoints share the metadata, all of them are selected.
 */
bindingset[limit]
private Endpoint getSampleForSignature(
  int limit, string package, string type, string subtypes, string name, string signature,
  string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
) {
  exists(int n, int num_endpoints, int stride, ApplicationModeMetadataExtractor meta |
    num_endpoints =
      count(Endpoint e |
        meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
          alreadyAiModeled, extensibleType)
      ) and
    // With fewer endpoints than `limit`, the integer quotient below would be 0 and every sample
    // index would collapse onto the same rank, so only a single endpoint would be selected.
    // Clamp the stride to 1 in that case so that every endpoint is picked.
    (
      if num_endpoints < limit
      then stride = 1
      else stride = (num_endpoints / limit).floor()
    )
  |
    result =
      rank[n](Endpoint e, Location loc |
        loc = e.asTop().getLocation() and
        meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
          alreadyAiModeled, extensibleType)
      |
        e
        order by
          loc.getFile().getAbsolutePath(), loc.getStartLine(), loc.getStartColumn(),
          loc.getEndLine(), loc.getEndColumn()
      ) and
    // To avoid selecting samples that are too close together (as the ranking above goes by file
    // path first), we select `limit` evenly spaced samples from the ranked list of endpoints. By
    // default this would always include the first sample, so we add a random-chosen prime offset
    // to the first sample index, and reduce modulo the number of endpoints.
    // Finally, we add 1 to the result, as ranking results in a 1-indexed relation.
    n = 1 + (([0 .. limit - 1] * stride + 46337) % num_endpoints)
  )
}
// Select at most 9 sampled candidate endpoints per distinct metadata tuple, and report each one
// together with its related locations and its metadata columns.
from
  Endpoint candidate, DollarAtString package, DollarAtString type, DollarAtString subtypes,
  DollarAtString name, DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString isVarargsArray, DollarAtString alreadyAiModeled, DollarAtString extensibleType
where
  candidate =
    getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
      isVarargsArray, extensibleType, alreadyAiModeled) and
  isCandidate(candidate, package, type, subtypes, name, signature, input, output, isVarargsArray,
    extensibleType, alreadyAiModeled)
select candidate.asNode(),
  "Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(candidate, CallContext()), "CallContext", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(candidate, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(candidate, ClassDoc()), "ClassDoc", //
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", // method name
  signature, "signature", //
  input, "input", //
  output, "output", //
  isVarargsArray, "isVarargsArray", //
  alreadyAiModeled, "alreadyAiModeled", //
  extensibleType, "extensibleType"

Просмотреть файл

@ -1,66 +0,0 @@
/**
* Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
*
* @name Negative examples (application mode)
* @kind problem
* @problem.severity recommendation
* @id java/ml/extract-automodel-application-negative-examples
* @tags internal extract automodel application-mode negative examples
*/
private import java
private import AutomodelApplicationModeCharacteristics
private import AutomodelEndpointTypes
private import AutomodelJavaUtil
/**
 * Gets a sample of endpoints (of at most `limit` samples) for which the given characteristic applies.
 *
 * The main purpose of this helper predicate is to avoid selecting too many samples, as this may
 * cause the SARIF file to exceed the maximum size limit.
 *
 * If the characteristic applies to fewer than `limit` endpoints, all of them are selected.
 */
bindingset[limit]
Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
  exists(int n, int num_endpoints, int stride |
    num_endpoints = count(Endpoint e | c.appliesToEndpoint(e)) and
    // With fewer endpoints than `limit`, the integer quotient below would be 0 and every sample
    // index would collapse onto the same rank, so only a single endpoint would be selected.
    // Clamp the stride to 1 in that case so that every endpoint is picked.
    (
      if num_endpoints < limit
      then stride = 1
      else stride = (num_endpoints / limit).floor()
    )
  |
    result =
      rank[n](Endpoint e, Location loc |
        loc = e.asTop().getLocation() and c.appliesToEndpoint(e)
      |
        e
        order by
          loc.getFile().getAbsolutePath(), loc.getStartLine(), loc.getStartColumn(),
          loc.getEndLine(), loc.getEndColumn()
      ) and
    // To avoid selecting samples that are too close together (as the ranking above goes by file
    // path first), we select `limit` evenly spaced samples from the ranked list of endpoints. By
    // default this would always include the first sample, so we add a random-chosen prime offset
    // to the first sample index, and reduce modulo the number of endpoints.
    // Finally, we add 1 to the result, as ranking results in a 1-indexed relation.
    n = 1 + (([0 .. limit - 1] * stride + 46337) % num_endpoints)
  )
}
// Report up to 100 sampled endpoints per characteristic that qualify as negative examples,
// using the characteristic itself as the first line of the alert message.
from
  Endpoint negative, EndpointCharacteristic characteristic, string message,
  DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
  DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString isVarargsArray, DollarAtString extensibleType
where
  message = characteristic and
  // the confidence value is not reported, so it is left unconstrained here
  isNegativeExample(negative, characteristic, _, package, type, subtypes, name, signature, input,
    output, isVarargsArray, extensibleType) and
  negative = getSampleForCharacteristic(characteristic, 100)
select negative.asNode(),
  message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(negative, CallContext()), "CallContext", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(negative, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(negative, ClassDoc()), "ClassDoc", //
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  isVarargsArray, "isVarargsArray", //
  extensibleType, "extensibleType"

Просмотреть файл

@ -1,37 +0,0 @@
/**
* Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
*
* @name Positive examples (application mode)
* @kind problem
* @problem.severity recommendation
* @id java/ml/extract-automodel-application-positive-examples
* @tags internal extract automodel application-mode positive examples
*/
private import AutomodelApplicationModeCharacteristics
private import AutomodelEndpointTypes
private import AutomodelJavaUtil
// Report every endpoint that is a positive example for some endpoint type, with the endpoint
// type as the first line of the alert message, followed by related locations and metadata.
// (The metadata extractor is used inside `isPositiveExample`, so it is not declared here.)
from
  Endpoint endpoint, EndpointType endpointType, DollarAtString package, DollarAtString type,
  DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
  DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
where
  isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output,
    isVarargsArray, extensibleType)
select endpoint.asNode(),
  endpointType + "\nrelated locations: $@, $@, $@." +
    "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  isVarargsArray, "isVarargsArray", //
  extensibleType, "extensibleType"

Просмотреть файл

@ -1,5 +0,0 @@
# Data extension targeting the `automodelCandidateFilter` extensible of the
# `codeql/java-automodel-queries` pack. The `data` list is empty, so this file
# itself contributes no rows.
extensions:
- addsTo:
pack: codeql/java-automodel-queries
extensible: automodelCandidateFilter
data: []

Просмотреть файл

@ -1,19 +0,0 @@
/**
* @name Number of instances of each sink model
* @description Counts the number of instances of `ai-generated` sink models.
* @kind table
* @id java/ml/metrics-count-instances-per-sink-model
* @tags internal automodel metrics
*/
private import java
private import AutomodelAlertSinkUtil
// One row per `ai-generated` sink model that has at least one instance, with the largest
// instance counts listed first.
from SinkModel model, int instanceCount
where
  model.getProvenance() = "ai-generated" and
  instanceCount = model.getInstanceCount() and
  instanceCount > 0
select instanceCount, model.getPackage() as package, model.getType() as type,
  model.getSubtypes() as subtypes, model.getName() as name, model.getSignature() as signature,
  model.getInput() as input, model.getExt() as ext, model.getKind() as kind,
  model.getProvenance() as provenance order by instanceCount desc

Просмотреть файл

@ -1,82 +0,0 @@
/**
* For internal use only.
*
* Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
* only predict classes defined within this file. This file is the source of truth for the integer
* representation of each of these classes.
*/
/**
 * A class that can be predicted by a classifier.
 *
 * Each value is a string; concrete subclasses pin the string to one specific kind name.
 */
abstract class EndpointType extends string {
  /**
   * Holds for any string that is bound by the context; the concrete subclasses restrict
   * `this` to the name of their sink / source type.
   */
  bindingset[this]
  EndpointType() { any() }

  /**
   * Gets the name of the sink/source kind for this endpoint type as used in models-as-data.
   * This is the string value of the endpoint type itself.
   *
   * See https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#LL353C11-L357C31
   * for sink types, and https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#L365
   * for source types.
   */
  final string getKind() { result = this }
}
/**
 * An endpoint type describing a kind of sink that can be predicted by a classifier.
 */
abstract class SinkType extends EndpointType {
  bindingset[this]
  SinkType() { any() }
}
/**
 * The sink type relevant to the SQL injection query, identified by the string `sql-injection`.
 */
class SqlInjectionSinkType extends SinkType {
  SqlInjectionSinkType() { this = "sql-injection" }
}
/**
 * The sink type relevant to the tainted path injection query, identified by the string
 * `path-injection`.
 */
class PathInjectionSinkType extends SinkType {
  PathInjectionSinkType() { this = "path-injection" }
}
/**
 * The sink type relevant to the SSRF query, identified by the string `request-forgery`.
 */
class RequestForgerySinkType extends SinkType {
  RequestForgerySinkType() { this = "request-forgery" }
}
/**
 * The sink type relevant to the command injection query, identified by the string
 * `command-injection`.
 */
class CommandInjectionSinkType extends SinkType {
  CommandInjectionSinkType() { this = "command-injection" }
}
/**
 * The sink type relevant to file storage, identified by the string `file-content-store`.
 */
class FileContentStoreSinkType extends SinkType {
  FileContentStoreSinkType() { this = "file-content-store" }
}
/**
 * The sink type relevant to HTML injection, identified by the string `html-injection`.
 */
class HtmlInjectionSinkType extends SinkType {
  HtmlInjectionSinkType() { this = "html-injection" }
}
/**
 * The sink type relevant to LDAP injection, identified by the string `ldap-injection`.
 */
class LdapInjectionSinkType extends SinkType {
  LdapInjectionSinkType() { this = "ldap-injection" }
}
/**
 * The sink type relevant to URL redirection, identified by the string `url-redirection`.
 */
class UrlRedirectionSinkType extends SinkType {
  UrlRedirectionSinkType() { this = "url-redirection" }
}
/**
 * An endpoint type describing a kind of source that can be predicted by a classifier.
 */
abstract class SourceType extends EndpointType {
  bindingset[this]
  SourceType() { any() }
}
/**
 * The source type for sources of remote data, identified by the string `remote`.
 */
class RemoteSourceType extends SourceType {
  RemoteSourceType() { this = "remote" }
}

Просмотреть файл

@ -1,507 +0,0 @@
/**
* For internal use only.
*/
private import java
private import semmle.code.Location as Location
private import semmle.code.java.dataflow.DataFlow
private import semmle.code.java.dataflow.TaintTracking
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
private import semmle.code.java.Expr as Expr
private import semmle.code.java.security.QueryInjection
private import semmle.code.java.security.RequestForgery
private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
private import AutomodelJavaUtil as AutomodelJavaUtil
import AutomodelSharedCharacteristics as SharedCharacteristics
import AutomodelEndpointTypes as AutomodelEndpointTypes
/**
 * The kinds of related location that framework mode attaches to an endpoint.
 */
newtype JavaRelatedLocationType =
  /** The documentation of the endpoint's callable. */
  MethodDoc() or
  /** The documentation of the type declaring the endpoint's callable. */
  ClassDoc()
/**
 * The algebraic type underlying framework mode endpoints. Every branch is restricted to
 * elements for which `AutomodelJavaUtil::isFromSource` holds.
 */
newtype TFrameworkModeEndpoint =
  /** A parameter whose type is not an unexploitable type. */
  TExplicitParameter(Parameter p) {
    not AutomodelJavaUtil::isUnexploitableType(p.getType()) and
    AutomodelJavaUtil::isFromSource(p)
  } or
  /** The qualifier of a callable that is not a constructor. */
  TQualifier(Callable c) { not c instanceof Constructor and AutomodelJavaUtil::isFromSource(c) } or
  /** The return value of a constructor, or of a method whose return type is not unexploitable. */
  TReturnValue(Callable c) {
    AutomodelJavaUtil::isFromSource(c) and
    (
      c instanceof Constructor
      or
      c instanceof Method and
      not AutomodelJavaUtil::isUnexploitableType(c.getReturnType())
    )
  } or
  /** A parameter, of a not-unexploitable type, of an overridable method that is a model API. */
  TOverridableParameter(Method m, Parameter p) {
    p.getCallable() = m and
    AutomodelJavaUtil::isOverridable(m) and
    m instanceof ModelExclusions::ModelApi and
    not AutomodelJavaUtil::isUnexploitableType(p.getType()) and
    AutomodelJavaUtil::isFromSource(p)
  } or
  /** The qualifier of an overridable method that is a model API. */
  TOverridableQualifier(Method m) {
    AutomodelJavaUtil::isOverridable(m) and
    m instanceof ModelExclusions::ModelApi and
    AutomodelJavaUtil::isFromSource(m)
  }
/**
 * An endpoint in framework mode.
 *
 * Concrete subclasses describe what part of a callable the endpoint denotes and how it is
 * addressed in models-as-data (MaD).
 */
abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint {
  /**
   * Gets the MaD input (if any) for this endpoint, for example `Argument[0]`.
   *
   * This has no result for endpoints that are source candidates.
   */
  abstract string getMaDInput();

  /**
   * Gets the MaD output (if any) for this endpoint, for example `ReturnValue`.
   *
   * This has no result for endpoints that are sink candidates.
   */
  abstract string getMaDOutput();

  /**
   * Gets the name of the parameter of this endpoint, if any.
   */
  abstract string getParamName();

  /**
   * Gets the callable that contains this endpoint.
   */
  abstract Callable getCallable();

  /** Gets the program element that represents this endpoint. */
  abstract Top asTop();

  /** Gets the name of the extensible predicate this endpoint is a candidate for. */
  abstract string getExtensibleType();

  /** Gets a textual representation of this endpoint, taken from its program element. */
  string toString() { result = this.asTop().toString() }

  /** Gets the location of this endpoint, taken from its program element. */
  Location getLocation() { result = this.asTop().getLocation() }
}
/**
 * A framework mode endpoint for an explicit parameter that is defined in source.
 * It is a candidate for `sinkModel`, addressed as `Argument[<position>]`.
 */
class ExplicitParameterEndpoint extends FrameworkModeEndpoint, TExplicitParameter {
  Parameter param;

  ExplicitParameterEndpoint() { param.fromSource() and this = TExplicitParameter(param) }

  override Callable getCallable() { result = param.getCallable() }

  override Top asTop() { result = param }

  override string getParamName() { result = param.getName() }

  override string getMaDInput() { result = "Argument[" + param.getPosition() + "]" }

  // a sink candidate has no MaD output
  override string getMaDOutput() { none() }

  override string getExtensibleType() { result = "sinkModel" }
}
/**
 * A framework mode endpoint for the qualifier of a non-static callable that is defined in
 * source. It is a candidate for `sinkModel`, addressed as `Argument[this]`.
 */
class QualifierEndpoint extends FrameworkModeEndpoint, TQualifier {
  Callable callable;

  QualifierEndpoint() {
    callable.fromSource() and
    not callable.isStatic() and
    this = TQualifier(callable)
  }

  override Callable getCallable() { result = callable }

  override Top asTop() { result = callable }

  override string getParamName() { result = "this" }

  override string getMaDInput() { result = "Argument[this]" }

  // a sink candidate has no MaD output
  override string getMaDOutput() { none() }

  override string getExtensibleType() { result = "sinkModel" }
}
/**
 * A framework mode endpoint for the return value of a callable that is defined in source.
 * It is a candidate for `sourceModel`, addressed as `ReturnValue`.
 */
class ReturnValue extends FrameworkModeEndpoint, TReturnValue {
  Callable callable;

  ReturnValue() { callable.fromSource() and this = TReturnValue(callable) }

  override Callable getCallable() { result = callable }

  override Top asTop() { result = callable }

  // a return value has no parameter name
  override string getParamName() { none() }

  // a source candidate has no MaD input
  override string getMaDInput() { none() }

  override string getMaDOutput() { result = "ReturnValue" }

  override string getExtensibleType() { result = "sourceModel" }
}
/**
 * A framework mode endpoint for a parameter of an overridable method.
 * It is a candidate for `sourceModel`, addressed as `Parameter[<position>]`.
 */
class OverridableParameter extends FrameworkModeEndpoint, TOverridableParameter {
  Method method;
  Parameter param;

  OverridableParameter() { this = TOverridableParameter(method, param) }

  override Callable getCallable() { result = method }

  override Top asTop() { result = param }

  override string getParamName() { result = param.getName() }

  // a source candidate has no MaD input
  override string getMaDInput() { none() }

  override string getMaDOutput() { result = "Parameter[" + param.getPosition() + "]" }

  override string getExtensibleType() { result = "sourceModel" }
}
/**
 * A framework mode endpoint for the qualifier of an overridable method.
 * It is a candidate for `sourceModel`, addressed as `Parameter[this]`.
 */
class OverridableQualifier extends FrameworkModeEndpoint, TOverridableQualifier {
  Method m;

  OverridableQualifier() { this = TOverridableQualifier(m) }

  override Callable getCallable() { result = m }

  override Top asTop() { result = m }

  override string getParamName() { result = "this" }

  // a source candidate has no MaD input
  override string getMaDInput() { none() }

  override string getMaDOutput() { result = "Parameter[this]" }

  override string getExtensibleType() { result = "sourceModel" }
}
/**
 * The candidates implementation for framework mode.
 *
 * Things to keep in mind:
 * - Endpoints in this mode are parameters.
 * - Information about sinks and neutrals is taken from MaD models.
 * - Where available, the Javadoc of the method and of the class serve as related locations.
 */
module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
  // The members required by the signature are documented in the QLDoc of `CandidateSig`.
  class Endpoint = FrameworkModeEndpoint;

  class EndpointType = AutomodelEndpointTypes::EndpointType;

  class SinkType = AutomodelEndpointTypes::SinkType;

  class SourceType = AutomodelEndpointTypes::SourceType;

  class RelatedLocation = Location::Top;

  class RelatedLocationType = JavaRelatedLocationType;

  // Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact.
  predicate isSanitizer(Endpoint e, EndpointType t) { none() }

  RelatedLocation asLocation(Endpoint e) { result = e.asTop() }

  predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2;

  predicate isSink(Endpoint e, string kind, string provenance) {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string ext,
      string input
    |
      // a sink model matches either the exact signature or an empty one
      ExternalFlow::sinkModel(package, type, subtypes, name, [signature, ""], ext, input, kind,
        provenance, _) and
      sinkSpec(e, package, type, subtypes, name, signature, ext, input)
    )
  }

  predicate isSource(Endpoint e, string kind, string provenance) {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string ext,
      string output
    |
      // a source model matches either the exact signature or an empty one
      ExternalFlow::sourceModel(package, type, subtypes, name, [signature, ""], ext, output, kind,
        provenance, _) and
      sourceSpec(e, package, type, subtypes, name, signature, ext, output)
    )
  }

  predicate isNeutral(Endpoint e) {
    exists(string package, string type, string name, string signature, string endpointType |
      endpointType = "source" and
      sourceSpec(e, package, type, _, name, signature, _, _)
      or
      endpointType = "sink" and
      sinkSpec(e, package, type, _, name, signature, _, _)
    |
      ExternalFlow::neutralModel(package, type, name, [signature, ""], endpointType, _)
    )
  }

  /**
   * Holds if endpoint `e` concerns a callable described by `package`, `type`, `name` and
   * `signature`.
   *
   * With `subtypes` being `false`, only the endpoint's own callable is considered. With
   * `subtypes` being `true`, the callables it (transitively) overrides are considered as well.
   */
  additional predicate endpointCallable(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature
  ) {
    exists(Callable c |
      // the callable of the endpoint itself, for either value of `subtypes`
      subtypes = [true, false] and
      c = e.getCallable()
      or
      // a callable that is transitively overridden by the endpoint's method
      subtypes = true and
      e.getCallable().(Method).getSourceDeclaration().overrides+(c)
    |
      signature = ExternalFlow::paramsString(c) and
      c.hasQualifiedName(package, type, name)
    )
  }

  /**
   * Holds if the given tuple is a MaD sink specification for endpoint `e`: the columns of one
   * of its endpoint callables, an empty `ext`, and the endpoint's MaD input.
   */
  additional predicate sinkSpec(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    string ext, string input
  ) {
    input = e.getMaDInput() and
    ext = "" and
    endpointCallable(e, package, type, subtypes, name, signature)
  }

  /**
   * Holds if the given tuple is a MaD source specification for endpoint `e`: the columns of one
   * of its endpoint callables, an empty `ext`, and the endpoint's MaD output.
   */
  additional predicate sourceSpec(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    string ext, string output
  ) {
    output = e.getMaDOutput() and
    ext = "" and
    endpointCallable(e, package, type, subtypes, name, signature)
  }

  /**
   * Gets the related location of the given `type` for endpoint `e`.
   *
   * A related location is the Javadoc comment of either the endpoint's callable or the type
   * that declares that callable.
   */
  RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
    type = ClassDoc() and
    result = e.getCallable().getDeclaringType().(Documentable).getJavadoc()
    or
    type = MethodDoc() and
    result = e.getCallable().(Documentable).getJavadoc()
  }
}
/** The shared characteristics library, instantiated with the framework mode candidates implementation. */
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<FrameworkCandidatesImpl>;
/** An endpoint characteristic, as defined by the framework mode instantiation of the shared library. */
class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;
/** A framework mode endpoint, as exposed by `FrameworkCandidatesImpl`. */
class Endpoint = FrameworkCandidatesImpl::Endpoint;
/*
* Predicates that are used to surface prompt examples and candidates for classification with an ML model.
*/
/**
 * A singleton class whose `hasMetadata` predicate extracts the metadata columns of an endpoint
 * in framework mode.
 */
class FrameworkModeMetadataExtractor extends string {
  FrameworkModeMetadataExtractor() { this = "FrameworkModeMetadataExtractor" }

  /**
   * Holds if the given columns describe endpoint `e`.
   *
   * `input`, `output` and `parameterName` are the empty string when the endpoint has no MaD
   * input, MaD output or parameter name, respectively. `alreadyAiModeled` is the empty string
   * when the endpoint is not modeled for `extensibleType`.
   */
  predicate hasMetadata(
    Endpoint e, string package, string type, string subtypes, string name, string signature,
    string input, string output, string parameterName, string alreadyAiModeled,
    string extensibleType
  ) {
    (
      if CharacteristicsImpl::isModeled(e, _, extensibleType, _)
      then CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled)
      else alreadyAiModeled = ""
    ) and
    exists(Callable callable | e.getCallable() = callable |
      extensibleType = e.getExtensibleType() and
      name = callable.getName() and
      signature = ExternalFlow::paramsString(callable) and
      subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
      package = callable.getDeclaringType().getPackage().getName() and
      // we're using the erased types because the MaD convention is to not specify type parameters.
      // Whether something is or isn't a sink doesn't usually depend on the type parameters.
      type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and
      (
        input = e.getMaDInput()
        or
        not exists(e.getMaDInput()) and input = ""
      ) and
      (
        output = e.getMaDOutput()
        or
        not exists(e.getMaDOutput()) and output = ""
      ) and
      (
        parameterName = e.getParamName()
        or
        not exists(e.getParamName()) and parameterName = ""
      )
    )
  }
}
/**
 * Holds if `endpoint` should be considered a candidate for the `extensibleType`.
 *
 * The remaining parameters are the metadata columns that
 * `FrameworkModeMetadataExtractor.hasMetadata` reports for the endpoint.
 */
predicate isCandidate(
  Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
  string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
) {
  // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
  // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
  // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
  // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
  // types, and we don't need to reexamine it.
  alreadyAiModeled.matches(["", "%ai-%"]) and
  AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature) and
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, alreadyAiModeled, extensibleType)
  ) and
  // no "uninteresting to model" characteristic may apply to the endpoint
  not any(CharacteristicsImpl::UninterestingToModelCharacteristic u).appliesToEndpoint(endpoint) and
  CharacteristicsImpl::isCandidate(endpoint, _)
}
/**
 * Holds if `endpoint` is a negative example for the `extensibleType` because `characteristic`
 * applies to it, with `confidence` being the lowest confidence of that characteristic across
 * the endpoint's potential types.
 *
 * The remaining parameters are the metadata columns that
 * `FrameworkModeMetadataExtractor.hasMetadata` reports for the endpoint.
 */
predicate isNegativeExample(
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
  string type, string subtypes, string name, string signature, string input, string output,
  string parameterName, string extensibleType
) {
  characteristic.appliesToEndpoint(endpoint) and
  // the node is known not to be an endpoint of any appropriate type: there is no potential type
  // for which the characteristic lacks a negative implication
  not exists(AutomodelEndpointTypes::EndpointType tp |
    tp = CharacteristicsImpl::getAPotentialType(endpoint) and
    not characteristic.hasImplications(tp, false, _)
  ) and
  // the lowest confidence across all endpoint types should be at least highConfidence
  confidence >= SharedCharacteristics::highConfidence() and
  confidence =
    min(float c |
      characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
    ) and
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, _, extensibleType)
  ) and
  // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
  // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
  not exists(EndpointCharacteristic positive, float positiveConfidence |
    positive != characteristic and
    positive.appliesToEndpoint(endpoint) and
    positiveConfidence >= SharedCharacteristics::maximalConfidence() and
    positive
        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true,
          positiveConfidence)
  )
}
/**
 * Holds if `endpoint` is known to be of type `endpointType` and can therefore be used as a
 * positive example.
 *
 * The remaining parameters expose the metadata recorded for `endpoint`.
 */
predicate isPositiveExample(
  Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
  string signature, string input, string output, string parameterName, string extensibleType
) {
  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, _, extensibleType)
  )
}
/*
* EndpointCharacteristic classes that are specific to Automodel for Java.
*/
/**
 * A negative characteristic for is-style boolean methods: their parameters should not be considered sinks, and
 * their return value should not be considered a source.
 *
 * When a callable's name starts with `is` and it returns a boolean (e.g. `isDirectory`), it normally only performs
 * a check that precedes the call doing the dangerous/interesting thing. The latter call is the one we want to see
 * modeled as the sink.
 *
 * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
 */
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
  UnexploitableIsCharacteristic() { this = "argument of is-style boolean method" }

  override predicate appliesToEndpoint(Endpoint e) {
    (
      // the return value, unless it is already a known source
      e.getExtensibleType() = "sourceModel" and
      e.getMaDOutput() = "ReturnValue" and
      not FrameworkCandidatesImpl::isSource(e, _, _)
      or
      // a sink endpoint, unless it is already a known sink
      e.getExtensibleType() = "sinkModel" and
      not FrameworkCandidatesImpl::isSink(e, _, _)
    ) and
    e.getCallable().getReturnType() instanceof BooleanType and
    e.getCallable().getName().matches("is%")
  }
}
/**
 * A negative characteristic for existence-checking boolean methods: their parameters should not be considered
 * sinks, and their return value should not be considered a source.
 *
 * When a callable is named `exists` or `notExists` (compared case-insensitively) and returns a boolean, it normally
 * only performs a check that precedes the call doing the dangerous/interesting thing. The latter call is the one we
 * want to see modeled as the sink.
 */
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
  UnexploitableExistsCharacteristic() { this = "argument of existence-checking boolean method" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callable | callable = e.getCallable() |
      callable.getReturnType() instanceof BooleanType and
      callable.getName().toLowerCase() = ["notexists", "exists"] and
      (
        // the return value, unless it is already a known source
        e.getExtensibleType() = "sourceModel" and
        e.getMaDOutput() = "ReturnValue" and
        not FrameworkCandidatesImpl::isSource(e, _, _)
        or
        // a sink endpoint, unless it is already a known sink
        e.getExtensibleType() = "sinkModel" and
        not FrameworkCandidatesImpl::isSink(e, _, _)
      )
    )
  }
}
/**
 * A negative characteristic for callables declared in `Throwable` or one of its subtypes: their parameters
 * should not be considered sinks, and their return value should not be considered a source.
 */
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
  ExceptionCharacteristic() { this = "argument/result of exception-related method" }

  override predicate appliesToEndpoint(Endpoint e) {
    (
      // the return value, unless it is already a known source
      e.getExtensibleType() = "sourceModel" and
      e.getMaDOutput() = "ReturnValue" and
      not FrameworkCandidatesImpl::isSource(e, _, _)
      or
      // a sink endpoint, unless it is already a known sink
      e.getExtensibleType() = "sinkModel" and
      not FrameworkCandidatesImpl::isSink(e, _, _)
    ) and
    e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable
  }
}
/**
 * A characteristic that applies to endpoints whose callable is not a `ModelApi`, i.e. not an API that is
 * considered worth modeling.
 */
private class NotAModelApi extends CharacteristicsImpl::UninterestingToModelCharacteristic {
  NotAModelApi() { this = "not a model API" }

  override predicate appliesToEndpoint(Endpoint e) {
    not exists(ModelExclusions::ModelApi api | api = e.getCallable())
  }
}

Просмотреть файл

@ -1,38 +0,0 @@
/**
* Surfaces the endpoints that are not already known to be sinks, and are therefore used as candidates for
* classification with an ML model.
*
* Every result carries two related locations (looked up via `getRelatedLocationOrCandidate` for `MethodDoc`
* and `ClassDoc`) followed by ten metadata values, each passed as a `$@` placeholder pair.
*
* Note: This query does not actually classify the endpoints using the model.
*
* @name Automodel candidates (framework mode)
* @description A query to extract automodel candidates in framework mode.
* @kind problem
* @problem.severity recommendation
* @id java/ml/extract-automodel-framework-candidates
* @tags internal extract automodel framework-mode candidates
*/
private import AutomodelFrameworkModeCharacteristics
private import AutomodelJavaUtil
// The metadata columns are typed as `DollarAtString` so that plain strings can be used as `$@` targets.
from
Endpoint endpoint, DollarAtString package, DollarAtString type, DollarAtString subtypes,
DollarAtString name, DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString parameterName, DollarAtString alreadyAiModeled, DollarAtString extensibleType
where
// Note the argument order: `extensibleType` comes before `alreadyAiModeled` here, unlike in the select clause.
isCandidate(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
extensibleType, alreadyAiModeled)
// The number of `$@` placeholders in the message must match the number of (value, label) pairs that follow.
// NOTE(review): the trailing `//` on each line presumably stops the autoformatter from joining the pairs.
select endpoint,
"Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
package, "package", //
type, "type", //
subtypes, "subtypes", //
name, "name", //
signature, "signature", //
input, "input", //
output, "output", //
parameterName, "parameterName", //
alreadyAiModeled, "alreadyAiModeled", //
extensibleType, "extensibleType"

Просмотреть файл

@ -1,36 +0,0 @@
/**
 * Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
 *
 * Every result carries the characteristic that ruled the endpoint out, two related locations (looked up via
 * `getRelatedLocationOrCandidate` for `MethodDoc` and `ClassDoc`) and nine metadata values, each passed as a
 * `$@` placeholder pair.
 *
 * @name Negative examples (framework mode)
 * @description A query to extract automodel negative examples in framework mode.
 * @kind problem
 * @problem.severity recommendation
 * @id java/ml/extract-automodel-framework-negative-examples
 * @tags internal extract automodel framework-mode negative examples
 */

private import AutomodelFrameworkModeCharacteristics
private import AutomodelEndpointTypes
private import AutomodelJavaUtil

// The metadata columns are typed as `DollarAtString` so that plain strings can be used as `$@` targets.
from
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence,
  DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
  DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString parameterName, DollarAtString extensibleType
where
  isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature,
    input, output, parameterName, extensibleType)
// The number of `$@` placeholders in the message must match the number of (value, label) pairs that follow.
select endpoint,
  characteristic + "\nrelated locations: $@, $@." +
    "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  parameterName, "parameterName", //
  extensibleType, "extensibleType"

Просмотреть файл

@ -1,34 +0,0 @@
/**
 * Surfaces endpoints that are known to be of some endpoint type (as determined by `isPositiveExample`), for use
 * as positive examples in the prompt.
 *
 * Every result carries the endpoint type, two related locations (looked up via `getRelatedLocationOrCandidate`
 * for `MethodDoc` and `ClassDoc`) and nine metadata values, each passed as a `$@` placeholder pair.
 *
 * @name Positive examples (framework mode)
 * @description A query to extract automodel positive examples in framework mode.
 * @kind problem
 * @problem.severity recommendation
 * @id java/ml/extract-automodel-framework-positive-examples
 * @tags internal extract automodel framework-mode positive examples
 */

private import AutomodelFrameworkModeCharacteristics
private import AutomodelEndpointTypes
private import AutomodelJavaUtil

// The metadata columns are typed as `DollarAtString` so that plain strings can be used as `$@` targets.
from
  Endpoint endpoint, EndpointType endpointType, DollarAtString package, DollarAtString type,
  DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
  DollarAtString output, DollarAtString parameterName, DollarAtString extensibleType
where
  isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output,
    parameterName, extensibleType)
// The number of `$@` placeholders in the message must match the number of (value, label) pairs that follow.
select endpoint,
  endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  parameterName, "parameterName", //
  extensibleType, "extensibleType"

Просмотреть файл

@ -1,111 +0,0 @@
private import java
private import AutomodelEndpointTypes as AutomodelEndpointTypes
/**
 * A string that can be selected by a query as the target of a `$@` placeholder.
 *
 * The class covers every string and adds a mock `hasLocationInfo` predicate that reports the string itself as
 * the file path, with all line/column values set to 1.
 *
 * Use it whenever a plain string value should be returned through `$@` notation - the value then ends up in the
 * sarif file.
 *
 * Background information on `hasLocationInfo`:
 * https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-location-information
 */
class DollarAtString extends string {
  bindingset[this]
  DollarAtString() { any() }

  /** Holds if `path` is this string and every position component is 1. */
  bindingset[this]
  predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
    sl = 1 and
    sc = 1 and
    el = 1 and
    ec = 1 and
    path = this
  }
}
/**
 * Holds if `kind` is the MaD kind of the endpoint type `type`, i.e. relates every
 * known MaD kind to its human readable description.
 */
predicate isKnownKind(string kind, AutomodelEndpointTypes::EndpointType type) {
  type.getKind() = kind
}
/**
 * Gets the value to use for the subtypes property of a MaD declaration for `callable`.
 *
 * By convention, that property should only be true when there _can_ exist
 * subtypes with a different implementation. The result is therefore `false`
 * when the callable or its declaring type is static or final, and `true`
 * otherwise.
 *
 * Always using 'true' would technically be ok, but would break convention.
 */
pragma[nomagic]
boolean considerSubtypes(Callable callable) {
  result = false and
  (
    callable.isFinal() or
    callable.isStatic() or
    callable.getDeclaringType().isFinal() or
    callable.getDeclaringType().isStatic()
  )
  or
  result = true and
  not callable.isFinal() and
  not callable.isStatic() and
  not callable.getDeclaringType().isFinal() and
  not callable.getDeclaringType().isStatic()
}
/**
* Holds if the given package, type, name and signature is a candidate for automodeling.
*
* This predicate is extensible, so that different endpoints can be selected at runtime.
*
* Queries should not call this directly but go through `includeAutomodelCandidate`, which treats an
* empty extension (no rows at all) as "accept everything".
*/
extensible predicate automodelCandidateFilter(
string package, string type, string name, string signature
);
/**
 * Holds if the endpoint described by `package`, `type`, `name` and `signature` should be offered as a
 * candidate for automodeling.
 *
 * The decision is delegated to the extensible predicate `automodelCandidateFilter`. When that predicate
 * has no rows at all, every endpoint is accepted.
 */
bindingset[package, type, name, signature]
predicate includeAutomodelCandidate(string package, string type, string name, string signature) {
  if automodelCandidateFilter(_, _, _, _)
  then automodelCandidateFilter(package, type, name, signature)
  else any()
}
/**
 * Holds if `e` corresponds to an actual piece of source code, as opposed to
 * something the compiler generated.
 *
 * Note: `Element::fromSource` alone only checks that the element lives in a
 * source file rather than a JAR file. Source files can still contain
 * compiler-generated elements (especially for Kotlin), and those are
 * excluded here as well.
 */
predicate isFromSource(Element e) {
  not (
    // explicitly marked as compiler-generated
    e.isCompilerGenerated()
    or
    // has a dummy location
    e.hasLocationInfo(_, 0, 0, 0, 0)
  ) and
  // from a source file (not a JAR)
  e.fromSource()
}
/**
 * Holds if `tp` is a type through which taint cannot flow, because it is
 * numeric or otherwise restricted to a fixed set of values.
 */
predicate isUnexploitableType(Type tp) {
  tp instanceof VoidType
  or
  tp instanceof NumberType
  or
  tp instanceof BoxedType
  or
  tp instanceof PrimitiveType
}
/**
 * Holds if `m` can be overridden: neither the method nor its declaring type
 * is final, and the method is neither static nor private.
 */
predicate isOverridable(Method m) {
  not (
    m.isPrivate() or
    m.isStatic() or
    m.isFinal() or
    m.getDeclaringType().isFinal()
  )
}

Просмотреть файл

@ -1,412 +0,0 @@
/**
* Gets the confidence (1.0) assigned to definitive indicators, such as the known source and sink
* characteristics.
*/
float maximalConfidence() { result = 1.0 }
/**
* Gets the confidence (0.9) assigned by the `NotASinkCharacteristic` and `NotASourceCharacteristic`
* families.
*/
float highConfidence() { result = 0.9 }
/**
* Gets the confidence (0.6) assigned by `LikelyNotASinkCharacteristic` and
* `UninterestingToModelCharacteristic`. It is also the minimum confidence at which a negative
* indicator excludes an endpoint from being a candidate.
*/
float mediumConfidence() { result = 0.6 }
/**
* A specification of how to instantiate the shared characteristics for a given candidate class.
*
* The `CandidateSig` implementation specifies a type to use for Endpoints (e.g., `ParameterNode`), as well as a type
* to label endpoint classes (the `EndpointType`). One of the endpoint classes needs to be a 'negative' class, meaning
* "not any of the other known endpoint types".
*
* The `SharedCharacteristics` module is parameterized by an implementation of this signature.
*/
signature module CandidateSig {
/**
* An endpoint is a potential candidate for modeling. This will typically be bound to the language's
* DataFlow node class, or a subtype thereof.
*/
class Endpoint {
/**
* Gets the kind of this endpoint, either "sourceModel" or "sinkModel".
*
* `SharedCharacteristics::getAPotentialType` uses this value to decide whether source types or sink types
* are relevant for the endpoint.
*/
string getExtensibleType();
/**
* Gets a string representation of this endpoint.
*/
string toString();
}
/**
* A related location for an endpoint. This will typically be bound to the supertype of all AST nodes (e.g., `Top`).
*/
class RelatedLocation;
/**
* A label for a related location.
*
* E.g., method-doc, class-doc, etc.
*/
class RelatedLocationType;
/**
* An endpoint type considered by this specification.
*/
class EndpointType extends string;
/**
* A sink endpoint type considered by this specification.
*/
class SinkType extends EndpointType;
/**
* A source endpoint type considered by this specification.
*/
class SourceType extends EndpointType;
/**
* Gets the endpoint as a location.
*
* This is a utility function to convert an endpoint to its corresponding location. It is the fallback result of
* `SharedCharacteristics::getRelatedLocationOrCandidate` when no related location exists.
*/
RelatedLocation asLocation(Endpoint e);
/**
* Defines what MaD kinds are known, and what endpoint type they correspond to.
*/
predicate isKnownKind(string kind, EndpointType type);
/**
* Holds if `e` is a flow sanitizer, and has type `t`.
*/
predicate isSanitizer(Endpoint e, EndpointType t);
/**
* Holds if `e` is a sink with the label `kind`, and provenance `provenance`.
*/
predicate isSink(Endpoint e, string kind, string provenance);
/**
* Holds if `e` is a source with the label `kind`, and provenance `provenance`.
*/
predicate isSource(Endpoint e, string kind, string provenance);
/**
* Holds if `e` is not a source or sink of any kind.
*/
predicate isNeutral(Endpoint e);
/**
* Gets a related location.
*
* A related location is a source code location that may hold extra information about an endpoint that can be useful
* to the machine learning model.
*
* For example, a related location for a method call may be the documentation comment of a method.
*/
RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType name);
}
/**
* A set of shared characteristics for a given candidate class.
*
* This module is language-agnostic, although the `CandidateSig` module will be language-specific.
*
* The language specific implementation can also further extend the behavior of this module by adding additional
* implementations of endpoint characteristics exported by this module.
*/
module SharedCharacteristics<CandidateSig Candidate> {
/** Holds if `e` is a sink with the label `kind` and provenance `provenance`; an alias for `Candidate::isSink`. */
predicate isSink = Candidate::isSink/3;
/** Holds if `e` is not a source or sink of any kind; an alias for `Candidate::isNeutral`. */
predicate isNeutral = Candidate::isNeutral/1;
/**
 * Holds if `e` is modeled with the MaD kind `kind` and provenance `provenance`, where `extensibleKind` is
 * "sourceModel" when the model is a source model and "sinkModel" when it is a sink model.
 */
predicate isModeled(Candidate::Endpoint e, string kind, string extensibleKind, string provenance) {
  extensibleKind = "sourceModel" and
  Candidate::isSource(e, kind, provenance)
  or
  extensibleKind = "sinkModel" and
  Candidate::isSink(e, kind, provenance)
}
/**
 * Holds if `endpoint` is modeled as `endpointType`, as witnessed by `characteristic`.
 */
predicate isKnownAs(
  Candidate::Endpoint endpoint, Candidate::EndpointType endpointType,
  EndpointCharacteristic characteristic
) {
  // An endpoint is known to belong to a class when one of its characteristics is a positive indicator for
  // that class with maximal confidence.
  exists(float confidence | confidence = maximalConfidence() |
    characteristic.hasImplications(endpointType, true, confidence)
  ) and
  characteristic.appliesToEndpoint(endpoint)
}
/**
 * Gets an endpoint type that `endpoint` could have: a sink type for "sinkModel" endpoints and a
 * source type for "sourceModel" endpoints. This keeps sources paired with source types and sinks
 * with sink types.
 */
Candidate::EndpointType getAPotentialType(Candidate::Endpoint endpoint) {
  result instanceof Candidate::SinkType and
  endpoint.getExtensibleType() = "sinkModel"
  or
  result instanceof Candidate::SourceType and
  endpoint.getExtensibleType() = "sourceModel"
}
/**
 * Holds if `endpoint` should be treated as a candidate for `endpointType` and handed to the ML model
 * for classification.
 *
 * An endpoint is a candidate for each of its potential types for which no excluding characteristic exists.
 */
predicate isCandidate(Candidate::Endpoint endpoint, Candidate::EndpointType endpointType) {
  not exists(EndpointCharacteristic excluding |
    excluding = getAnExcludingCharacteristic(endpoint, endpointType)
  ) and
  endpointType = getAPotentialType(endpoint)
}
/**
 * Gets the related location of `e` for the label `type` when there is one, and the location of the
 * candidate itself otherwise.
 */
Candidate::RelatedLocation getRelatedLocationOrCandidate(
  Candidate::Endpoint e, Candidate::RelatedLocationType type
) {
  result = Candidate::getRelatedLocation(e, type)
  or
  // fall back to the endpoint only when no related location exists at all
  not exists(Candidate::getRelatedLocation(e, type)) and
  result = Candidate::asLocation(e)
}
/**
 * Gets a characteristic that disbars `endpoint` from being a candidate for `endpointType`
 * with at least medium confidence.
 */
EndpointCharacteristic getAnExcludingCharacteristic(
  Candidate::Endpoint endpoint, Candidate::EndpointType endpointType
) {
  exists(float confidence |
    result.hasImplications(endpointType, false, confidence) and
    confidence >= mediumConfidence()
  ) and
  result.appliesToEndpoint(endpoint)
}
/**
* A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions
* about whether to include the endpoint in the training set and with what kind, as well as whether to score the
* endpoint at inference time.
*
* Concrete subclasses bind `this` to a descriptive string, say which endpoints they apply to
* (`appliesToEndpoint`), and say what that implies for each endpoint type (`hasImplications`).
*/
abstract class EndpointCharacteristic extends string {
/**
* Holds for the string that is the name of the characteristic. This should describe some property of an endpoint
* that is meaningful for determining whether it's a sink, and if so, of which sink type.
*/
bindingset[this]
EndpointCharacteristic() { any() }
/**
* Holds for endpoints that have this characteristic.
*/
abstract predicate appliesToEndpoint(Candidate::Endpoint n);
/**
* This predicate describes what the characteristic tells us about an endpoint.
*
* Params:
* endpointType: The sink/source type.
* isPositiveIndicator: If true, this characteristic indicates that this endpoint _is_ a member of the class; if
* false, it indicates that it _isn't_ a member of the class.
* confidence: A float in [0, 1], which tells us how strong an indicator this characteristic is for the endpoint
* belonging / not belonging to the given class. A confidence near zero means this characteristic is a very weak
* indicator of whether or not the endpoint belongs to the class. A confidence of 1 means that all endpoints with
* this characteristic definitively do/don't belong to the class.
*/
abstract predicate hasImplications(
Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
);
/**
* Indicators with confidence at or above this threshold are considered to be high-confidence indicators.
*
* NOTE(review): this threshold (0.8) differs from the file-level `highConfidence()` (0.9) - confirm whether
* the difference is intentional.
*/
final float getHighConfidenceThreshold() { result = 0.8 }
}
/**
 * A characteristic that marks an endpoint as a sink of a specified type with maximal confidence. Endpoints with
 * such a characteristic can serve as positive samples for training or for a few-shot prompt.
 */
abstract class SinkCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  SinkCharacteristic() { any() }

  /** Gets the sink type that this characteristic indicates. */
  abstract Candidate::EndpointType getSinkType();

  final override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = maximalConfidence() and
    isPositiveIndicator = true and
    endpointType = this.getSinkType()
  }
}
/**
 * A characteristic that marks an endpoint as a source of a specified type with maximal confidence. Endpoints with
 * such a characteristic can serve as positive samples for training or for a few-shot prompt.
 */
abstract class SourceCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  SourceCharacteristic() { any() }

  /** Gets the source type that this characteristic indicates. */
  abstract Candidate::EndpointType getSourceType();

  final override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = maximalConfidence() and
    isPositiveIndicator = true and
    endpointType = this.getSourceType()
  }
}
/**
 * A characteristic that rules out every sink type for an endpoint with high confidence. Endpoints with such a
 * characteristic can serve as negative samples for training or for a few-shot prompt.
 */
abstract class NotASinkCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  NotASinkCharacteristic() { any() }

  override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = highConfidence() and
    isPositiveIndicator = false and
    endpointType instanceof Candidate::SinkType
  }
}
/**
 * A characteristic that rules out every source type for an endpoint with high confidence. Endpoints with such a
 * characteristic can serve as negative samples for training or for a few-shot prompt.
 */
abstract class NotASourceCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  NotASourceCharacteristic() { any() }

  override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = highConfidence() and
    isPositiveIndicator = false and
    endpointType instanceof Candidate::SourceType
  }
}
/**
 * A characteristic that rules out, with high confidence, both every source type and every sink type for an
 * endpoint. Its implications are the union of those of `NotASourceCharacteristic` and `NotASinkCharacteristic`.
 */
abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic,
  NotASourceCharacteristic
{
  bindingset[this]
  NeitherSourceNorSinkCharacteristic() { any() }

  final override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    NotASourceCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence)
    or
    NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence)
  }
}
/**
 * A characteristic that says, with medium confidence, that an endpoint is unlikely to be a sink of any type.
 * Such endpoints can be skipped when scoring at inference time, which saves time and avoids false positives.
 * Because a small number of them may still be sinks, they should not be used as negative samples for training
 * or for a few-shot prompt.
 */
abstract class LikelyNotASinkCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  LikelyNotASinkCharacteristic() { any() }

  override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = mediumConfidence() and
    isPositiveIndicator = false and
    endpointType instanceof Candidate::SinkType
  }
}
/**
 * A characteristic for endpoints that are not necessarily non-sinks, but are not sinks that are interesting to
 * model in the standard Java libraries. These filters should be removed when extracting sink candidates within
 * a user's codebase for customized modeling.
 *
 * Since such endpoints are not necessarily non-sinks, they should not be used as negative samples for training
 * or for a few-shot prompt.
 */
abstract class UninterestingToModelCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  UninterestingToModelCharacteristic() { any() }

  override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = mediumConfidence() and
    isPositiveIndicator = false and
    endpointType instanceof Candidate::SinkType
  }
}
/**
 * Default characteristics that follow directly from the predicates of the `CandidateSig` implementation.
 */
private module DefaultCharacteristicImplementations {
  /**
   * A maximal-confidence sink characteristic for the endpoints that the `CandidateSig` implementation
   * identifies as sinks. There is one characteristic per combination of MaD kind and provenance.
   */
  private class KnownSinkCharacteristic extends SinkCharacteristic {
    string madKind;
    Candidate::EndpointType endpointType;
    string provenance;

    KnownSinkCharacteristic() {
      Candidate::isSink(_, madKind, provenance) and
      Candidate::isKnownKind(madKind, endpointType) and
      // the composed name keeps "this" distinct from the strings of the SinkType classes
      this = madKind + "_" + provenance + "_characteristic"
    }

    override predicate appliesToEndpoint(Candidate::Endpoint e) {
      Candidate::isSink(e, madKind, provenance)
    }

    override Candidate::EndpointType getSinkType() { result = endpointType }
  }

  /**
   * A maximal-confidence source characteristic for the endpoints that the `CandidateSig` implementation
   * identifies as sources. There is one characteristic per combination of MaD kind and provenance.
   */
  private class KnownSourceCharacteristic extends SourceCharacteristic {
    string madKind;
    Candidate::EndpointType endpointType;
    string provenance;

    KnownSourceCharacteristic() {
      Candidate::isSource(_, madKind, provenance) and
      Candidate::isKnownKind(madKind, endpointType) and
      // the composed name keeps "this" distinct from the strings of the endpoint type classes
      this = madKind + "_" + provenance + "_characteristic"
    }

    override predicate appliesToEndpoint(Candidate::Endpoint e) {
      Candidate::isSource(e, madKind, provenance)
    }

    override Candidate::EndpointType getSourceType() { result = endpointType }
  }

  /**
   * A negative characteristic for endpoints that were manually modeled as neutral.
   */
  private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic {
    NeutralModelCharacteristic() { this = "known non-sink" }

    override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }
  }

  /**
   * A negative characteristic for endpoints that are sanitizers, and thus not sources.
   */
  private class IsSanitizerCharacteristic extends NotASourceCharacteristic {
    IsSanitizerCharacteristic() { this = "known sanitizer" }

    override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isSanitizer(e, _) }
  }
}
}

Просмотреть файл

@ -1,62 +0,0 @@
/**
* This file contains query predicates for use when gathering metrics at scale using Multi Repo
* Variant Analysis.
*/
private import java
private import AutomodelAlertSinkUtil
/**
 * Holds if the query with ID `queryId` has `alertCount` alerts whose sinks correspond to the
 * `ai-generated` sink model described by the remaining columns.
 */
query predicate sinkModelCountPerQuery(
  string queryId, int alertCount, string package, string type, boolean subtypes, string name,
  string signature, string input, string ext, string kind, string provenance
) {
  exists(SinkModel s |
    // only models produced by automodel are of interest
    s.getProvenance() = "ai-generated" and
    sinkModelTallyPerQuery(queryId, alertCount, s) and
    // expose the individual columns of the model
    package = s.getPackage() and
    type = s.getType() and
    subtypes = s.getSubtypes() and
    name = s.getName() and
    signature = s.getSignature() and
    input = s.getInput() and
    ext = s.getExt() and
    kind = s.getKind() and
    provenance = s.getProvenance()
  )
}
/**
 * Holds if the `ai-generated` sink model described by `package`, `name`, `input`, etc. has
 * `instanceCount` instances, where `instanceCount` is positive.
 */
query predicate instanceCount(
  int instanceCount, string package, string type, boolean subtypes, string name, string signature,
  string input, string ext, string kind, string provenance
) {
  exists(SinkModel s |
    // only models produced by automodel are of interest
    s.getProvenance() = "ai-generated" and
    // models without any instance are not reported
    instanceCount > 0 and
    instanceCount = s.getInstanceCount() and
    // expose the individual columns of the model
    package = s.getPackage() and
    type = s.getType() and
    subtypes = s.getSubtypes() and
    name = s.getName() and
    signature = s.getSignature() and
    input = s.getInput() and
    ext = s.getExt() and
    kind = s.getKind() and
    provenance = s.getProvenance()
  )
}
// MRVA requires a select clause. It is repurposed here to report the name of each query predicate
// that produced at least one result.
from string hadResults
where
  hadResults = "instanceCount" and
  instanceCount(_, _, _, _, _, _, _, _, _, _)
  or
  hadResults = "sinkModelCountPerQuery" and
  sinkModelCountPerQuery(_, _, _, _, _, _, _, _, _, _, _)
select hadResults

Просмотреть файл

@ -1,2 +0,0 @@
---
lastReleaseVersion: 1.0.11

Просмотреть файл

@ -1,10 +0,0 @@
# Pack manifest for the Java automodel queries.
name: codeql/java-automodel-queries
version: 1.0.12-dev
groups:
  - java
  - automodel
# Dependencies must be nested under this key; `${workspace}` is a CodeQL placeholder for the
# version of the pack found in the enclosing workspace.
dependencies:
  codeql/java-all: ${workspace}
# Data extension files shipped with this pack.
dataExtensions:
  - AutomodelCandidateFilter.yml
warnOnImplicitThis: true

Просмотреть файл

@ -1,2 +0,0 @@
testFailures
failures

Просмотреть файл

@ -1,35 +0,0 @@
import java
import AutomodelApplicationModeCharacteristics as Characteristics
import AutomodelExtractionTests
/**
* Adapts the application-mode extraction predicates to the reduced column set required by `TestHelperSig`.
*
* Each predicate forwards to its namesake in `Characteristics` and discards the columns the test does not
* check by passing `_`. The arguments are positional, so the wildcards must stay aligned with the parameter
* order of the forwarded-to predicates.
*/
module TestHelper implements TestHelperSig<Characteristics::ApplicationCandidatesImpl> {
/** Gets the location of `endpoint`, taken from its `Top` representation. */
Location getEndpointLocation(Characteristics::Endpoint endpoint) {
result = endpoint.asTop().getLocation()
}
/** Holds if `endpoint` is a candidate with the given name, signature, input, output and extensible type. */
predicate isCandidate(
Characteristics::Endpoint endpoint, string name, string signature, string input, string output,
string extensibleType
) {
Characteristics::isCandidate(endpoint, _, _, _, name, signature, input, output, _,
extensibleType, _)
}
/** Holds if `endpoint` is a positive example for `endpointType` with the given metadata. */
predicate isPositiveExample(
Characteristics::Endpoint endpoint, string endpointType, string name, string signature,
string input, string output, string extensibleType
) {
Characteristics::isPositiveExample(endpoint, endpointType, _, _, _, name, signature, input,
output, _, extensibleType)
}
/**
* Holds if `endpoint` is a negative example with the given metadata.
*
* NOTE(review): the five leading wildcards presumably stand for characteristic, confidence, package, type
* and subtypes, as in the framework-mode `isNegativeExample` - confirm against the application-mode signature.
*/
predicate isNegativeExample(
Characteristics::Endpoint endpoint, string name, string signature, string input, string output,
string extensibleType
) {
Characteristics::isNegativeExample(endpoint, _, _, _, _, _, name, signature, input, output, _,
extensibleType)
}
}
import MakeTest<Extraction<Characteristics::ApplicationCandidatesImpl, TestHelper>>

Просмотреть файл

@ -1,8 +0,0 @@
import hudson.Plugin;
// Test fixture: a class that extends hudson.Plugin and overrides `configure`.
// The trailing dollar-prefixed comment records that both parameters of the override are
// expected to be reported as source model candidates.
// NOTE(review): presumably consumed line by line by the extraction test - do not move it to another line.
public class PluginImpl extends Plugin {
@Override
public void configure(String name, String value) { // $ sourceModelCandidate=configure(String,String):Parameter[0] sourceModelCandidate=configure(String,String):Parameter[1]
// ...
}
}

Просмотреть файл

@ -1,112 +0,0 @@
package com.github.codeql.test;
import java.io.InputStream;
import java.io.PrintWriter;
import java.nio.file.CopyOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;
import java.io.File;
import java.io.FileFilter;
import java.nio.file.FileVisitOption;
import java.net.URLConnection;
import java.util.concurrent.FutureTask;
// Test fixture for the automodel extraction test.
// Each method calls library APIs; the trailing dollar-prefixed comments name the expected extraction
// results (candidates, positive examples, negative examples) for the call or argument on the same line,
// and the plain comments explain why an element is or is not expected.
// NOTE(review): the expectations are presumably matched by line, so statements must keep their exact
// line layout.
class Test {
// A receiver that is a sink candidate, and an argument that is a negative sink example.
public static void main(String[] args) throws Exception {
AtomicReference<String> reference = new AtomicReference<>(); // uninteresting (parameterless constructor)
reference.set( // $ sinkModelCandidate=set(Object):Argument[this]
args[0] // $ negativeSinkExample=set(Object):Argument[0] // modeled as a flow step
); // not a source candidate (return type is void)
}
// A call that yields no source candidate.
public static void callSupplier(Supplier<String> supplier) {
supplier.get(); // not a source candidate (lambda flow)
}
// Arguments that are positive sink examples, plus a return value that is a source candidate.
public static void copyFiles(Path source, Path target, CopyOption option) throws Exception {
Files.copy(
source, // $ positiveSinkExample=copy(Path,Path,CopyOption[]):Argument[0](path-injection)
target, // $ positiveSinkExample=copy(Path,Path,CopyOption[]):Argument[1](path-injection)
option // no candidate (not modeled, but source and target are modeled)
); // $ sourceModelCandidate=copy(Path,Path,CopyOption[]):ReturnValue
}
// An argument that is both a sink candidate and a positive sink example.
public static InputStream getInputStream(Path openPath) throws Exception {
return Files.newInputStream(
openPath // $ sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[0] positiveSinkExample=newInputStream(Path,OpenOption[]):Argument[0](path-injection) // sink candidate because "only" ai-modeled, and useful as a candidate in regression testing
); // $ sourceModelCandidate=newInputStream(Path,OpenOption[]):ReturnValue
}
// A local call wrapping a library call whose arguments are negative sink examples.
public static InputStream getInputStream(String openPath, String otherPath) throws Exception {
return Test.getInputStream( // the call is not a source candidate (argument to local call)
Paths.get(
openPath, // $ negativeSinkExample=get(String,String[]):Argument[0] // modeled as a flow step
otherPath
) // $ sourceModelCandidate=get(String,String[]):ReturnValue negativeSinkExample=get(String,String[]):Argument[1]
);
}
// Receiver and argument that are both negative sink examples.
public static int compareFiles(File f1, File f2) {
return f1.compareTo( // $ negativeSinkExample=compareTo(File):Argument[this]
f2 // $ negativeSinkExample=compareTo(File):Argument[0] // modeled as not a sink
); // not a source candidate (return type is int)
}
// A varargs call: the implicit varargs array is annotated on the last line of the call.
public static void FilesWalkExample(Path p, FileVisitOption o) throws Exception {
Files.walk(
p, // $ negativeSinkExample=walk(Path,FileVisitOption[]):Argument[0] // modeled as a flow step
o, // the implicit varargs array is a candidate, annotated on the last line of the call
o // not a candidate (only the first arg corresponding to a varargs array
// is extracted)
); // $ sourceModelCandidate=walk(Path,FileVisitOption[]):ReturnValue sinkModelCandidate=walk(Path,FileVisitOption[]):Argument[1]
}
// A return value that is a positive source example, and receivers that are sink candidates.
public static void WebSocketExample(URLConnection c) throws Exception {
c.getInputStream(); // $ sinkModelCandidate=getInputStream():Argument[this] positiveSourceExample=getInputStream():ReturnValue(remote) // not a source candidate (manual modeling)
c.connect(); // $ sinkModelCandidate=connect():Argument[this] // not a source candidate (return type is void)
}
// The receiver and the return value are annotated; the filter argument is not.
public static void fileFilterExample(File f, FileFilter ff) {
f.listFiles( // $ sinkModelCandidate=listFiles(FileFilter):Argument[this]
ff
); // $ sourceModelCandidate=listFiles(FileFilter):ReturnValue
}
}
// Declares `printStackTrace(PrintWriter)` in a subclass of `Exception`; the annotation
// expects the parameter of this method to be reported as a source candidate.
class OverrideTest extends Exception {
public void printStackTrace(PrintWriter writer) { // $ sourceModelCandidate=printStackTrace(PrintWriter):Parameter[0]
return;
}
}
// Constructor-call fixture: builds a `FutureTask` from a lambda. The block carries no
// `$` annotations, i.e. no result is expected on any of its lines.
class TaskUtils {
public FutureTask getTask() {
FutureTask ft = new FutureTask(() -> {
// ^-- no sink candidate for the `this` qualifier of a constructor
return 42;
});
return ft;
}
}
// Further `java.nio.file.Files` call sites, including a call nested as the argument of
// another call.
class MoreTests {
public static void FilesListExample(Path p) throws Exception {
// Nested calls: the inner call's closing line carries both the inner call's return-value
// annotation and the outer call's Argument[0] annotation.
Files.list(
Files.createDirectories(
p // $ positiveSinkExample=createDirectories(Path,FileAttribute[]):Argument[0](path-injection)
) // $ sourceModelCandidate=createDirectories(Path,FileAttribute[]):ReturnValue negativeSinkExample=list(Path):Argument[0] // modeled as a flow step
); // $ sourceModelCandidate=list(Path):ReturnValue
// For both delete variants the path argument is a sink candidate and a positive sink example.
Files.delete(
p // $ sinkModelCandidate=delete(Path):Argument[0] positiveSinkExample=delete(Path):Argument[0](path-injection)
); // not a source candidate (return type is void)
Files.deleteIfExists(
p // $ sinkModelCandidate=deleteIfExists(Path):Argument[0] positiveSinkExample=deleteIfExists(Path):Argument[0](path-injection)
); // not a source candidate (return type is boolean)
}
}

Просмотреть файл

@ -1,7 +0,0 @@
package hudson;
// Minimal class in package `hudson` with a single empty `configure(String, String)` method.
// NOTE(review): presumably a stand-in for the real Jenkins class so the fixtures compile
// without that dependency; the Javadoc texts below may be fixture data - confirm before editing.
/** Plugin doc */
public class Plugin {
/** Configure method doc */
public void configure(String name, String value) {}
}

Просмотреть файл

@ -1,77 +0,0 @@
import java
import TestUtilities.InlineExpectationsTest
import AutomodelSharedCharacteristics
/**
 * The predicates a concrete test must supply so that `Extraction` can turn endpoints of
 * `Candidate` into inline-expectation results.
 */
signature module TestHelperSig<CandidateSig Candidate> {
/** Gets the location at which results for endpoint `e` are reported. */
Location getEndpointLocation(Candidate::Endpoint e);

/**
 * Holds if `e` is a model candidate for the callable identified by `name` and `signature`.
 * `extensibleType` is expected to be `"sourceModel"` or `"sinkModel"`; `Extraction` reports
 * `output` for the former and `input` for the latter.
 */
predicate isCandidate(
Candidate::Endpoint e, string name, string signature, string input, string output,
string extensibleType
);

/**
 * Holds if `e` is a known positive example of type `endpointType`. `Extraction` appends
 * `endpointType` in parentheses to the reported value.
 */
predicate isPositiveExample(
Candidate::Endpoint e, string endpointType, string name, string signature, string input,
string output, string extensibleType
);

/** Holds if `e` is a known negative example, i.e. known not to be a source or sink. */
predicate isNegativeExample(
Candidate::Endpoint e, string name, string signature, string input, string output,
string extensibleType
);
}
/**
 * An inline-expectations test that reports every candidate, positive example and negative
 * example supplied by `TestHelper` as a `tag=value` result.
 */
module Extraction<CandidateSig Candidate, TestHelperSig<Candidate> TestHelper> implements TestSig {
  /** Gets a tag produced by this test. */
  string getARelevantTag() {
    // a candidate source/sink
    result = ["source", "sink"] + "ModelCandidate"
    or
    // a known source/sink ("positive") or a known non-source/non-sink ("negative")
    result = ["positive", "negative"] + ["Source", "Sink"] + "Example"
  }

  /**
   * Gets `forSource` when `extensibleType` is `sourceModel` and `forSink` when it is
   * `sinkModel`; has no result for any other `extensibleType`.
   */
  bindingset[extensibleType, forSource, forSink]
  private string pickByModelKind(string extensibleType, string forSource, string forSink) {
    if extensibleType = "sourceModel"
    then result = forSource
    else (
      extensibleType = "sinkModel" and result = forSink
    )
  }

  /**
   * Holds if `endpoint` should be reported under `tag`, with `suffix` appended to its value.
   * Candidates and negative examples have an empty suffix; positive examples carry their
   * endpoint type in parentheses.
   */
  additional predicate selectEndpoint(
    Candidate::Endpoint endpoint, string name, string signature, string input, string output,
    string extensibleType, string tag, string suffix
  ) {
    suffix = "" and
    (
      TestHelper::isCandidate(endpoint, name, signature, input, output, extensibleType) and
      tag = pickByModelKind(extensibleType, "source", "sink") + "ModelCandidate"
      or
      TestHelper::isNegativeExample(endpoint, name, signature, input, output, extensibleType) and
      tag = "negative" + pickByModelKind(extensibleType, "Source", "Sink") + "Example"
    )
    or
    exists(string endpointType |
      TestHelper::isPositiveExample(endpoint, endpointType, name, signature, input, output,
        extensibleType) and
      tag = "positive" + pickByModelKind(extensibleType, "Source", "Sink") + "Example" and
      suffix = "(" + endpointType + ")"
    )
  }

  /** Holds if the test produces a result `tag=value` for `element` at `location`. */
  predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(
      Candidate::Endpoint endpoint, string name, string signature, string input, string output,
      string extensibleType, string suffix, string accessPath
    |
      selectEndpoint(endpoint, name, signature, input, output, extensibleType, tag, suffix) and
      location = TestHelper::getEndpointLocation(endpoint) and
      element = endpoint.toString() and
      // for source models only the output is relevant, and vice versa for sink models
      accessPath = pickByModelKind(extensibleType, output, input) and
      value = name + signature + ":" + accessPath + suffix
    )
  }
}

Просмотреть файл

@ -1,2 +0,0 @@
testFailures
failures

Просмотреть файл

@ -1,35 +0,0 @@
import java
import AutomodelFrameworkModeCharacteristics as Characteristics
import AutomodelExtractionTests
/**
 * Adapts the predicates of `AutomodelFrameworkModeCharacteristics` to `TestHelperSig`.
 * Each predicate forwards to the same-named predicate in `Characteristics` and passes `_`
 * for the columns that the test does not use.
 */
module TestHelper implements TestHelperSig<Characteristics::FrameworkCandidatesImpl> {
/** Gets the location of the element underlying `endpoint` (`endpoint.asTop()`). */
Location getEndpointLocation(Characteristics::Endpoint endpoint) {
result = endpoint.asTop().getLocation()
}

/** Holds if `Characteristics::isCandidate` holds for `endpoint` with the given columns. */
predicate isCandidate(
Characteristics::Endpoint endpoint, string name, string signature, string input, string output,
string extensibleType
) {
Characteristics::isCandidate(endpoint, _, _, _, name, signature, input, output, _,
extensibleType, _)
}

/** Holds if `Characteristics::isPositiveExample` holds for `endpoint` with the given columns. */
predicate isPositiveExample(
Characteristics::Endpoint endpoint, string endpointType, string name, string signature,
string input, string output, string extensibleType
) {
Characteristics::isPositiveExample(endpoint, endpointType, _, _, _, name, signature, input,
output, _, extensibleType)
}

/** Holds if `Characteristics::isNegativeExample` holds for `endpoint` with the given columns. */
predicate isNegativeExample(
Characteristics::Endpoint endpoint, string name, string signature, string input, string output,
string extensibleType
) {
Characteristics::isNegativeExample(endpoint, _, _, _, _, _, name, signature, input, output, _,
extensibleType)
}
}
import MakeTest<Extraction<Characteristics::FrameworkCandidatesImpl, TestHelper>>

Просмотреть файл

@ -1,15 +0,0 @@
package com.github.codeql.test;
// Public subclass of `java.io.Writer` that overrides `write`, `close` and `flush` with empty
// bodies. Per the annotations, `this` is a sink and source candidate for every method;
// `write`'s first parameter is a source candidate and a positive sink example.
public class MyWriter extends java.io.Writer {
@Override
public void write(char[] cbuf, int off, int len) { // $ sinkModelCandidate=write(char[],int,int):Argument[this] positiveSinkExample=write(char[],int,int):Argument[0](file-content-store) sourceModelCandidate=write(char[],int,int):Parameter[this] sourceModelCandidate=write(char[],int,int):Parameter[0]
}
@Override
public void close() { // $ sinkModelCandidate=close():Argument[this] sourceModelCandidate=close():Parameter[this]
}
@Override
public void flush() { // $ sinkModelCandidate=flush():Argument[this] sourceModelCandidate=flush():Parameter[this]
}
}

Просмотреть файл

@ -1,10 +0,0 @@
package com.github.codeql.test;
/**
 * No candidates in this class, as it's not public!
 */
class NonPublicClass {
// The method itself is public, but the enclosing class is package-private, so nothing is annotated.
public void noCandidates(String here) {
System.out.println(here);
}
}

Просмотреть файл

@ -1,27 +0,0 @@
package com.github.codeql.test;
// Public class whose members differ in visibility and staticness, to show which of their
// `this` values, parameters and return values are annotated.
public class PublicClass {
public void stuff(String arg) { // $ sinkModelCandidate=stuff(String):Argument[this] sourceModelCandidate=stuff(String):Parameter[this] sinkModelCandidate=stuff(String):Argument[0] sourceModelCandidate=stuff(String):Parameter[0] // source candidates because it is an overridable method
System.out.println(arg);
}
public static void staticStuff(String arg) { // $ sinkModelCandidate=staticStuff(String):Argument[0] // `arg` is not a source candidate (not overridable); `this` is not a candidate (static method)
System.out.println(arg);
}
// Protected method: annotated with the same four candidates as the public `stuff` above.
protected void nonPublicStuff(String arg) { // $ sinkModelCandidate=nonPublicStuff(String):Argument[this] sourceModelCandidate=nonPublicStuff(String):Parameter[this] sinkModelCandidate=nonPublicStuff(String):Argument[0] sourceModelCandidate=nonPublicStuff(String):Parameter[0]
System.out.println(arg);
}
void packagePrivateStuff(String arg) { // no candidates because the method is not public
System.out.println(arg);
}
public PublicClass(Object input) { // $ sourceModelCandidate=PublicClass(Object):ReturnValue sinkModelCandidate=PublicClass(Object):Argument[0] // `this` is not a candidate because it is a constructor
}
// `this` and `input` are source candidates, but not sink candidates (is-style method)
public Boolean isIgnored(Object input) { // $ negativeSinkExample=isIgnored(Object):Argument[this] sourceModelCandidate=isIgnored(Object):Parameter[this] negativeSinkExample=isIgnored(Object):Argument[0] sourceModelCandidate=isIgnored(Object):Parameter[0]
return false;
}
}

Просмотреть файл

@ -1,9 +0,0 @@
package com.github.codeql.test;
// Public interface with one abstract and one static method.
public interface PublicInterface {
public int stuff(String arg); // $ sinkModelCandidate=stuff(String):Argument[this] sourceModelCandidate=stuff(String):Parameter[this] sinkModelCandidate=stuff(String):Argument[0] sourceModelCandidate=stuff(String):Parameter[0] // result is _not_ a source candidate (primitive return type)
public static void staticStuff(String arg) { // $ sinkModelCandidate=staticStuff(String):Argument[0] // not a source candidate (static method)
System.out.println(arg);
}
}

Просмотреть файл

@ -1,13 +0,0 @@
package java.io;
// Fixture class declared in package `java.io` with two methods whose bodies return constants.
// NOTE(review): presumably shadows the JDK class so that existing models for `java.io.File`
// apply to these declarations - confirm.
public class File {
// Parameter annotation sits on the parameter's own line, the `this` annotation on the method line.
public int compareTo( // $ negativeSinkExample=compareTo(File):Argument[this] sourceModelCandidate=compareTo(File):Parameter[this] // modeled as neutral for sinks
File pathname // $ negativeSinkExample=compareTo(File):Argument[0] sourceModelCandidate=compareTo(File):Parameter[0] // modeled as neutral for sinks
) {
return 0;
}
public boolean setLastModified(long time) { // $ sinkModelCandidate=setLastModified(long):Argument[this] sourceModelCandidate=setLastModified(long):Parameter[this] // time is not a candidate (primitive type)
return false;
} // return value is not a source candidate because it's a primitive
}

Просмотреть файл

@ -1,31 +0,0 @@
package java.nio.file;
import java.io.InputStream;
import java.io.FileInputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.OpenOption;
// Fixture class declared in package `java.nio.file` with a `copy(Path, OutputStream)` and a
// `newInputStream(Path, OpenOption...)` method.
// NOTE(review): presumably shadows the JDK class so that existing models for
// `java.nio.file.Files` apply to these declarations - confirm.
public class Files {
public static void copy( // method result is not a candidate source (void)
Path source, // $ positiveSinkExample=copy(Path,OutputStream):Argument[0](path-injection) // manual model exists
OutputStream out // $ sinkModelCandidate=copy(Path,OutputStream):Argument[1]
/* NB: may be worthwhile to implement the
same behavior as in application mode where out would not be a
candidate because there already is a model for another parameter of
the same method and we assume that methods are always modeled
completely.
*/
) throws IOException {
// ...
}
// The return value is a source candidate; both parameters are sink candidates, and
// `openPath` is additionally a positive sink example.
public static InputStream newInputStream( // $ sourceModelCandidate=newInputStream(Path,OpenOption[]):ReturnValue
Path openPath, // $ positiveSinkExample=newInputStream(Path,OpenOption[]):Argument[0](path-injection) sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[0] // known sink, but still a candidate (ai-modeled, and useful as a candidate in regression testing)
OpenOption... options // $ sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[1]
) throws IOException {
return new FileInputStream(openPath.toFile());
}
}

Просмотреть файл

@ -1,4 +0,0 @@
---
category: breaking
---
* CodeQL package management is now generally available, and all GitHub-produced CodeQL packages have had their version numbers increased to 1.0.0.

Просмотреть файл

@ -1,13 +0,0 @@
name: codeql/java-automodel-tests
version: 1.0.0-dev
groups:
- java
- automodel
- test
dependencies:
codeql/java-all: ${workspace}
codeql/java-automodel-queries: ${workspace}
codeql/java-tests: ${workspace}
extractor: java
tests: .
warnOnImplicitThis: true