зеркало из https://github.com/github/codeql.git
Java: drop automodel queries
This commit is contained in:
Родитель
aea7c3fc81
Коммит
4208f031e3
|
@ -1,197 +0,0 @@
|
|||
#!/bin/bash
#
# Publishes the automodel query pack.
#
# Steps, in order:
#   1. Parse the command-line flags and check the environment (clean work tree, tokens).
#   2. Decide which revision to publish: the local HEAD (--override-release) or the codeql sha
#      recorded in the latest codeml-automodel release (which is then checked out).
#   3. Run the automodel tests, then release / publish / post-release the query pack.
#   4. Set the version-bump files aside, restore the original checkout, and (unless --dry-run)
#      stage the version-bump files on the current branch.
#
# NOTE: all `./src/...` paths are relative to the directory the script is invoked from.
set -e

help="Usage: ./publish [--override-release] [--dry-run]
Publish the automodel query pack.

If no arguments are provided, publish the version of the codeql repo specified by the latest official release of the codeml-automodel repo.
If the --override-release argument is provided, your current local HEAD is used (for unofficial releases or patching).
If the --dry-run argument is provided, the release is not published (for testing purposes)."

# Echo the help message
if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
  echo "$help"
  exit 0
fi

# Check the number of arguments are valid
if [ $# -gt 2 ]; then
  echo "Error: Invalid arguments provided"
  echo "$help"
  exit 1
fi

OVERRIDE_RELEASE=0
DRY_RUN=0
# The list iterated by `for` is expanded once up front, so no `shift` is needed here.
for arg in "$@"
do
  case "$arg" in
    --override-release)
      OVERRIDE_RELEASE=1
      ;;
    --dry-run)
      DRY_RUN=1
      ;;
    *)
      echo "Error: Invalid argument provided: $arg"
      echo "$help"
      exit 1
      ;;
  esac
done

# Describe what we're about to do based on the command-line arguments
if [ "$OVERRIDE_RELEASE" = 1 ]; then
  echo "Publishing the current HEAD of the automodel repo"
else
  echo "Publishing the version of the automodel repo specified by the latest official release of the codeml-automodel repo"
fi
if [ "$DRY_RUN" = 1 ]; then
  echo "Dry run: we will step through the process but we won't publish the query pack"
else
  echo "Not a dry run! Publishing the query pack"
fi

# If we're publishing the codeml-automodel release then we will checkout the sha specified in the release.
# So we need to check that there are no uncommitted changes in the local branch.
# And, if we're publishing the current HEAD, it's cleaner to ensure that there are no uncommitted changes.
# Both unstaged and staged changes count: the forced checkout further down would discard either.
if ! git diff --quiet || ! git diff --cached --quiet; then
  echo "Error: Uncommitted changes exist. Please commit or stash your changes before publishing."
  exit 1
fi

# Check the required environment variables are set
if [ -z "${GITHUB_TOKEN}" ]; then
  echo "Error: GITHUB_TOKEN environment variable not set. Please set this to a token with package:write permissions to codeql."
  exit 1
fi
if [ -z "${GH_TOKEN}" ]; then
  echo "Error: GH_TOKEN environment variable not set. Please set this to a token with repo permissions to github/codeml-automodel."
  exit 1
fi

# Get the sha of the previous release, i.e. the last commit to the main branch that updated the query pack version
PREVIOUS_RELEASE_SHA=$(git rev-list -n 1 main -- ./src/qlpack.yml)
if [ -z "$PREVIOUS_RELEASE_SHA" ]; then
  echo "Error: Could not get the sha of the previous release of codeml-automodel query pack"
  exit 1
else
  echo "Previous query-pack release sha: $PREVIOUS_RELEASE_SHA"
fi

CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
CURRENT_SHA=$(git rev-parse HEAD)
# With a detached HEAD, `--abbrev-ref` prints the literal "HEAD"; checking that out later would
# not bring us back to the starting commit, so remember the sha instead.
if [ "$CURRENT_BRANCH" = "HEAD" ]; then
  RESTORE_REF="$CURRENT_SHA"
else
  RESTORE_REF="$CURRENT_BRANCH"
fi

if [ "$OVERRIDE_RELEASE" = 1 ]; then
  # Check that the current HEAD is downstream from PREVIOUS_RELEASE_SHA
  if ! git merge-base --is-ancestor "$PREVIOUS_RELEASE_SHA" "$CURRENT_SHA"; then
    echo "Error: The current HEAD is not downstream from the previous release"
    exit 1
  fi
else
  # Get the latest release of codeml-automodel.
  # `// empty` makes jq print nothing (rather than the string "null") when the field is missing,
  # so that the emptiness check below actually catches a failed lookup.
  TAG_NAME=$(gh api -H 'Accept: application/vnd.github+json' -H 'X-GitHub-Api-Version: 2022-11-28' /repos/github/codeml-automodel/releases/latest | jq -r '.tag_name // empty')
  # Check TAG_NAME is not empty
  if [ -z "$TAG_NAME" ]; then
    echo "Error: Could not get latest release of codeml-automodel"
    exit 1
  fi
  echo "Updating to latest automodel release: $TAG_NAME"
  # Before downloading, delete any existing release.zip (-f: no error if it is not present)
  rm -f release.zip
  gh release download "$TAG_NAME" -A zip -O release.zip --repo 'https://github.com/github/codeml-automodel'
  # Before unzipping, delete any existing release directory (-f: no error if it is not present)
  rm -rf release
  unzip -o release.zip -d release
  # The glob must stay unquoted so that the shell expands it.
  REVISION=$(jq -r '.["codeql-sha"] // empty' release/codeml-automodel*/codeml-automodel-release.json)
  if [ -z "$REVISION" ]; then
    echo "Error: Could not read the codeql sha from the latest codeml-automodel release"
    exit 1
  fi
  echo "The latest codeml-automodel release specifies the codeql sha $REVISION"
  # Check that REVISION is downstream from PREVIOUS_RELEASE_SHA
  if ! git merge-base --is-ancestor "$PREVIOUS_RELEASE_SHA" "$REVISION"; then
    echo "Error: The codeql version $REVISION is not downstream of the query-pack version $PREVIOUS_RELEASE_SHA"
    exit 1
  fi
  # Get the version of the codeql code specified by the codeml-automodel release
  git checkout "$REVISION"
fi

# Get the absolute path of the automodel repo
AUTOMODEL_ROOT="$(readlink -f "$(dirname "$0")")"
# Get the absolute path of the workspace root
WORKSPACE_ROOT="$AUTOMODEL_ROOT/../../.."
# Specify the groups of queries to test and publish
GRPS="automodel,-test"

# Install the codeql gh extension
gh extensions install github/gh-codeql

pushd "$AUTOMODEL_ROOT"
echo "Testing automodel queries"
gh codeql test run test
popd

pushd "$WORKSPACE_ROOT"
echo "Preparing the release"
gh codeql pack release --groups "$GRPS" -v

if [ "$DRY_RUN" = 1 ]; then
  echo "Dry run: not publishing the query pack"
  gh codeql pack publish --groups "$GRPS" --dry-run -v
else
  echo "Not a dry run! Publishing the query pack"
  gh codeql pack publish --groups "$GRPS" -v
fi

echo "Bumping versions"
gh codeql pack post-release --groups "$GRPS" -v
popd

# The above commands update
# ./src/CHANGELOG.md
# ./src/codeql-pack.release.yml
# ./src/qlpack.yml
# and add a new file
# ./src/change-notes/released/<version>.md

# Get the filename of the most recently created file in ./src/change-notes/released/*.md
# This will be the file for the new release
NEW_CHANGE_NOTES_FILE=$(ls -t ./src/change-notes/released/*.md | head -n 1)

# Set the modified files aside so that restoring the checkout below does not lose them
mv ./src/CHANGELOG.md ./src/CHANGELOG.md.dry-run
mv ./src/codeql-pack.release.yml ./src/codeql-pack.release.yml.dry-run
mv ./src/qlpack.yml ./src/qlpack.yml.dry-run
mv "$NEW_CHANGE_NOTES_FILE" ./src/change-notes/released.md.dry-run

if [ "$OVERRIDE_RELEASE" = 1 ]; then
  # Restore the original files
  git checkout ./src/CHANGELOG.md
  git checkout ./src/codeql-pack.release.yml
  git checkout ./src/qlpack.yml
else
  # Restore the original checkout (branch, or starting commit if HEAD was detached)
  git checkout "$RESTORE_REF" --force
fi

if [ "$DRY_RUN" = 1 ]; then
  echo "Inspect the updated dry-run version files:"
  ls -l ./src/*.dry-run
  ls -l ./src/change-notes/*.dry-run
else
  # Add the updated files to the current branch
  echo "Adding the version changes"
  mv -f ./src/CHANGELOG.md.dry-run ./src/CHANGELOG.md
  mv -f ./src/codeql-pack.release.yml.dry-run ./src/codeql-pack.release.yml
  mv -f ./src/qlpack.yml.dry-run ./src/qlpack.yml
  mv -f ./src/change-notes/released.md.dry-run "$NEW_CHANGE_NOTES_FILE"
  git add ./src/CHANGELOG.md
  git add ./src/codeql-pack.release.yml
  git add ./src/qlpack.yml
  git add "$NEW_CHANGE_NOTES_FILE"
  echo "Added the following updated version files to the current branch:"
  git status -s
  echo "To complete the release, please commit these files and merge to the main branch"
fi

echo "Done"
|
|
@ -1,183 +0,0 @@
|
|||
private import java
|
||||
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
|
||||
private import semmle.code.java.dataflow.TaintTracking
|
||||
private import semmle.code.java.security.RequestForgeryConfig
|
||||
private import semmle.code.java.security.CommandLineQuery
|
||||
private import semmle.code.java.security.SqlConcatenatedQuery
|
||||
private import semmle.code.java.security.SqlInjectionQuery
|
||||
private import semmle.code.java.security.UrlRedirectQuery
|
||||
private import semmle.code.java.security.TaintedPathQuery
|
||||
private import semmle.code.java.security.SqlInjectionQuery
|
||||
private import AutomodelJavaUtil
|
||||
|
||||
/**
 * The algebraic type behind `SinkModel`: one value for every distinct combination of the first
 * nine columns of `ExternalFlow::sinkModel` (its last column is ignored).
 */
private newtype TSinkModel =
  MkSinkModel(
    string package, string type, boolean subtypes, string name, string signature, string ext,
    string input, string kind, string provenance
  ) {
    ExternalFlow::sinkModel(package, type, subtypes, name, signature, ext, input, kind,
      provenance, _)
  }
|
||||
|
||||
/**
 * A sink model, i.e. one row of `ExternalFlow::sinkModel` without its last column.
 *
 * The fields mirror the columns of the row; each has a getter of the same name.
 */
class SinkModel extends TSinkModel {
  string package;
  string type;
  boolean subtypes;
  string name;
  string signature;
  string ext;
  string input;
  string kind;
  string provenance;

  SinkModel() {
    MkSinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance) = this
  }

  /** Gets the package column of this sink model. */
  string getPackage() { result = package }

  /** Gets the type column of this sink model. */
  string getType() { result = type }

  /** Gets the subtypes column of this sink model. */
  boolean getSubtypes() { result = subtypes }

  /** Gets the name column of this sink model. */
  string getName() { result = name }

  /** Gets the signature column of this sink model. */
  string getSignature() { result = signature }

  /** Gets the input column of this sink model. */
  string getInput() { result = input }

  /** Gets the ext column of this sink model. */
  string getExt() { result = ext }

  /** Gets the kind column of this sink model. */
  string getKind() { result = kind }

  /** Gets the provenance column of this sink model. */
  string getProvenance() { result = provenance }

  /** Gets the number of `PotentialSinkModelExpr`s whose sink model is this one. */
  int getInstanceCount() { result = count(PotentialSinkModelExpr p | this = p.getSinkModel()) }

  /** Gets a textual representation of this sink model, listing all of its columns. */
  string toString() {
    exists(string columns |
      columns =
        package + ", " + type + ", " + subtypes + ", " + name + ", " + signature + ", " + ext +
          ", " + input + ", " + kind + ", " + provenance
    |
      result = "SinkModel(" + columns + ")"
    )
  }

  /**
   * Gets this sink model rendered as a Models-as-Data row: every string column in double
   * quotes, the `subtypes` column as `True`/`False`, all separated by `, `.
   */
  string getRepr() {
    exists(string callablePart, string modelPart |
      // columns identifying the callable, up to (but excluding) the closing quote of `signature`
      callablePart =
        "\"" + package + "\", \"" + type + "\", " + pyBool(subtypes) + ", \"" + name + "\", \"" +
          signature and
      // remaining columns, starting after the opening quote of `ext`
      modelPart = ext + "\", \"" + input + "\", \"" + kind + "\", \"" + provenance + "\""
    |
      result = callablePart + "\", \"" + modelPart
    )
  }
}
|
||||
|
||||
/**
 * An expression that could be covered by a sink model: any expression, matched against sink
 * models through the call it is an argument or qualifier of.
 */
class PotentialSinkModelExpr extends Expr {
  /**
   * Holds if this expression is an argument (or the qualifier) of a call whose callee is
   * described by `package`, `type`, `subtypes`, `name` and `signature`, and `input` is the
   * corresponding access path (`Argument[<index>]`, or `Argument[this]` for the qualifier).
   *
   * These columns are what is needed to look up a matching sink model, if there is one.
   */
  pragma[nomagic]
  predicate hasSignature(
    string package, string type, boolean subtypes, string name, string signature, string input
  ) {
    exists(Call call, Callable callable, int argIdx |
      callable = call.getCallee().getSourceDeclaration() and
      (
        // the qualifier is given the pseudo-index -1
        argIdx = -1 and this = call.getQualifier()
        or
        this = call.getArgument(argIdx)
      ) and
      (
        argIdx = -1 and input = "Argument[this]"
        or
        argIdx != -1 and input = "Argument[" + argIdx + "]"
      ) and
      name = callable.getName() and
      signature = ExternalFlow::paramsString(callable) and
      subtypes = considerSubtypes(callable) and
      type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and
      package = callable.getDeclaringType().getPackage().getName()
    )
  }

  /** Gets a sink model whose callable columns and input match this expression. */
  SinkModel getSinkModel() {
    exists(SinkModel m |
      this.hasSignature(m.getPackage(), m.getType(), m.getSubtypes(), m.getName(),
        m.getSignature(), m.getInput())
    |
      result = m
    )
  }
}
|
||||
|
||||
/** Gets the capitalised spelling of `b`: `True` for `true` and `False` for `false`. */
private string pyBool(boolean b) {
  result = "False" and b = false
  or
  result = "True" and b = true
}
|
||||
|
||||
/**
 * Gets a string representation of an existing sink model at the expression `e`, in the format
 * in which it would appear in a Models-as-Data file. Only sink models whose provenance is
 * `ai-generated` are reported.
 */
string getSinkModelRepr(PotentialSinkModelExpr e) {
  // Bind the model once: checking the provenance on a separate `e.getSinkModel()` call would
  // also let through the repr of a non-AI model whenever `e` has some AI-generated model too.
  exists(SinkModel model |
    model = e.getSinkModel() and
    model.getProvenance() = "ai-generated"
  |
    result = model.getRepr()
  )
}
|
||||
|
||||
/**
 * Gets the sink-model representation of `e` (see `getSinkModelRepr`), prefixed with a newline
 * and `sinkModel: ` so that it can be appended to an alert message.
 */
string getSinkModelQueryRepr(PotentialSinkModelExpr e) {
  exists(string repr | repr = getSinkModelRepr(e) | result = "\nsinkModel: " + repr)
}
|
||||
|
||||
/**
 * A parameterised module that, given a dataflow configuration, counts per sink model how many
 * flow-path sink nodes of the corresponding global taint-tracking analysis carry that model.
 */
private module SinkTallier<DataFlow::ConfigSig Config> {
  module ConfigFlow = TaintTracking::Global<Config>;

  /** Gets a sink model of the expression underlying the path node `node`. */
  private SinkModel getModelAt(ConfigFlow::PathNode node) {
    result = node.getNode().asExpr().(PotentialSinkModelExpr).getSinkModel()
  }

  /**
   * Holds if `c` is the (non-zero) number of path nodes that are the sink of some flow path
   * and whose expression has the sink model `s`.
   */
  predicate getSinkModelCount(int c, SinkModel s) {
    s = getModelAt(_) and
    c = strictcount(ConfigFlow::PathNode sink | ConfigFlow::flowPath(_, sink) and s = getModelAt(sink))
  }
}
|
||||
|
||||
/**
 * Holds if, for the query identified by `queryName`, `alertCount` flow-path sink nodes of that
 * query's taint-tracking configuration carry the sink model `sinkModel`.
 *
 * Both `java/request-forgery` and `java/ssrf` are tallied with `RequestForgeryConfig`.
 */
predicate sinkModelTallyPerQuery(string queryName, int alertCount, SinkModel sinkModel) {
  queryName = ["java/request-forgery", "java/ssrf"] and
  SinkTallier<RequestForgeryConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/command-line-injection" and
  SinkTallier<InputToArgumentToExecFlowConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/concatenated-sql-query" and
  SinkTallier<UncontrolledStringBuilderSourceFlowConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/sql-injection" and
  SinkTallier<QueryInjectionFlowConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/path-injection" and
  SinkTallier<TaintedPathConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/unvalidated-url-redirection" and
  SinkTallier<UrlRedirectConfig>::getSinkModelCount(alertCount, sinkModel)
}
|
||||
|
||||
/**
 * Holds if `alertCount` is the sum of the per-query tallies (see `sinkModelTallyPerQuery`)
 * recorded for `sinkModel`, which must have at least one such tally.
 *
 * NOTE(review): the aggregate ranges over the count values `c` only, so it looks like two
 * queries reporting the same count contribute once rather than twice - confirm whether a
 * per-query sum was intended.
 */
predicate sinkModelTally(int alertCount, SinkModel sinkModel) {
  alertCount = sum(int c | sinkModelTallyPerQuery(_, c, sinkModel) | c) and
  sinkModelTallyPerQuery(_, _, sinkModel)
}
|
|
@ -1,16 +0,0 @@
|
|||
/**
|
||||
* @name Number of alerts per sink model
|
||||
* @description Counts the number of alerts using `ai-generated` sink models.
|
||||
* @kind table
|
||||
* @id java/ml/metrics-count-alerts-per-sink-model
|
||||
* @tags internal automodel metrics
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelAlertSinkUtil
|
||||
|
||||
from int alertCount, SinkModel s
where
  // only models whose provenance is `ai-generated` are reported
  s.getProvenance() = "ai-generated" and
  sinkModelTally(alertCount, s)
select alertCount, s.getPackage() as package, s.getType() as type, s.getSubtypes() as subtypes,
  s.getName() as name, s.getSignature() as signature, s.getInput() as input, s.getExt() as ext,
  s.getKind() as kind, s.getProvenance() as provenance order by alertCount desc
|
|
@ -1,19 +0,0 @@
|
|||
/**
|
||||
* @name Number of alerts per sink model and query
|
||||
* @description Counts the number of alerts per query using `ai-generated` sink models.
|
||||
* @kind table
|
||||
* @id java/ml/metrics-count-alerts-per-sink-model-and-query
|
||||
* @tags internal automodel metrics
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelAlertSinkUtil
|
||||
|
||||
from string queryId, int alertCount, SinkModel s
where
  // only models whose provenance is `ai-generated` are reported
  s.getProvenance() = "ai-generated" and
  sinkModelTallyPerQuery(queryId, alertCount, s)
select queryId, alertCount, s.getPackage() as package, s.getType() as type,
  s.getSubtypes() as subtypes, s.getName() as name, s.getSignature() as signature,
  s.getInput() as input, s.getExt() as ext, s.getKind() as kind, s.getProvenance() as provenance
  order by queryId, alertCount desc
|
|
@ -1,677 +0,0 @@
|
|||
/**
|
||||
* For internal use only.
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import semmle.code.Location as Location
|
||||
private import semmle.code.java.dataflow.DataFlow
|
||||
private import semmle.code.java.dataflow.TaintTracking
|
||||
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
|
||||
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
|
||||
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
|
||||
private import semmle.code.java.Expr as Expr
|
||||
private import semmle.code.java.security.QueryInjection
|
||||
private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
|
||||
private import AutomodelJavaUtil as AutomodelJavaUtil
|
||||
private import semmle.code.java.security.PathSanitizer as PathSanitizer
|
||||
import AutomodelSharedCharacteristics as SharedCharacteristics
|
||||
import AutomodelEndpointTypes as AutomodelEndpointTypes
|
||||
|
||||
/**
 * The kinds of location that can be related to an endpoint: the call it belongs to, the
 * Javadoc of its callable, and the Javadoc of that callable's declaring type.
 */
newtype JavaRelatedLocationType =
  CallContext() or
  MethodDoc() or
  ClassDoc()
|
||||
|
||||
/**
 * The algebraic type behind application-mode endpoints. Every branch is restricted to
 * elements for which `AutomodelJavaUtil::isFromSource` holds, and every branch except
 * `TOverriddenParameter` excludes values of an unexploitable type.
 */
newtype TApplicationModeEndpoint =
  /** A non-varargs explicit argument `arg` of `call`. */
  TExplicitArgument(Call call, DataFlow::Node arg) {
    exists(Argument explicitArg |
      explicitArg = arg.asExpr() and
      explicitArg.getCall() = call and
      not explicitArg.isVararg()
    ) and
    AutomodelJavaUtil::isFromSource(call) and
    not AutomodelJavaUtil::isUnexploitableType(arg.getType())
  } or
  /** The instance argument `arg` of `call`, where `call` is not a constructor call. */
  TInstanceArgument(Call call, DataFlow::Node arg) {
    not call instanceof ConstructorCall and
    arg = DataFlow::getInstanceArgument(call) and
    AutomodelJavaUtil::isFromSource(call) and
    not AutomodelJavaUtil::isUnexploitableType(arg.getType())
  } or
  /** The implicit varargs array `arg` of `call`; `idx` is the callee's varargs parameter index. */
  TImplicitVarargsArray(Call call, DataFlow::ImplicitVarargsArray arg, int idx) {
    arg.getCall() = call and
    call.getCallee().getVaragsParameterIndex() = idx and
    AutomodelJavaUtil::isFromSource(call) and
    not AutomodelJavaUtil::isUnexploitableType(arg.getType())
  } or
  /** The value returned by the method call `call`. */
  TMethodReturnValue(MethodCall call) {
    not AutomodelJavaUtil::isUnexploitableType(call.getType()) and
    AutomodelJavaUtil::isFromSource(call)
  } or
  /** A parameter `p` of a method that overrides `overriddenMethod`. */
  TOverriddenParameter(Parameter p, Method overriddenMethod) {
    p.getCallable().(Method).overrides(overriddenMethod) and
    AutomodelJavaUtil::isFromSource(p)
  }
|
||||
|
||||
/**
 * An endpoint: a program element that is a candidate for modeling.
 */
abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint {
  /**
   * Gets the callable that a model for this endpoint would be about.
   */
  abstract Callable getCallable();

  /**
   * Gets the Models-as-Data input of this endpoint, for example `Argument[0]`.
   *
   * Source candidates have no input, so this has no result for them.
   */
  abstract string getMaDInput();

  /**
   * Gets the Models-as-Data output of this endpoint, for example `ReturnValue`.
   *
   * Sink candidates have no output, so this has no result for them.
   */
  abstract string getMaDOutput();

  /**
   * Gets the AST element that stands for this endpoint.
   */
  abstract Top asTop();

  /**
   * Gets the data flow node that corresponds to this endpoint.
   */
  abstract DataFlow::Node asNode();

  /**
   * Gets the extensible predicate this endpoint is a candidate for: `sourceModel` if it has
   * an output but no input, `sinkModel` if it has an input but no output. There is no result
   * otherwise (having both would make it a summary model, which is not yet supported).
   */
  string getExtensibleType() {
    result = "sourceModel" and
    exists(this.getMaDOutput()) and
    not exists(this.getMaDInput())
    or
    result = "sinkModel" and
    exists(this.getMaDInput()) and
    not exists(this.getMaDOutput())
  }

  /** Gets a textual representation of this endpoint. */
  abstract string toString();
}
|
||||
|
||||
/**
 * The union of the endpoint branches that are "arguments" of a call: explicit arguments,
 * instance arguments and implicit varargs arrays.
 */
class TCallArgument = TExplicitArgument or TInstanceArgument or TImplicitVarargsArray;
|
||||
|
||||
/**
 * An endpoint that is an "argument" of a call in the broad sense: an explicit argument, the
 * instance argument, or an implicit varargs array. Such endpoints have no MaD output.
 */
abstract class CallArgument extends ApplicationModeEndpoint, TCallArgument {
  Call call;
  DataFlow::Node arg;

  /** Gets the source declaration of the callee of the call. */
  override Callable getCallable() { call.getCallee().getSourceDeclaration() = result }

  override string getMaDOutput() { none() }

  override DataFlow::Node asNode() { arg = result }

  /** Gets the call that this endpoint is an argument of. */
  Call getCall() { call = result }

  override string toString() { arg.toString() = result }
}
|
||||
|
||||
/**
 * An endpoint for an argument that is written out explicitly at a call.
 */
class ExplicitArgument extends CallArgument, TExplicitArgument {
  ExplicitArgument() { TExplicitArgument(call, arg) = this }

  /** Gets the position of this argument within the call. */
  private int getArgIndex() { call.getArgument(result) = this.asTop() }

  override string getMaDInput() {
    exists(int index | index = this.getArgIndex() | result = "Argument[" + index + "]")
  }

  override Top asTop() { arg.asExpr() = result }
}
|
||||
|
||||
/**
 * An endpoint for the instance argument (the `this` argument) of a call.
 */
class InstanceArgument extends CallArgument, TInstanceArgument {
  InstanceArgument() { TInstanceArgument(call, arg) = this }

  override string getMaDInput() { result = "Argument[this]" }

  /** Gets the expression of the instance argument if it has one, and the call itself otherwise. */
  override Top asTop() {
    result = arg.asExpr()
    or
    not exists(arg.asExpr()) and result = call
  }

  override string toString() { arg.toString() = result }
}
|
||||
|
||||
/**
 * An endpoint for the implicit array that collects the varargs of a call.
 *
 * The whole array is one endpoint, instead of one endpoint per varargs element, so that
 * nothing downstream has to cope with redundant endpoints.
 *
 * To tell varargs endpoints apart from regular ones, the extraction queries export the
 * `isVarargsArray` metadata field.
 */
class ImplicitVarargsArray extends CallArgument, TImplicitVarargsArray {
  int idx;

  ImplicitVarargsArray() { TImplicitVarargsArray(call, arg, idx) = this }

  override string getMaDInput() { result = "Argument[" + idx + "]" }

  /** Gets the call, which is the AST element that stands for this endpoint. */
  override Top asTop() { call = result }
}
|
||||
|
||||
/**
 * An endpoint for a method call, whose `ReturnValue` may be a source. Such endpoints have no
 * MaD input.
 */
class MethodReturnValue extends ApplicationModeEndpoint, TMethodReturnValue {
  MethodCall call;

  MethodReturnValue() { TMethodReturnValue(call) = this }

  /** Gets the source declaration of the called method. */
  override Callable getCallable() { call.getCallee().getSourceDeclaration() = result }

  override string getMaDInput() { none() }

  override string getMaDOutput() { result = "ReturnValue" }

  override Top asTop() { call = result }

  override DataFlow::Node asNode() { call = result.asExpr() }

  override string toString() { call.toString() = result }
}
|
||||
|
||||
/**
 * An endpoint for a parameter of a method that overrides another method; such a parameter
 * may be a source. Such endpoints have no MaD input.
 */
class OverriddenParameter extends ApplicationModeEndpoint, TOverriddenParameter {
  Parameter p;
  Method overriddenMethod;

  OverriddenParameter() { TOverriddenParameter(p, overriddenMethod) = this }

  override Callable getCallable() {
    // NB: the result is the *overridden* callable, so a candidate model describes the
    // overridden method and not the overriding one. That model is more general: it also
    // applies to the other subclasses of the overridden class.
    overriddenMethod.getSourceDeclaration() = result
  }

  /** Gets the position of the parameter within its own callable. */
  private int getArgIndex() { p = p.getCallable().getParameter(result) }

  override string getMaDInput() { none() }

  override string getMaDOutput() {
    exists(int index | index = this.getArgIndex() | result = "Parameter[" + index + "]")
  }

  override Top asTop() { p = result }

  override DataFlow::Node asNode() { p = result.(DataFlow::ParameterNode).asParameter() }

  override string toString() { p.toString() = result }
}
|
||||
|
||||
/**
 * The application-mode implementation of `SharedCharacteristics::CandidateSig`.
 *
 * Worth knowing:
 * - Endpoints are `ApplicationModeEndpoint`s.
 * - The related locations of an endpoint are its `CallContext` (the call around an argument),
 *   the Javadoc of its callable, and the Javadoc of the callable's declaring type.
 */
module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig {
  // The members required by the signature are documented in the QLDoc of `CandidateSig`.
  class Endpoint = ApplicationModeEndpoint;

  class EndpointType = AutomodelEndpointTypes::EndpointType;

  class SinkType = AutomodelEndpointTypes::SinkType;

  class SourceType = AutomodelEndpointTypes::SourceType;

  class RelatedLocation = Location::Top;

  class RelatedLocationType = JavaRelatedLocationType;

  // Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact.
  predicate isSanitizer(Endpoint e, EndpointType t) {
    // path-injection sanitizers only apply to the path-injection sink type
    t instanceof AutomodelEndpointTypes::PathInjectionSinkType and
    e.asNode() instanceof PathSanitizer::PathInjectionSanitizer
    or
    // endpoints of an unexploitable type are sanitized for every endpoint type
    exists(t) and
    AutomodelJavaUtil::isUnexploitableType([
        // the type is normally available from the node
        e.asNode().getType(),
        // but calls to void methods need to go through the AST instead
        e.asTop().(Expr).getType()
      ])
  }

  RelatedLocation asLocation(Endpoint e) { e.asTop() = result }

  predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2;

  predicate isSink(Endpoint e, string kind, string provenance) {
    provenance = "custom-sink" and isCustomSink(e, kind)
    or
    exists(
      string pkg, string typeName, boolean sub, string callableName, string sig, string ext,
      string input
    |
      sinkSpec(e, pkg, typeName, sub, callableName, sig, ext, input) and
      // a model with an empty signature column is accepted as well
      ExternalFlow::sinkModel(pkg, typeName, sub, callableName, [sig, ""], ext, input, kind,
        provenance, _)
    )
  }

  predicate isSource(Endpoint e, string kind, string provenance) {
    exists(
      string pkg, string typeName, boolean sub, string callableName, string sig, string ext,
      string output
    |
      sourceSpec(e, pkg, typeName, sub, callableName, sig, ext, output) and
      // a model with an empty signature column is accepted as well
      ExternalFlow::sourceModel(pkg, typeName, sub, callableName, [sig, ""], ext, output, kind,
        provenance, _)
    )
  }

  predicate isNeutral(Endpoint e) {
    exists(string pkg, string typeName, string callableName, string sig, string endpointType |
      endpointType = "sink" and
      sinkSpec(e, pkg, typeName, _, callableName, sig, _, _)
      or
      endpointType = "source" and
      sourceSpec(e, pkg, typeName, _, callableName, sig, _, _)
    |
      ExternalFlow::neutralModel(pkg, typeName, callableName, [sig, ""], endpointType, _)
    )
  }

  /**
   * Holds if the callable of endpoint `e` matches the given package, type, name and signature.
   *
   * With `subtypes = false` only the callable of `e` itself is matched. With `subtypes = true`
   * the callable of `e` is matched too, and so is every method it (transitively) overrides.
   */
  additional predicate endpointCallable(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature
  ) {
    exists(Callable target |
      subtypes = [true, false] and
      target = e.getCallable()
      or
      subtypes = true and
      e.getCallable().(Method).getSourceDeclaration().overrides+(target)
    |
      signature = ExternalFlow::paramsString(target) and
      target.hasQualifiedName(package, type, name)
    )
  }

  /**
   * Holds if a sink model with these columns would apply to `e`: the callable columns match
   * (see `endpointCallable`), `ext` is empty and `input` is the MaD input of `e`.
   */
  additional predicate sinkSpec(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    string ext, string input
  ) {
    input = e.getMaDInput() and
    ext = "" and
    endpointCallable(e, package, type, subtypes, name, signature)
  }

  /**
   * Holds if a source model with these columns would apply to `e`: the callable columns match
   * (see `endpointCallable`), `ext` is empty and `output` is the MaD output of `e`.
   */
  additional predicate sourceSpec(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    string ext, string output
  ) {
    output = e.getMaDOutput() and
    ext = "" and
    endpointCallable(e, package, type, subtypes, name, signature)
  }

  /**
   * Gets the location of the given `type` that is related to endpoint `e`:
   * - `CallContext`: the call expression of which the endpoint is an argument or qualifier;
   * - `MethodDoc`: the Javadoc of the endpoint's callable;
   * - `ClassDoc`: the Javadoc of the type declaring the endpoint's callable.
   */
  RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
    type = ClassDoc() and
    result = e.getCallable().getDeclaringType().(Documentable).getJavadoc()
    or
    type = MethodDoc() and
    result = e.getCallable().(Documentable).getJavadoc()
    or
    type = CallContext() and
    result = e.(CallArgument).getCall()
  }
}
|
||||
|
||||
/**
 * Holds if `e` is a sink of the given `kind` that is defined in QL code instead of as a MaD
 * model. Ideally there would be no such endpoints.
 */
private predicate isCustomSink(Endpoint e, string kind) {
  kind = "sql" and
  e.asNode() instanceof QueryInjectionSink
}
|
||||
|
||||
/** The shared characteristics, instantiated with the application-mode candidates implementation. */
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<ApplicationCandidatesImpl>;
|
||||
|
||||
/** An alias for `CharacteristicsImpl::EndpointCharacteristic`. */
class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;
|
||||
|
||||
/** An alias for `ApplicationCandidatesImpl::Endpoint`. */
class Endpoint = ApplicationCandidatesImpl::Endpoint;
|
||||
|
||||
/*
|
||||
* Predicates that are used to surface prompt examples and candidates for classification with an ML model.
|
||||
*/
|
||||
|
||||
/**
 * A metadata extractor for application mode. The class has a single value; it exists to
 * carry the `hasMetadata` predicate.
 */
class ApplicationModeMetadataExtractor extends string {
  ApplicationModeMetadataExtractor() { this = "ApplicationModeMetadataExtractor" }

  /**
   * Holds if endpoint `e` has the given metadata.
   *
   * `package`, `type`, `subtypes`, `name` and `signature` describe the callable of `e`.
   * `input` and `output` are the MaD input/output of `e`, or the empty string where `e` has
   * none. `isVarargsArray` is `"true"` for implicit varargs arrays and `"false"` otherwise.
   * `alreadyAiModeled` is the last column of `CharacteristicsImpl::isModeled` for `e` and
   * `extensibleType`, or the empty string if `isModeled` does not hold for them.
   */
  predicate hasMetadata(
    Endpoint e, string package, string type, string subtypes, string name, string signature,
    string input, string output, string isVarargsArray, string alreadyAiModeled,
    string extensibleType
  ) {
    exists(Callable callable | callable = e.getCallable() |
      package = callable.getDeclaringType().getPackage().getName() and
      // The erased type is used because MaD rows conventionally omit type parameters, and
      // whether something is a sink usually does not depend on them.
      type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and
      subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
      name = callable.getName() and
      signature = ExternalFlow::paramsString(callable) and
      (
        input = e.getMaDInput()
        or
        not exists(e.getMaDInput()) and input = ""
      ) and
      (
        output = e.getMaDOutput()
        or
        not exists(e.getMaDOutput()) and output = ""
      ) and
      (
        e instanceof ImplicitVarargsArray and isVarargsArray = "true"
        or
        not e instanceof ImplicitVarargsArray and isVarargsArray = "false"
      ) and
      extensibleType = e.getExtensibleType()
    ) and
    (
      CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled)
      or
      alreadyAiModeled = "" and
      not CharacteristicsImpl::isModeled(e, _, extensibleType, _)
    )
  }
}
|
||||
|
||||
/**
 * Holds if `endpoint` should be considered a candidate for the `extensibleType`.
 *
 * The remaining parameters carry the metadata reported by
 * `ApplicationModeMetadataExtractor.hasMetadata` for the endpoint.
 */
predicate isCandidate(
  Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
  string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
) {
  exists(ApplicationModeMetadataExtractor meta |
    meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs,
      alreadyAiModeled, extensibleType)
  ) and
  CharacteristicsImpl::isCandidate(endpoint, _) and
  not any(CharacteristicsImpl::UninterestingToModelCharacteristic u).appliesToEndpoint(endpoint) and
  AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature) and
  // A node that is already modeled in MaD is not included as a candidate. Otherwise it could be
  // offered as a candidate for query A while the model labels it as a sink for one of the sink
  // types of query B, for which it is already a known sink, producing overlap between detected
  // sinks and pre-existing modeling. The assumption is that a sink already modeled in MaD does
  // not belong to any additional sink types and need not be reexamined. Only endpoints whose
  // `alreadyAiModeled` value is empty or contains "ai-" pass this filter.
  alreadyAiModeled.matches(["", "%ai-%"])
}
|
||||
|
||||
/**
 * Holds if `endpoint` is a negative example for the `extensibleType` because of the
 * `characteristic`, with `confidence` being the lowest confidence of the negative
 * implications over the endpoint's potential types.
 *
 * The remaining parameters carry the metadata reported by
 * `ApplicationModeMetadataExtractor.hasMetadata` for the endpoint.
 */
predicate isNegativeExample(
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
  string type, string subtypes, string name, string signature, string input, string output,
  string isVarargsArray, string extensibleType
) {
  characteristic.appliesToEndpoint(endpoint) and
  exists(ApplicationModeMetadataExtractor meta |
    meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
      isVarargsArray, _, extensibleType)
  ) and
  // The node is known not to be an endpoint of any appropriate type: there is no potential
  // type for which the characteristic lacks a negative implication.
  not exists(AutomodelEndpointTypes::EndpointType potentialType |
    potentialType = CharacteristicsImpl::getAPotentialType(endpoint) and
    not characteristic.hasImplications(potentialType, false, _)
  ) and
  // The lowest confidence across all endpoint types must be at least `highConfidence`.
  confidence =
    min(float c |
      characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
    ) and
  confidence >= SharedCharacteristics::highConfidence() and
  // A node may validly be both a potential source/sanitizer and a sink. Such nodes are
  // ambiguous and might confuse the model, so they are explicitly excluded from the negative
  // examples in the prompt.
  not exists(EndpointCharacteristic positive, float positiveConfidence |
    positive != characteristic and
    positive.appliesToEndpoint(endpoint) and
    positiveConfidence >= SharedCharacteristics::maximalConfidence() and
    positive
        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true,
          positiveConfidence)
  )
}
|
||||
|
||||
/**
 * Holds if `endpoint` is a positive example for the `endpointType`, i.e. it is known as
 * that type and has a `CallContext()` related location (or candidate).
 *
 * The remaining parameters carry the metadata reported by
 * `ApplicationModeMetadataExtractor.hasMetadata` for the endpoint.
 */
predicate isPositiveExample(
  Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
  string signature, string input, string output, string isVarargsArray, string extensibleType
) {
  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
  exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext())) and
  exists(ApplicationModeMetadataExtractor meta |
    meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
      isVarargsArray, _, extensibleType)
  )
}
|
||||
|
||||
/*
|
||||
* EndpointCharacteristic classes that are specific to Automodel for Java.
|
||||
*/
|
||||
|
||||
/**
 * A negative characteristic stating that parameters of an is-style boolean method should
 * not be considered sinks.
 *
 * If a callable's name starts with `is` and it returns a boolean (e.g. `isDirectory`), a
 * sink there is highly unlikely to be exploitable. Such calls normally only perform checks
 * and precede the call that does the dangerous/interesting thing; that later call is the
 * one that should be modeled as the sink.
 *
 * TODO: this might filter too much; methods with more than one parameter may contain
 * interesting sinks.
 */
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableIsCharacteristic() { this = "argument of is-style boolean method" }

  override predicate appliesToEndpoint(Endpoint e) {
    // Endpoints that are already known sinks are never covered by this characteristic.
    not ApplicationCandidatesImpl::isSink(e, _, _) and
    e.getCallable().getReturnType() instanceof BooleanType and
    e.getCallable().getName().matches("is%")
  }
}
|
||||
|
||||
/**
 * A negative characteristic stating that parameters of an existence-checking boolean
 * method should not be considered sinks.
 *
 * If a callable is named `exists` or `notExists` (compared case-insensitively) and returns
 * a boolean, a sink there is highly unlikely to be exploitable. Such calls normally only
 * perform checks and precede the call that does the dangerous/interesting thing; that later
 * call is the one that should be modeled as the sink.
 */
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  UnexploitableExistsCharacteristic() { this = "argument of existence-checking boolean method" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable checker | checker = e.getCallable() |
      checker.getReturnType() instanceof BooleanType and
      checker.getName().toLowerCase() in ["exists", "notexists"]
    )
  }
}
|
||||
|
||||
/**
 * A negative characteristic stating that parameters of a method or constructor declared
 * on a `Throwable` type (or a subtype) should not be considered sinks, and that its return
 * value should not be considered a source.
 */
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
  ExceptionCharacteristic() { this = "argument/result of exception-related method" }

  override predicate appliesToEndpoint(Endpoint e) {
    (
      // Source side: only return values that are not already known sources.
      e.getMaDOutput() = "ReturnValue" and
      e.getExtensibleType() = "sourceModel" and
      not ApplicationCandidatesImpl::isSource(e, _, _)
      or
      // Sink side: anything that is not already a known sink.
      e.getExtensibleType() = "sinkModel" and
      not ApplicationCandidatesImpl::isSink(e, _, _)
    ) and
    e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable
  }
}
|
||||
|
||||
/**
 * A negative characteristic stating that an endpoint is a MaD taint step. Taint steps
 * modeled in MaD are global, so they are not sinks for any query. Non-MaD taint steps may
 * be specific to a particular query and are therefore not filtered out.
 */
private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
  IsMaDTaintStepCharacteristic() { this = "taint step" }

  override predicate appliesToEndpoint(Endpoint e) {
    // The endpoint's node appears in the first position of one of the summary steps.
    FlowSummaryImpl::Private::Steps::summarySetterStep(e.asNode(), _, _, _)
    or
    FlowSummaryImpl::Private::Steps::summaryGetterStep(e.asNode(), _, _, _)
    or
    FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e.asNode(), _, _)
    or
    FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e.asNode(), _, _)
  }
}
|
||||
|
||||
/**
 * A characteristic that excludes calls to locally known methods (callables that are
 * `fromSource()`) from the candidates to model.
 *
 * The expectation is that data/taint flow into the method implementation uncovers any
 * sinks that are present there.
 */
private class LocalCall extends CharacteristicsImpl::UninterestingToModelCharacteristic {
  LocalCall() { this = "local call" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callee | callee.fromSource() |
      callee = e.(CallArgument).getCallable()
      or
      callee = e.(MethodReturnValue).getCallable()
    )
  }
}
|
||||
|
||||
/**
 * A characteristic that marks an endpoint as uninteresting to model when the Java
 * `ModelExclusions` module considers its callable uninteresting for models.
 */
private class ExcludedFromModeling extends CharacteristicsImpl::UninterestingToModelCharacteristic {
  ExcludedFromModeling() { this = "excluded from modeling" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callable | callable = e.getCallable() |
      ModelExclusions::isUninterestingForModels(callable)
    )
  }
}
|
||||
|
||||
/**
 * A negative characteristic that filters out endpoints of non-public callables. These are
 * not interesting for the standard Java modeling, because they cannot be called from
 * outside the package.
 */
private class NonPublicMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic
{
  NonPublicMethodCharacteristic() { this = "non-public method" }

  override predicate appliesToEndpoint(Endpoint e) {
    // Requires the endpoint to have a callable; endpoints without one are not matched.
    exists(Callable callable |
      not callable.isPublic() and
      callable = e.getCallable()
    )
  }
}
|
||||
|
||||
/**
 * A negative characteristic stating that an endpoint is a non-sink argument of a call that
 * has another argument already modeled _manually_ as a sink. The restriction to manual
 * sinks exists because only the manual process carries the expectation that all sinks of a
 * method have been considered.
 *
 * WARNING: Do not use these endpoints as negative samples for training, since some sinks
 * may have been missed when the method was modeled. In particular, as ATM is used to merge
 * in new declarations, there is less certainty that a method with one argument modeled as a
 * MaD sink has had its remaining arguments manually reviewed: the ML model might have
 * predicted argument 0 of some method to be a sink but not argument 1, even though
 * argument 1 is a sink as well.
 */
private class OtherArgumentToModeledMethodCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic
{
  OtherArgumentToModeledMethodCharacteristic() {
    this = "other argument to a method that has already been modeled manually"
  }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(CallArgument manualSink |
      manualSink != e and
      manualSink.getCall() = e.(CallArgument).getCall() and
      ApplicationCandidatesImpl::isSink(manualSink, _, "manual")
    ) and
    not ApplicationCandidatesImpl::isSink(e, _, _)
  }
}
|
||||
|
||||
/**
 * Holds if the erased type of expression `e` is a reference type that has an associated
 * `@java.lang.FunctionalInterface` annotation.
 */
predicate hasFunctionalInterfaceType(Expr e) {
  e.getType()
      .getErasure()
      .(RefType)
      .getAnAssociatedAnnotation()
      .getType()
      .hasQualifiedName("java.lang", "FunctionalInterface")
}
|
||||
|
||||
/**
 * A characteristic that marks functional expressions, and expressions of a
 * `@FunctionalInterface` type, as likely not sinks.
 *
 * Such expressions may well _contain_ sinks, but are rarely sinks themselves.
 */
private class FunctionValueCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic {
  FunctionValueCharacteristic() { this = "function value" }

  override predicate appliesToEndpoint(Endpoint e) {
    e.asNode().asExpr() instanceof FunctionalExpr
    or
    hasFunctionalInterfaceType(e.asNode().asExpr())
  }
}
|
||||
|
||||
/**
 * A negative characteristic stating that an endpoint is not a `to` node of any known taint
 * step. Such a node cannot be tainted, because taint cannot flow into it.
 *
 * WARNING: Do not use these endpoints as negative samples for training, since they may
 * include sinks for which the taint tracking modeling is incomplete.
 */
private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic
{
  CannotBeTaintedCharacteristic() { this = "cannot be tainted" }

  override predicate appliesToEndpoint(Endpoint e) { not this.isKnownOutNodeForStep(e) }

  /**
   * Holds if the node of endpoint `e` is known as the target (`to` node) of a modeled flow
   * step, or is a call expression.
   */
  private predicate isKnownOutNodeForStep(Endpoint e) {
    TaintTracking::localTaintStep(_, e.asNode())
    or
    FlowSummaryImpl::Private::Steps::summaryThroughStepValue(_, e.asNode(), _)
    or
    FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(_, e.asNode(), _)
    or
    FlowSummaryImpl::Private::Steps::summaryGetterStep(_, _, e.asNode(), _)
    or
    FlowSummaryImpl::Private::Steps::summarySetterStep(_, _, e.asNode(), _)
    or
    // For call expressions, flow into the node is simply assumed.
    e.asNode().asExpr() instanceof Call
  }
}
|
|
@ -1,81 +0,0 @@
|
|||
/**
|
||||
* Surfaces the endpoints that are not already known to be sinks, and are therefore used as candidates for
|
||||
* classification with an ML model.
|
||||
*
|
||||
* Note: This query does not actually classify the endpoints using the model.
|
||||
*
|
||||
* @name Automodel candidates (application mode)
|
||||
* @description A query to extract automodel candidates in application mode.
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id java/ml/extract-automodel-application-candidates
|
||||
* @tags internal extract automodel application-mode candidates
|
||||
*/
|
||||
|
||||
import java
|
||||
private import AutomodelApplicationModeCharacteristics
|
||||
private import AutomodelJavaUtil
|
||||
|
||||
/**
 * Gets one of at most `limit` sampled endpoints that share the given metadata (method
 * signature and related columns).
 *
 * This helper keeps the number of selected candidates down, since too many of them may
 * push the SARIF file over the maximum size limit.
 */
bindingset[limit]
private Endpoint getSampleForSignature(
  int limit, string package, string type, string subtypes, string name, string signature,
  string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
) {
  exists(int index, int total, ApplicationModeMetadataExtractor extractor |
    total =
      count(Endpoint candidate |
        extractor
            .hasMetadata(candidate, package, type, subtypes, name, signature, input, output,
              isVarargs, alreadyAiModeled, extensibleType)
      )
  |
    // The ranking below sorts by file path first, so neighbouring ranks tend to be close
    // together. To spread the samples out, `limit` evenly spaced ranks are chosen. That alone
    // would always include the first rank, so a randomly chosen prime offset is added and the
    // sum is reduced modulo the number of endpoints. Ranks are 1-indexed, hence the `1 +`.
    // NOTE(review): when `total < limit` the stride looks like it is 0, which would make all
    // indices coincide -- confirm whether that is intended.
    index = 1 + (([0 .. limit - 1] * (total / limit).floor() + 46337) % total) and
    result =
      rank[index](Endpoint candidate, Location location |
        extractor
            .hasMetadata(candidate, package, type, subtypes, name, signature, input, output,
              isVarargs, alreadyAiModeled, extensibleType) and
        location = candidate.asTop().getLocation()
      |
        candidate
        order by
          location.getFile().getAbsolutePath(), location.getStartLine(),
          location.getStartColumn(), location.getEndLine(), location.getEndColumn()
      )
  )
}
|
||||
|
||||
// Selects a bounded sample (at most 9 per distinct metadata tuple) of the candidate
// endpoints, together with their related locations and metadata columns.
from
  Endpoint endpoint, DollarAtString package, DollarAtString type, DollarAtString subtypes,
  DollarAtString name, DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString isVarargsArray, DollarAtString alreadyAiModeled, DollarAtString extensibleType
where
  endpoint =
    getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
      isVarargsArray, extensibleType, alreadyAiModeled) and
  isCandidate(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray,
    extensibleType, alreadyAiModeled)
select endpoint.asNode(),
  "Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", // method name
  signature, "signature", //
  input, "input", //
  output, "output", //
  isVarargsArray, "isVarargsArray", //
  alreadyAiModeled, "alreadyAiModeled", //
  extensibleType, "extensibleType"
|
|
@ -1,66 +0,0 @@
|
|||
/**
|
||||
* Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
|
||||
*
|
||||
* @name Negative examples (application mode)
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id java/ml/extract-automodel-application-negative-examples
|
||||
* @tags internal extract automodel application-mode negative examples
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelApplicationModeCharacteristics
|
||||
private import AutomodelEndpointTypes
|
||||
private import AutomodelJavaUtil
|
||||
|
||||
/**
 * Gets one of at most `limit` sampled endpoints to which the characteristic `c` applies.
 *
 * This helper keeps the number of selected samples down, since too many of them may push
 * the SARIF file over the maximum size limit.
 */
bindingset[limit]
Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
  exists(int index, int total | total = count(Endpoint matching | c.appliesToEndpoint(matching)) |
    // The ranking below sorts by file path first, so neighbouring ranks tend to be close
    // together. To spread the samples out, `limit` evenly spaced ranks are chosen. That alone
    // would always include the first rank, so a randomly chosen prime offset is added and the
    // sum is reduced modulo the number of endpoints. Ranks are 1-indexed, hence the `1 +`.
    index = 1 + (([0 .. limit - 1] * (total / limit).floor() + 46337) % total) and
    result =
      rank[index](Endpoint matching, Location location |
        c.appliesToEndpoint(matching) and location = matching.asTop().getLocation()
      |
        matching
        order by
          location.getFile().getAbsolutePath(), location.getStartLine(),
          location.getStartColumn(), location.getEndLine(), location.getEndColumn()
      )
  )
}
|
||||
|
||||
// Selects a bounded sample (at most 100 per characteristic) of the negative examples,
// together with their related locations and metadata columns. The alert message starts
// with the characteristic's own string value.
from
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
  DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
  DollarAtString signature, DollarAtString input, DollarAtString output,
  DollarAtString isVarargsArray, DollarAtString extensibleType
where
  message = characteristic and
  isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature,
    input, output, isVarargsArray, extensibleType) and
  endpoint = getSampleForCharacteristic(characteristic, 100)
select endpoint.asNode(),
  message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  isVarargsArray, "isVarargsArray", //
  extensibleType, "extensibleType"
|
|
@ -1,37 +0,0 @@
|
|||
/**
|
||||
* Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
|
||||
*
|
||||
* @name Positive examples (application mode)
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id java/ml/extract-automodel-application-positive-examples
|
||||
* @tags internal extract automodel application-mode positive examples
|
||||
*/
|
||||
|
||||
private import AutomodelApplicationModeCharacteristics
|
||||
private import AutomodelEndpointTypes
|
||||
private import AutomodelJavaUtil
|
||||
|
||||
// Selects the positive examples together with their related locations and metadata
// columns. The alert message starts with the endpoint type.
//
// The `from` clause no longer declares an `ApplicationModeMetadataExtractor` variable:
// it was never referenced in `where` or `select`, and `isPositiveExample` obtains its
// own extractor.
from
  Endpoint endpoint, EndpointType endpointType, DollarAtString package, DollarAtString type,
  DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
  DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
where
  isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output,
    isVarargsArray, extensibleType)
select endpoint.asNode(),
  endpointType + "\nrelated locations: $@, $@, $@." +
    "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  isVarargsArray, "isVarargsArray", //
  extensibleType, "extensibleType"
|
|
@ -1,5 +0,0 @@
|
|||
extensions:
|
||||
- addsTo:
|
||||
pack: codeql/java-automodel-queries
|
||||
extensible: automodelCandidateFilter
|
||||
data: []
|
|
@ -1,19 +0,0 @@
|
|||
/**
|
||||
* @name Number of instances of each sink model
|
||||
* @description Counts the number of instances of `ai-generated` sink models.
|
||||
* @kind table
|
||||
* @id java/ml/metrics-count-instances-per-sink-model
|
||||
* @tags internal automodel metrics
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelAlertSinkUtil
|
||||
|
||||
// Lists every sink model with provenance "ai-generated" that has at least one instance,
// ordered by descending instance count.
from SinkModel model, int instanceCount
where
  model.getProvenance() = "ai-generated" and
  instanceCount = model.getInstanceCount() and
  instanceCount > 0
select instanceCount, model.getPackage() as package, model.getType() as type,
  model.getSubtypes() as subtypes, model.getName() as name, model.getSignature() as signature,
  model.getInput() as input, model.getExt() as ext, model.getKind() as kind,
  model.getProvenance() as provenance order by instanceCount desc
|
|
@ -1,82 +0,0 @@
|
|||
/**
|
||||
* For internal use only.
|
||||
*
|
||||
* Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must
|
||||
* only predict classes defined within this file. This file is the source of truth for the integer
|
||||
* representation of each of these classes.
|
||||
*/
|
||||
|
||||
/** An endpoint type, i.e. a class that a classifier can predict, represented as a string. */
abstract class EndpointType extends string {
  /**
   * Holds for any string; concrete subclasses pin `this` to the name of their sink or
   * source type.
   */
  bindingset[this]
  EndpointType() { any() }

  /**
   * Gets the models-as-data kind name of this endpoint type, which is the string itself.
   *
   * See https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#LL353C11-L357C31
   * for sink types, and https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#L365
   * for source types.
   */
  final string getKind() { this = result }
}
|
||||
|
||||
/** An endpoint type that denotes a kind of sink a classifier can predict. */
abstract class SinkType extends EndpointType {
  /** Holds for any string; concrete sink types pin `this` to their kind name. */
  bindingset[this]
  SinkType() { any() }
}
|
||||
|
||||
/** The sink type with kind "sql-injection", relevant to the SQL injection query. */
class SqlInjectionSinkType extends SinkType {
  SqlInjectionSinkType() { this = "sql-injection" }
}
|
||||
|
||||
/** The sink type with kind "path-injection", relevant to the tainted path injection query. */
class PathInjectionSinkType extends SinkType {
  PathInjectionSinkType() { this = "path-injection" }
}
|
||||
|
||||
/** The sink type with kind "request-forgery", relevant to the SSRF query. */
class RequestForgerySinkType extends SinkType {
  RequestForgerySinkType() { this = "request-forgery" }
}
|
||||
|
||||
/** The sink type with kind "command-injection", relevant to the command injection query. */
class CommandInjectionSinkType extends SinkType {
  CommandInjectionSinkType() { this = "command-injection" }
}
|
||||
|
||||
/** The sink type with kind "file-content-store", relevant to file storage. */
class FileContentStoreSinkType extends SinkType {
  FileContentStoreSinkType() { this = "file-content-store" }
}
|
||||
|
||||
/** The sink type with kind "html-injection", relevant to HTML injection. */
class HtmlInjectionSinkType extends SinkType {
  HtmlInjectionSinkType() { this = "html-injection" }
}
|
||||
|
||||
/** The sink type with kind "ldap-injection", relevant to LDAP injection. */
class LdapInjectionSinkType extends SinkType {
  LdapInjectionSinkType() { this = "ldap-injection" }
}
|
||||
|
||||
/** The sink type with kind "url-redirection", relevant to URL redirection. */
class UrlRedirectionSinkType extends SinkType {
  UrlRedirectionSinkType() { this = "url-redirection" }
}
|
||||
|
||||
/** An endpoint type that denotes a kind of source a classifier can predict. */
abstract class SourceType extends EndpointType {
  /** Holds for any string; concrete source types pin `this` to their kind name. */
  bindingset[this]
  SourceType() { any() }
}
|
||||
|
||||
/** The source type with kind "remote", denoting a source of remote data. */
class RemoteSourceType extends SourceType {
  RemoteSourceType() { this = "remote" }
}
|
|
@ -1,507 +0,0 @@
|
|||
/**
|
||||
* For internal use only.
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import semmle.code.Location as Location
|
||||
private import semmle.code.java.dataflow.DataFlow
|
||||
private import semmle.code.java.dataflow.TaintTracking
|
||||
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
|
||||
private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl
|
||||
private import semmle.code.java.security.ExternalAPIs as ExternalAPIs
|
||||
private import semmle.code.java.Expr as Expr
|
||||
private import semmle.code.java.security.QueryInjection
|
||||
private import semmle.code.java.security.RequestForgery
|
||||
private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
|
||||
private import AutomodelJavaUtil as AutomodelJavaUtil
|
||||
import AutomodelSharedCharacteristics as SharedCharacteristics
|
||||
import AutomodelEndpointTypes as AutomodelEndpointTypes
|
||||
|
||||
/** The kinds of related location declared for Java in this file: `MethodDoc` and `ClassDoc`. */
newtype JavaRelatedLocationType = MethodDoc() or ClassDoc()
|
||||
|
||||
/**
 * The algebraic type underlying framework mode endpoints. Every branch requires its
 * parameter or callable to satisfy `AutomodelJavaUtil::isFromSource`.
 */
newtype TFrameworkModeEndpoint =
  /** A parameter whose type is not an unexploitable type. */
  TExplicitParameter(Parameter p) {
    not AutomodelJavaUtil::isUnexploitableType(p.getType()) and
    AutomodelJavaUtil::isFromSource(p)
  } or
  /** The qualifier of a callable that is not a constructor. */
  TQualifier(Callable c) { not c instanceof Constructor and AutomodelJavaUtil::isFromSource(c) } or
  /** The return value of a constructor, or of a method whose return type is not unexploitable. */
  TReturnValue(Callable c) {
    AutomodelJavaUtil::isFromSource(c) and
    (
      c instanceof Constructor
      or
      c instanceof Method and
      not AutomodelJavaUtil::isUnexploitableType(c.getReturnType())
    )
  } or
  /** A parameter `p` of an overridable `ModelApi` method `m`. */
  TOverridableParameter(Method m, Parameter p) {
    m = p.getCallable() and
    m instanceof ModelExclusions::ModelApi and
    AutomodelJavaUtil::isOverridable(m) and
    AutomodelJavaUtil::isFromSource(p) and
    not AutomodelJavaUtil::isUnexploitableType(p.getType())
  } or
  /** The qualifier of an overridable `ModelApi` method `m`. */
  TOverridableQualifier(Method m) {
    m instanceof ModelExclusions::ModelApi and
    AutomodelJavaUtil::isOverridable(m) and
    AutomodelJavaUtil::isFromSource(m)
  }
|
||||
|
||||
/**
 * An endpoint in framework mode. Concrete subclasses correspond to the branches of
 * `TFrameworkModeEndpoint`.
 */
abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint {
  /**
   * Gets the MaD input of this endpoint, if any, e.g. `Argument[0]`.
   *
   * Source candidates have no input, so this is `none()` for them.
   */
  abstract string getMaDInput();

  /**
   * Gets the MaD output of this endpoint, if any, e.g. `ReturnValue`.
   *
   * Sink candidates have no output, so this is `none()` for them.
   */
  abstract string getMaDOutput();

  /**
   * Gets the name of the parameter of this endpoint.
   */
  abstract string getParamName();

  /**
   * Gets the callable that contains this endpoint.
   */
  abstract Callable getCallable();

  /** Gets the `Top` element that represents this endpoint. */
  abstract Top asTop();

  /** Gets the name of the extensible type of this endpoint, e.g. "sinkModel" or "sourceModel". */
  abstract string getExtensibleType();

  /** Gets a textual representation of this endpoint, taken from its `Top` element. */
  string toString() { exists(Top top | top = this.asTop() | result = top.toString()) }

  /** Gets the location of this endpoint, taken from its `Top` element. */
  Location getLocation() { exists(Top top | top = this.asTop() | result = top.getLocation()) }
}
|
||||
|
||||
/**
 * An endpoint for an explicit parameter of a callable. It is a `sinkModel` candidate whose
 * MaD input is `Argument[<position>]`.
 */
class ExplicitParameterEndpoint extends FrameworkModeEndpoint, TExplicitParameter {
  Parameter param;

  ExplicitParameterEndpoint() { param.fromSource() and this = TExplicitParameter(param) }

  override Top asTop() { result = param }

  override Callable getCallable() { result = param.getCallable() }

  override string getParamName() { result = param.getName() }

  override string getExtensibleType() { result = "sinkModel" }

  override string getMaDInput() {
    exists(int position | position = param.getPosition() | result = "Argument[" + position + "]")
  }

  // this endpoint is only a sink candidate, hence it has no output access path
  override string getMaDOutput() { none() }
}
|
||||
|
||||
/**
 * An endpoint for the qualifier (`this`) of a non-static callable. It is a `sinkModel`
 * candidate whose MaD input is `Argument[this]`.
 */
class QualifierEndpoint extends FrameworkModeEndpoint, TQualifier {
  Callable callable;

  QualifierEndpoint() {
    callable.fromSource() and
    not callable.isStatic() and
    this = TQualifier(callable)
  }

  override Top asTop() { result = callable }

  override Callable getCallable() { result = callable }

  override string getParamName() { result = "this" }

  override string getExtensibleType() { result = "sinkModel" }

  override string getMaDInput() { result = "Argument[this]" }

  // this endpoint is only a sink candidate, hence it has no output access path
  override string getMaDOutput() { none() }
}
|
||||
|
||||
/**
 * An endpoint for the return value of a callable. It is a `sourceModel` candidate whose
 * MaD output is `ReturnValue`.
 */
class ReturnValue extends FrameworkModeEndpoint, TReturnValue {
  Callable callable;

  ReturnValue() { callable.fromSource() and this = TReturnValue(callable) }

  override Top asTop() { result = callable }

  override Callable getCallable() { result = callable }

  // a return value is not associated with any parameter
  override string getParamName() { none() }

  override string getExtensibleType() { result = "sourceModel" }

  // this endpoint is only a source candidate, hence it has no input access path
  override string getMaDInput() { none() }

  override string getMaDOutput() { result = "ReturnValue" }
}
|
||||
|
||||
/**
 * An endpoint for a parameter of an overridable method. It is a `sourceModel` candidate
 * whose MaD output is `Parameter[<position>]`.
 */
class OverridableParameter extends FrameworkModeEndpoint, TOverridableParameter {
  Method method;
  Parameter param;

  OverridableParameter() { TOverridableParameter(method, param) = this }

  override Top asTop() { result = param }

  override Callable getCallable() { result = method }

  override string getParamName() { result = param.getName() }

  override string getExtensibleType() { result = "sourceModel" }

  // this endpoint is only a source candidate, hence it has no input access path
  override string getMaDInput() { none() }

  override string getMaDOutput() {
    exists(int position | position = param.getPosition() | result = "Parameter[" + position + "]")
  }
}
|
||||
|
||||
/**
 * An endpoint for the qualifier (`this`) of an overridable method. It is a `sourceModel`
 * candidate whose MaD output is `Parameter[this]`.
 */
class OverridableQualifier extends FrameworkModeEndpoint, TOverridableQualifier {
  Method m;

  OverridableQualifier() { TOverridableQualifier(m) = this }

  override Top asTop() { result = m }

  override Callable getCallable() { result = m }

  override string getParamName() { result = "this" }

  override string getExtensibleType() { result = "sourceModel" }

  // this endpoint is only a source candidate, hence it has no input access path
  override string getMaDInput() { none() }

  override string getMaDOutput() { result = "Parameter[this]" }
}
|
||||
|
||||
/**
 * The framework-mode implementation of `SharedCharacteristics::CandidateSig`.
 *
 * Notes on this mode:
 * - Endpoints are `FrameworkModeEndpoint`s.
 * - Sink, source and neutral information is read from MaD models through `ExternalFlow`.
 * - The Javadoc of the callable and of its declaring type are used as related locations
 *   when they exist.
 */
module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
  // The members required by the signature are documented in the QLDoc of `CandidateSig`.
  class Endpoint = FrameworkModeEndpoint;

  class EndpointType = AutomodelEndpointTypes::EndpointType;

  class SinkType = AutomodelEndpointTypes::SinkType;

  class SourceType = AutomodelEndpointTypes::SourceType;

  class RelatedLocation = Location::Top;

  class RelatedLocationType = JavaRelatedLocationType;

  predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2;

  RelatedLocation asLocation(Endpoint e) { e.asTop() = result }

  // Sanitizers are currently not modeled in MaD. TODO: check whether this has a large negative impact.
  predicate isSanitizer(Endpoint e, EndpointType t) { none() }

  predicate isSink(Endpoint e, string kind, string provenance) {
    exists(
      string pkg, string typeName, boolean sub, string callableName, string sig, string extension,
      string accessPath
    |
      // a model matches either on the exact signature or when it leaves the signature empty
      ExternalFlow::sinkModel(pkg, typeName, sub, callableName, [sig, ""], extension, accessPath,
        kind, provenance, _) and
      sinkSpec(e, pkg, typeName, sub, callableName, sig, extension, accessPath)
    )
  }

  predicate isSource(Endpoint e, string kind, string provenance) {
    exists(
      string pkg, string typeName, boolean sub, string callableName, string sig, string extension,
      string accessPath
    |
      // a model matches either on the exact signature or when it leaves the signature empty
      ExternalFlow::sourceModel(pkg, typeName, sub, callableName, [sig, ""], extension,
        accessPath, kind, provenance, _) and
      sourceSpec(e, pkg, typeName, sub, callableName, sig, extension, accessPath)
    )
  }

  predicate isNeutral(Endpoint e) {
    exists(string pkg, string typeName, string callableName, string sig, string neutralKind |
      ExternalFlow::neutralModel(pkg, typeName, callableName, [sig, ""], neutralKind, _)
    |
      neutralKind = "sink" and
      sinkSpec(e, pkg, typeName, _, callableName, sig, _, _)
      or
      neutralKind = "source" and
      sourceSpec(e, pkg, typeName, _, callableName, sig, _, _)
    )
  }

  /**
   * Holds if a model for the callable identified by `package`, `type`, `name` and `signature`
   * concerns the endpoint `e`.
   *
   * For either value of `subtypes` the identified callable may be the callable of `e` itself.
   * For `subtypes = true` it may additionally be any callable that is (transitively) overridden
   * by the source declaration of the method of `e`.
   */
  additional predicate endpointCallable(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature
  ) {
    exists(Callable modeled |
      modeled.hasQualifiedName(package, type, name) and
      ExternalFlow::paramsString(modeled) = signature
    |
      modeled = e.getCallable() and
      (subtypes = true or subtypes = false)
      or
      subtypes = true and
      e.getCallable().(Method).getSourceDeclaration().overrides+(modeled)
    )
  }

  /**
   * Holds if `e` corresponds to the sink specification made up of the given columns, where
   * `ext` is always the empty string and `input` is the MaD input of `e`.
   */
  additional predicate sinkSpec(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    string ext, string input
  ) {
    e.getMaDInput() = input and
    ext = "" and
    endpointCallable(e, package, type, subtypes, name, signature)
  }

  /**
   * Holds if `e` corresponds to the source specification made up of the given columns, where
   * `ext` is always the empty string and `output` is the MaD output of `e`.
   */
  additional predicate sourceSpec(
    Endpoint e, string package, string type, boolean subtypes, string name, string signature,
    string ext, string output
  ) {
    e.getMaDOutput() = output and
    ext = "" and
    endpointCallable(e, package, type, subtypes, name, signature)
  }

  /**
   * Gets the related location of kind `type` for the endpoint `e`.
   *
   * This is the Javadoc of the callable of `e` for `MethodDoc()`, and the Javadoc of the
   * declaring type of that callable for `ClassDoc()`.
   */
  RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
    exists(Callable callable | callable = e.getCallable() |
      type = ClassDoc() and
      result = callable.getDeclaringType().(Documentable).getJavadoc()
      or
      type = MethodDoc() and
      result = callable.(Documentable).getJavadoc()
    )
  }
}
|
||||
|
||||
/** The shared characteristics, instantiated with the framework-mode candidate implementation. */
module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics<FrameworkCandidatesImpl>;
|
||||
|
||||
/** An endpoint characteristic, as defined by the framework-mode instantiation of the shared characteristics. */
class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic;
|
||||
|
||||
/** An endpoint in framework mode (an alias for `FrameworkCandidatesImpl::Endpoint`). */
class Endpoint = FrameworkCandidatesImpl::Endpoint;
|
||||
|
||||
/*
|
||||
* Predicates that are used to surface prompt examples and candidates for classification with an ML model.
|
||||
*/
|
||||
|
||||
/**
 * The metadata extractor for framework mode.
 *
 * This is a singleton class whose only value is the string `"FrameworkModeMetadataExtractor"`.
 */
class FrameworkModeMetadataExtractor extends string {
  FrameworkModeMetadataExtractor() { this = "FrameworkModeMetadataExtractor" }

  /**
   * Holds if the given columns describe the endpoint `e`.
   *
   * `input`, `output` and `parameterName` are the empty string when `e` has no MaD input,
   * MaD output or parameter name, respectively. `alreadyAiModeled` is the provenance of an
   * existing model of `e` for `extensibleType`, or the empty string if there is no such model.
   */
  predicate hasMetadata(
    Endpoint e, string package, string type, string subtypes, string name, string signature,
    string input, string output, string parameterName, string alreadyAiModeled,
    string extensibleType
  ) {
    (
      if CharacteristicsImpl::isModeled(e, _, extensibleType, _)
      then CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled)
      else alreadyAiModeled = ""
    ) and
    exists(Callable callable | callable = e.getCallable() |
      extensibleType = e.getExtensibleType() and
      package = callable.getDeclaringType().getPackage().getName() and
      // The erased type is used, since by convention MaD rows do not spell out type parameters,
      // and type parameters rarely decide whether something is a sink.
      type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and
      name = callable.getName() and
      signature = ExternalFlow::paramsString(callable) and
      subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
      (
        input = e.getMaDInput()
        or
        not exists(e.getMaDInput()) and input = ""
      ) and
      (
        output = e.getMaDOutput()
        or
        not exists(e.getMaDOutput()) and output = ""
      ) and
      (
        parameterName = e.getParamName()
        or
        not exists(e.getParamName()) and parameterName = ""
      )
    )
  }
}
|
||||
|
||||
/**
 * Holds if `endpoint` should be surfaced as a candidate for `extensibleType`.
 *
 * The remaining columns carry the metadata computed by `FrameworkModeMetadataExtractor`.
 */
predicate isCandidate(
  Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
  string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
) {
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, alreadyAiModeled, extensibleType)
  ) and
  CharacteristicsImpl::isCandidate(endpoint, _) and
  not any(CharacteristicsImpl::UninterestingToModelCharacteristic uninteresting)
      .appliesToEndpoint(endpoint) and
  // Endpoints that already have a MaD model are left out, unless the provenance of that model
  // contains `ai-`. Otherwise an endpoint could be proposed as a candidate for one query and then
  // be labelled with a sink type of another query for which it is already a known sink, so that
  // the detected sinks would overlap with the pre-existing modeling. The assumption is that an
  // endpoint with a MaD model does not belong to any further sink types and need not be
  // re-examined.
  alreadyAiModeled.matches(["", "%ai-%"]) and
  AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature)
}
|
||||
|
||||
/**
 * Holds if `endpoint` is a negative example for `extensibleType`, as established by
 * `characteristic`.
 *
 * `confidence` is the lowest confidence with which `characteristic` rules out a potential type
 * of `endpoint`, and it is at least `highConfidence()`. The remaining columns carry the metadata
 * computed by `FrameworkModeMetadataExtractor`.
 */
predicate isNegativeExample(
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
  string type, string subtypes, string name, string signature, string input, string output,
  string parameterName, string extensibleType
) {
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, _, extensibleType)
  ) and
  characteristic.appliesToEndpoint(endpoint) and
  // `characteristic` has to rule out every type that the endpoint could potentially have
  forall(AutomodelEndpointTypes::EndpointType potentialType |
    potentialType = CharacteristicsImpl::getAPotentialType(endpoint)
  |
    characteristic.hasImplications(potentialType, false, _)
  ) and
  // even the weakest of these exclusions has to be made with high confidence
  confidence =
    min(float typeConfidence |
      characteristic
          .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, typeConfidence)
    ) and
  confidence >= SharedCharacteristics::highConfidence() and
  // An endpoint may legitimately be both a potential source/sanitizer and a sink. Such ambiguous
  // endpoints could confuse the model, so they are not used as negative examples in the prompt.
  not exists(EndpointCharacteristic positive, float positiveConfidence |
    positive != characteristic and
    positive.appliesToEndpoint(endpoint) and
    positive
        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true,
          positiveConfidence) and
    positiveConfidence >= SharedCharacteristics::maximalConfidence()
  )
}
|
||||
|
||||
/**
 * Holds if `endpoint` is known as an endpoint of type `endpointType`, which makes it a
 * positive example for that type.
 *
 * The remaining columns carry the metadata computed by `FrameworkModeMetadataExtractor`.
 */
predicate isPositiveExample(
  Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
  string signature, string input, string output, string parameterName, string extensibleType
) {
  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, _, extensibleType)
  )
}
|
||||
|
||||
/*
|
||||
* EndpointCharacteristic classes that are specific to Automodel for Java.
|
||||
*/
|
||||
|
||||
/**
 * A negative characteristic for is-style boolean methods: their parameters should not be
 * considered sinks, and their return value should not be considered a source.
 *
 * A callable whose name starts with `is` and that returns a boolean (e.g. `isDirectory`)
 * normally only performs a check. The dangerous or interesting operation usually happens in a
 * later call, and it is that call which should be modeled.
 *
 * TODO: this might filter too much, since methods with more than one parameter may still
 * contain interesting sinks.
 */
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
  UnexploitableIsCharacteristic() { this = "argument of is-style boolean method" }

  override predicate appliesToEndpoint(Endpoint e) {
    e.getCallable().getReturnType() instanceof BooleanType and
    e.getCallable().getName().matches("is%") and
    exists(string extensibleType | extensibleType = e.getExtensibleType() |
      // sink candidates, unless they are already known sinks
      extensibleType = "sinkModel" and
      not FrameworkCandidatesImpl::isSink(e, _, _)
      or
      // return values that are source candidates, unless they are already known sources
      extensibleType = "sourceModel" and
      e.getMaDOutput() = "ReturnValue" and
      not FrameworkCandidatesImpl::isSource(e, _, _)
    )
  }
}
|
||||
|
||||
/**
 * A negative characteristic for existence-checking boolean methods: their parameters should
 * not be considered sinks, and their return value should not be considered a source.
 *
 * A callable that is named `exists` or `notExists` (compared case-insensitively) and that
 * returns a boolean normally only performs a check. The dangerous or interesting operation
 * usually happens in a later call, and it is that call which should be modeled.
 */
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
  UnexploitableExistsCharacteristic() { this = "argument of existence-checking boolean method" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(Callable callable, string extensibleType |
      callable = e.getCallable() and
      extensibleType = e.getExtensibleType() and
      callable.getReturnType() instanceof BooleanType and
      callable.getName().toLowerCase() = ["exists", "notexists"]
    |
      // sink candidates, unless they are already known sinks
      extensibleType = "sinkModel" and
      not FrameworkCandidatesImpl::isSink(e, _, _)
      or
      // return values that are source candidates, unless they are already known sources
      extensibleType = "sourceModel" and
      e.getMaDOutput() = "ReturnValue" and
      not FrameworkCandidatesImpl::isSource(e, _, _)
    )
  }
}
|
||||
|
||||
/**
 * A negative characteristic for callables declared in a type that has `TypeThrowable` among
 * its (reflexive, transitive) supertypes: their parameters should not be considered sinks, and
 * their return value should not be considered a source.
 */
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
  ExceptionCharacteristic() { this = "argument/result of exception-related method" }

  override predicate appliesToEndpoint(Endpoint e) {
    exists(string extensibleType | extensibleType = e.getExtensibleType() |
      // sink candidates, unless they are already known sinks
      extensibleType = "sinkModel" and
      not FrameworkCandidatesImpl::isSink(e, _, _)
      or
      // return values that are source candidates, unless they are already known sources
      extensibleType = "sourceModel" and
      e.getMaDOutput() = "ReturnValue" and
      not FrameworkCandidatesImpl::isSource(e, _, _)
    ) and
    e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable
  }
}
|
||||
|
||||
/**
 * A characteristic that marks endpoints as uninteresting to model when their callable is not a
 * `ModelExclusions::ModelApi`, in other words when the API is not considered worth modeling.
 */
private class NotAModelApi extends CharacteristicsImpl::UninterestingToModelCharacteristic {
  NotAModelApi() { this = "not a model API" }

  override predicate appliesToEndpoint(Endpoint e) {
    not exists(ModelExclusions::ModelApi api | api = e.getCallable())
  }
}
|
|
@ -1,38 +0,0 @@
|
|||
/**
|
||||
* Surfaces the endpoints that are not already known to be sinks, and are therefore used as candidates for
|
||||
* classification with an ML model.
|
||||
*
|
||||
* Note: This query does not actually classify the endpoints using the model.
|
||||
*
|
||||
* @name Automodel candidates (framework mode)
|
||||
* @description A query to extract automodel candidates in framework mode.
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id java/ml/extract-automodel-framework-candidates
|
||||
* @tags internal extract automodel framework-mode candidates
|
||||
*/
|
||||
|
||||
private import AutomodelFrameworkModeCharacteristics
|
||||
private import AutomodelJavaUtil
|
||||
|
||||
// One result per candidate endpoint. The message refers to the two related locations first and
// to the ten metadata columns afterwards, in the order in which they are selected below.
from
  Endpoint endpoint, //
  DollarAtString package, //
  DollarAtString type, //
  DollarAtString subtypes, //
  DollarAtString name, //
  DollarAtString signature, //
  DollarAtString input, //
  DollarAtString output, //
  DollarAtString parameterName, //
  DollarAtString alreadyAiModeled, //
  DollarAtString extensibleType
where
  isCandidate(endpoint, package, type, subtypes, name, signature, input, output, parameterName,
    extensibleType, alreadyAiModeled)
select endpoint,
  "Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  // related locations
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  // metadata
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  parameterName, "parameterName", //
  alreadyAiModeled, "alreadyAiModeled", //
  extensibleType, "extensibleType"
|
|
@ -1,36 +0,0 @@
|
|||
/**
|
||||
* Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt.
|
||||
*
|
||||
* @name Negative examples (framework mode)
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id java/ml/extract-automodel-framework-negative-examples
|
||||
* @tags internal extract automodel framework-mode negative examples
|
||||
*/
|
||||
|
||||
private import AutomodelFrameworkModeCharacteristics
|
||||
private import AutomodelEndpointTypes
|
||||
private import AutomodelJavaUtil
|
||||
|
||||
// One result per negative example. The message starts with the characteristic that establishes
// the example, followed by the two related locations and the nine metadata columns, in the
// order in which they are selected below.
from
  Endpoint endpoint, //
  EndpointCharacteristic characteristic, //
  float confidence, //
  DollarAtString package, //
  DollarAtString type, //
  DollarAtString subtypes, //
  DollarAtString name, //
  DollarAtString signature, //
  DollarAtString input, //
  DollarAtString output, //
  DollarAtString parameterName, //
  DollarAtString extensibleType
where
  isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature,
    input, output, parameterName, extensibleType)
select endpoint,
  characteristic + "\nrelated locations: $@, $@." +
    "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  // related locations
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  // metadata
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  parameterName, "parameterName", //
  extensibleType, "extensibleType"
|
|
@ -1,34 +0,0 @@
|
|||
/**
|
||||
* Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt.
|
||||
*
|
||||
* @name Positive examples (framework mode)
|
||||
* @kind problem
|
||||
* @problem.severity recommendation
|
||||
* @id java/ml/extract-automodel-framework-positive-examples
|
||||
* @tags internal extract automodel framework-mode positive examples
|
||||
*/
|
||||
|
||||
private import AutomodelFrameworkModeCharacteristics
|
||||
private import AutomodelEndpointTypes
|
||||
private import AutomodelJavaUtil
|
||||
|
||||
// One result per positive example. The message starts with the endpoint type, followed by the
// two related locations and the nine metadata columns, in the order in which they are selected
// below.
from
  Endpoint endpoint, //
  EndpointType endpointType, //
  DollarAtString package, //
  DollarAtString type, //
  DollarAtString subtypes, //
  DollarAtString name, //
  DollarAtString signature, //
  DollarAtString input, //
  DollarAtString output, //
  DollarAtString parameterName, //
  DollarAtString extensibleType
where
  isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output,
    parameterName, extensibleType)
select endpoint,
  endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
  // related locations
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", //
  CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", //
  // metadata
  package, "package", //
  type, "type", //
  subtypes, "subtypes", //
  name, "name", //
  signature, "signature", //
  input, "input", //
  output, "output", //
  parameterName, "parameterName", //
  extensibleType, "extensibleType"
|
|
@ -1,111 +0,0 @@
|
|||
private import java
|
||||
private import AutomodelEndpointTypes as AutomodelEndpointTypes
|
||||
|
||||
/**
 * A string that can be selected by a query through the `$@` placeholder notation.
 *
 * Any string is a `DollarAtString`. The class adds a mock `hasLocationInfo` predicate that
 * reports the string itself as the file path, with all line and column numbers set to 1. Use it
 * to return a plain string value through `$@`, so that the value ends up in the SARIF file.
 *
 * Background information on `hasLocationInfo`:
 * https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-location-information
 */
class DollarAtString extends string {
  bindingset[this]
  DollarAtString() { any() }

  /** Holds if `path` is this string and every line and column number is 1. */
  bindingset[this]
  predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) {
    sl = 1 and
    sc = 1 and
    el = 1 and
    ec = 1 and
    path = this
  }
}
|
||||
|
||||
/**
 * Holds if `kind` is the MaD kind of the endpoint type `type`, as reported by
 * `EndpointType.getKind()`.
 */
predicate isKnownKind(string kind, AutomodelEndpointTypes::EndpointType type) {
  type.getKind() = kind
}
|
||||
|
||||
/**
 * Gets the value to use for the `subtypes` column of a MaD row for `callable`.
 *
 * By convention, `subtypes` should only be `true` when subtypes with a different implementation
 * _can_ exist. The result is therefore `false` if the callable or its declaring type is static
 * or final, and `true` otherwise. Always using `true` would technically work, but it would break
 * the convention.
 */
pragma[nomagic]
boolean considerSubtypes(Callable callable) {
  result = true and
  not callable.isStatic() and
  not callable.getDeclaringType().isStatic() and
  not callable.isFinal() and
  not callable.getDeclaringType().isFinal()
  or
  result = false and
  (
    callable.isStatic() or
    callable.getDeclaringType().isStatic() or
    callable.isFinal() or
    callable.getDeclaringType().isFinal()
  )
}
|
||||
|
||||
/**
 * Holds if the callable identified by `package`, `type`, `name` and `signature` is selected as
 * a candidate for automodeling.
 *
 * This is an extensible predicate, so the selection can be supplied at runtime.
 */
extensible predicate automodelCandidateFilter(string package, string type, string name, string signature);
|
||||
|
||||
/**
 * Holds if the callable identified by `package`, `type`, `name` and `signature` is a candidate
 * for automodeling.
 *
 * If the extensible predicate `automodelCandidateFilter` has any rows, only the callables that
 * it lists are candidates. If it has no rows, every callable is a candidate.
 */
bindingset[package, type, name, signature]
predicate includeAutomodelCandidate(string package, string type, string name, string signature) {
  if automodelCandidateFilter(_, _, _, _)
  then automodelCandidateFilter(package, type, name, signature)
  else any()
}
|
||||
|
||||
/**
 * Holds if the program element `e` corresponds to a piece of source code, as opposed to being
 * generated by the compiler.
 *
 * Note: this is stricter than `Element::fromSource`, which merely checks that the element is
 * in a source file rather than in a JAR file. Source files can contain compiler-generated
 * elements too (especially for Kotlin), and those are excluded here as well.
 */
predicate isFromSource(Element e) {
  not (
    // explicitly marked as compiler-generated
    e.isCompilerGenerated()
    or
    // has a dummy location
    e.hasLocationInfo(_, 0, 0, 0, 0)
  ) and
  // located in a source file (not in a JAR)
  e.fromSource()
}
|
||||
|
||||
/**
 * Holds if `tp` is a type through which taint cannot flow, because it is a numeric type or
 * another type with a fixed set of values.
 *
 * This covers `void`, primitive types, boxed types and number types.
 */
predicate isUnexploitableType(Type tp) {
  tp instanceof VoidType
  or
  tp instanceof PrimitiveType
  or
  tp instanceof BoxedType
  or
  tp instanceof NumberType
}
|
||||
|
||||
/**
 * Holds if the method `m` can be overridden, that is, if it is neither final, static nor
 * private, and its declaring type is not final either.
 */
predicate isOverridable(Method m) {
  not (
    m.isFinal() or
    m.isStatic() or
    m.isPrivate() or
    m.getDeclaringType().isFinal()
  )
}
|
|
@ -1,412 +0,0 @@
|
|||
/** Gets the maximal confidence value, `1.0`. */
float maximalConfidence() { result = 1.0 }
|
||||
|
||||
/** Gets the confidence value that counts as high confidence, `0.9`. */
float highConfidence() { result = 0.9 }
|
||||
|
||||
/** Gets the confidence value that counts as medium confidence, `0.6`. */
float mediumConfidence() { result = 0.6 }
|
||||
|
||||
/**
 * The signature that describes how the shared characteristics are instantiated for a given
 * candidate class.
 *
 * An implementation picks a type for endpoints (eg., `ParameterNode`) and a type that labels
 * the endpoint classes (the `EndpointType`). One of the endpoint classes needs to be a
 * 'negative' class, meaning "not any of the other known endpoint types".
 */
signature module CandidateSig {
  /**
   * A potential candidate for modeling. This is typically bound to the DataFlow node class of
   * the language, or to a subtype of it.
   */
  class Endpoint {
    /** Gets the kind of this endpoint, which is either "sourceModel" or "sinkModel". */
    string getExtensibleType();

    /** Gets a string representation of this endpoint. */
    string toString();
  }

  /**
   * A location that is related to an endpoint. This is typically bound to the supertype of all
   * AST nodes (eg., `Top`).
   */
  class RelatedLocation;

  /** A label for a related location, eg., method-doc or class-doc. */
  class RelatedLocationType;

  /** An endpoint type that this specification considers. */
  class EndpointType extends string;

  /** A sink endpoint type that this specification considers. */
  class SinkType extends EndpointType;

  /** A source endpoint type that this specification considers. */
  class SourceType extends EndpointType;

  /**
   * Gets the endpoint `e` as a location.
   *
   * This is a utility that converts an endpoint into its corresponding location.
   */
  RelatedLocation asLocation(Endpoint e);

  /** Holds if `kind` is a known MaD kind that corresponds to the endpoint type `type`. */
  predicate isKnownKind(string kind, EndpointType type);

  /** Holds if `e` is a flow sanitizer of type `t`. */
  predicate isSanitizer(Endpoint e, EndpointType t);

  /** Holds if `e` is a sink that is labelled `kind` and has provenance `provenance`. */
  predicate isSink(Endpoint e, string kind, string provenance);

  /** Holds if `e` is a source that is labelled `kind` and has provenance `provenance`. */
  predicate isSource(Endpoint e, string kind, string provenance);

  /** Holds if `e` is neither a source nor a sink of any kind. */
  predicate isNeutral(Endpoint e);

  /**
   * Gets the related location of `e` that is labelled `name`.
   *
   * A related location is a place in the source code that may carry additional information
   * about an endpoint which is useful to the machine learning model. The documentation comment
   * of a method is an example of a related location for a call to that method.
   */
  RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType name);
}
|
||||
|
||||
/**
|
||||
* A set of shared characteristics for a given candidate class.
|
||||
*
|
||||
* This module is language-agnostic, although the `CandidateSig` module will be language-specific.
|
||||
*
|
||||
* The language specific implementation can also further extend the behavior of this module by adding additional
|
||||
* implementations of endpoint characteristics exported by this module.
|
||||
*/
|
||||
module SharedCharacteristics<CandidateSig Candidate> {
|
||||
/** Holds if `e` is a sink according to the candidate implementation (an alias for `Candidate::isSink`). */
predicate isSink = Candidate::isSink/3;
|
||||
|
||||
/** Holds if `e` is neutral according to the candidate implementation (an alias for `Candidate::isNeutral`). */
predicate isNeutral = Candidate::isNeutral/1;
|
||||
|
||||
/**
 * Holds if `e` is already modeled with kind `kind` and provenance `provenance`, where
 * `extensibleKind` is "sourceModel" for a known source and "sinkModel" for a known sink.
 */
predicate isModeled(Candidate::Endpoint e, string kind, string extensibleKind, string provenance) {
  extensibleKind = "sourceModel" and
  Candidate::isSource(e, kind, provenance)
  or
  extensibleKind = "sinkModel" and
  Candidate::isSink(e, kind, provenance)
}
|
||||
|
||||
/**
 * Holds if `endpoint` is known to be of type `endpointType`, as established by
 * `characteristic`.
 *
 * That is the case when `characteristic` applies to the endpoint and is a positive indicator
 * for `endpointType` with maximal confidence.
 */
predicate isKnownAs(
  Candidate::Endpoint endpoint, Candidate::EndpointType endpointType,
  EndpointCharacteristic characteristic
) {
  exists(float confidence | confidence = maximalConfidence() |
    characteristic.hasImplications(endpointType, true, confidence) and
    characteristic.appliesToEndpoint(endpoint)
  )
}
|
||||
|
||||
/**
 * Gets an endpoint type that `endpoint` could potentially have.
 *
 * Endpoints whose extensible type is `"sourceModel"` are paired with source types only,
 * and endpoints whose extensible type is `"sinkModel"` with sink types only.
 */
Candidate::EndpointType getAPotentialType(Candidate::Endpoint endpoint) {
  exists(string extensibleType | extensibleType = endpoint.getExtensibleType() |
    extensibleType = "sourceModel" and result instanceof Candidate::SourceType
    or
    extensibleType = "sinkModel" and result instanceof Candidate::SinkType
  )
}
|
||||
|
||||
/**
 * Holds if `endpoint` should be treated as a candidate for `endpointType` and passed on
 * to the ML model for classification.
 *
 * An endpoint is a candidate for each of its potential types for which no excluding
 * characteristic exists.
 */
predicate isCandidate(Candidate::Endpoint endpoint, Candidate::EndpointType endpointType) {
  not exists(getAnExcludingCharacteristic(endpoint, endpointType)) and
  getAPotentialType(endpoint) = endpointType
}
|
||||
|
||||
/**
 * Gets the related location of `e` for the related-location type `type`, if there is one.
 * If `e` has no related location of that type, gets the location of the candidate itself
 * (as given by `Candidate::asLocation`).
 */
Candidate::RelatedLocation getRelatedLocationOrCandidate(
  Candidate::Endpoint e, Candidate::RelatedLocationType type
) {
  result = Candidate::getRelatedLocation(e, type)
  or
  // fall back to the endpoint itself only when no related location exists
  not exists(Candidate::getRelatedLocation(e, type)) and
  result = Candidate::asLocation(e)
}
|
||||
|
||||
/**
 * Gets a characteristic that disbars `endpoint` from being a candidate for `endpointType`,
 * that is, a characteristic that applies to `endpoint` and is a negative indicator for
 * `endpointType` with at least medium confidence.
 */
EndpointCharacteristic getAnExcludingCharacteristic(
  Candidate::Endpoint endpoint, Candidate::EndpointType endpointType
) {
  exists(float confidence |
    result.hasImplications(endpointType, false, confidence) and
    confidence >= mediumConfidence()
  ) and
  result.appliesToEndpoint(endpoint)
}
|
||||
|
||||
/**
 * A characteristic that an endpoint may have.
 *
 * Characteristics drive three decisions: whether an endpoint goes into the training set,
 * with which kind it is included, and whether it is scored at inference time.
 */
abstract class EndpointCharacteristic extends string {
  /**
   * Holds for the string that names this characteristic. The name should describe a
   * property of an endpoint that is meaningful for deciding whether it is a sink and,
   * if so, of which sink type.
   *
   * The characteristic predicate accepts any string (`bindingset[this]` with `any()`),
   * so each subclass chooses its own name.
   */
  bindingset[this]
  EndpointCharacteristic() { any() }

  /**
   * Holds if endpoint `n` has this characteristic.
   */
  abstract predicate appliesToEndpoint(Candidate::Endpoint n);

  /**
   * Holds if this characteristic implies something about membership of the class
   * `endpointType`.
   *
   * Params:
   * endpointType: The sink/source type.
   * isPositiveIndicator: `true` if the characteristic indicates that the endpoint _is_ a
   * member of the class; `false` if it indicates that the endpoint _isn't_ a member.
   * confidence: A float in [0, 1] giving the strength of the indication. A value near
   * zero means the characteristic is a very weak indicator either way; a value of 1
   * means every endpoint with this characteristic definitively does/doesn't belong to
   * the class.
   */
  abstract predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  );

  /** Gets the threshold at or above which an indicator counts as a high-confidence indicator. */
  final float getHighConfidenceThreshold() { result = 0.8 }
}
|
||||
|
||||
/**
 * A characteristic which indicates, with maximal confidence, that an endpoint is a sink of
 * a specific type. Endpoints with such a characteristic can serve as positive samples for
 * training or for a few-shot prompt.
 */
abstract class SinkCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  SinkCharacteristic() { any() }

  /** Gets the sink type that this characteristic indicates. */
  abstract Candidate::EndpointType getSinkType();

  final override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = maximalConfidence() and
    isPositiveIndicator = true and
    endpointType = this.getSinkType()
  }
}
|
||||
|
||||
/**
 * A characteristic which indicates, with maximal confidence, that an endpoint is a source
 * of a specific type. Endpoints with such a characteristic can serve as positive samples
 * for training or for a few-shot prompt.
 */
abstract class SourceCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  SourceCharacteristic() { any() }

  /** Gets the source type that this characteristic indicates. */
  abstract Candidate::EndpointType getSourceType();

  final override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = maximalConfidence() and
    isPositiveIndicator = true and
    endpointType = this.getSourceType()
  }
}
|
||||
|
||||
/**
 * A characteristic which indicates, with high confidence, that an endpoint is not a sink
 * of any type. Endpoints with such a characteristic can serve as negative samples for
 * training or for a few-shot prompt.
 */
abstract class NotASinkCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  NotASinkCharacteristic() { any() }

  override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    // a negative indicator for every sink type
    confidence = highConfidence() and
    isPositiveIndicator = false and
    endpointType instanceof Candidate::SinkType
  }
}
|
||||
|
||||
/**
 * A characteristic which indicates, with high confidence, that an endpoint is not a source
 * of any type. Endpoints with such a characteristic can serve as negative samples for
 * training or for a few-shot prompt.
 */
abstract class NotASourceCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  NotASourceCharacteristic() { any() }

  override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    // a negative indicator for every source type
    confidence = highConfidence() and
    isPositiveIndicator = false and
    endpointType instanceof Candidate::SourceType
  }
}
|
||||
|
||||
/**
 * A characteristic which indicates, with high confidence, that an endpoint is neither a
 * source nor a sink of any type.
 */
abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic,
  NotASourceCharacteristic
{
  bindingset[this]
  NeitherSourceNorSinkCharacteristic() { any() }

  final override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    // the union of the implications inherited from both parent characteristics
    NotASourceCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence)
    or
    NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence)
  }
}
|
||||
|
||||
/**
 * A characteristic which indicates, with medium confidence, that an endpoint is unlikely
 * to be a sink of any type.
 *
 * Such endpoints can be excluded from scoring at inference time, both to save time and to
 * avoid false positives. They should not be used as negative samples for training or for
 * a few-shot prompt, because a small number of them may in fact be sinks.
 */
abstract class LikelyNotASinkCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  LikelyNotASinkCharacteristic() { any() }

  override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = mediumConfidence() and
    isPositiveIndicator = false and
    endpointType instanceof Candidate::SinkType
  }
}
|
||||
|
||||
/**
 * A characteristic which indicates that an endpoint is not a sink that is interesting to
 * model in the standard Java libraries. This does not necessarily mean that the endpoint
 * is not a sink. These filters should be removed when extracting sink candidates within a
 * user's codebase for customized modeling.
 *
 * Because such endpoints are not necessarily non-sinks, they should not be used as
 * negative samples for training or for a few-shot prompt.
 */
abstract class UninterestingToModelCharacteristic extends EndpointCharacteristic {
  bindingset[this]
  UninterestingToModelCharacteristic() { any() }

  override predicate hasImplications(
    Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence
  ) {
    confidence = mediumConfidence() and
    isPositiveIndicator = false and
    endpointType instanceof Candidate::SinkType
  }
}
|
||||
|
||||
/**
 * Default characteristics that are derived solely from the predicates of the
 * `CandidateSig` implementation.
 */
private module DefaultCharacteristicImplementations {
  /**
   * A characteristic for endpoints that the `CandidateSig` implementation identifies as
   * sinks; these are sinks with maximal confidence.
   */
  private class KnownSinkCharacteristic extends SinkCharacteristic {
    string madKind;
    Candidate::EndpointType endpointType;
    string provenance;

    KnownSinkCharacteristic() {
      Candidate::isSink(_, madKind, provenance) and
      Candidate::isKnownKind(madKind, endpointType) and
      // bind "this" to a unique string that differs from the strings of the endpoint type classes
      this = madKind + "_" + provenance + "_characteristic"
    }

    override Candidate::EndpointType getSinkType() { result = endpointType }

    override predicate appliesToEndpoint(Candidate::Endpoint e) {
      Candidate::isSink(e, madKind, provenance)
    }
  }

  /**
   * A characteristic for endpoints that the `CandidateSig` implementation identifies as
   * sources; these are sources with maximal confidence.
   */
  private class KnownSourceCharacteristic extends SourceCharacteristic {
    string madKind;
    Candidate::EndpointType endpointType;
    string provenance;

    KnownSourceCharacteristic() {
      Candidate::isSource(_, madKind, provenance) and
      Candidate::isKnownKind(madKind, endpointType) and
      // bind "this" to a unique string that differs from the strings of the endpoint type classes
      this = madKind + "_" + provenance + "_characteristic"
    }

    override Candidate::EndpointType getSourceType() { result = endpointType }

    override predicate appliesToEndpoint(Candidate::Endpoint e) {
      Candidate::isSource(e, madKind, provenance)
    }
  }

  /**
   * A negative characteristic for endpoints that `Candidate::isNeutral` reports as neutral.
   */
  private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic {
    NeutralModelCharacteristic() { this = "known non-sink" }

    override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) }
  }

  /**
   * A negative characteristic for endpoints that are sanitizers, and thus not sources.
   */
  private class IsSanitizerCharacteristic extends NotASourceCharacteristic {
    IsSanitizerCharacteristic() { this = "known sanitizer" }

    override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isSanitizer(e, _) }
  }
}
|
||||
}
|
|
@ -1,62 +0,0 @@
|
|||
/**
|
||||
* This file contains query predicates for use when gathering metrics at scale using Multi Repo
|
||||
* Variant Analysis.
|
||||
*/
|
||||
|
||||
private import java
|
||||
private import AutomodelAlertSinkUtil
|
||||
|
||||
/**
 * Holds if `alertCount` is the number of alerts of the query with ID `queryId` whose sinks
 * correspond to the `ai-generated` sink model described by the remaining columns.
 */
query predicate sinkModelCountPerQuery(
  string queryId, int alertCount, string package, string type, boolean subtypes, string name,
  string signature, string input, string ext, string kind, string provenance
) {
  exists(SinkModel s |
    // restrict to AI-generated models first, then tally the alerts per query
    s.getProvenance() = "ai-generated" and
    sinkModelTallyPerQuery(queryId, alertCount, s) and
    // expose every column of the model
    package = s.getPackage() and
    type = s.getType() and
    subtypes = s.getSubtypes() and
    name = s.getName() and
    signature = s.getSignature() and
    input = s.getInput() and
    ext = s.getExt() and
    kind = s.getKind() and
    provenance = s.getProvenance()
  )
}
|
||||
|
||||
/**
 * Holds if `instanceCount` is the (positive) number of instances that correspond to the
 * `ai-generated` sink model identified by `package`, `name`, `input`, and the other columns.
 */
query predicate instanceCount(
  int instanceCount, string package, string type, boolean subtypes, string name, string signature,
  string input, string ext, string kind, string provenance
) {
  exists(SinkModel s |
    s.getProvenance() = "ai-generated" and
    // models without any instance are not reported
    instanceCount = s.getInstanceCount() and
    instanceCount > 0 and
    // expose every column of the model
    package = s.getPackage() and
    type = s.getType() and
    subtypes = s.getSubtypes() and
    name = s.getName() and
    signature = s.getSignature() and
    input = s.getInput() and
    ext = s.getExt() and
    kind = s.getKind() and
    provenance = s.getProvenance()
  )
}
|
||||
|
||||
// MRVA requires a select clause; it is repurposed here to report the names of the query
// predicates that produced at least one result.
from string hadResults
where
  hadResults = "sinkModelCountPerQuery" and
  sinkModelCountPerQuery(_, _, _, _, _, _, _, _, _, _, _)
  or
  hadResults = "instanceCount" and
  instanceCount(_, _, _, _, _, _, _, _, _, _)
select hadResults
|
|
@ -1,2 +0,0 @@
|
|||
---
|
||||
lastReleaseVersion: 1.0.11
|
|
@ -1,10 +0,0 @@
|
|||
name: codeql/java-automodel-queries
|
||||
version: 1.0.12-dev
|
||||
groups:
|
||||
- java
|
||||
- automodel
|
||||
dependencies:
|
||||
codeql/java-all: ${workspace}
|
||||
dataExtensions:
|
||||
- AutomodelCandidateFilter.yml
|
||||
warnOnImplicitThis: true
|
|
@ -1,2 +0,0 @@
|
|||
testFailures
|
||||
failures
|
|
@ -1,35 +0,0 @@
|
|||
import java
|
||||
import AutomodelApplicationModeCharacteristics as Characteristics
|
||||
import AutomodelExtractionTests
|
||||
|
||||
/**
 * Adapts the application-mode characteristics to the `TestHelperSig` signature by
 * forwarding to the corresponding `Characteristics` predicates and ignoring the columns
 * that the extraction tests do not use.
 */
module TestHelper implements TestHelperSig<Characteristics::ApplicationCandidatesImpl> {
  /** Gets the location of the program element underlying `endpoint`. */
  Location getEndpointLocation(Characteristics::Endpoint endpoint) {
    endpoint.asTop().getLocation() = result
  }

  /** Holds if `endpoint` is reported as a candidate with the given metadata. */
  predicate isCandidate(
    Characteristics::Endpoint endpoint, string name, string signature, string input, string output,
    string extensibleType
  ) {
    Characteristics::isCandidate(endpoint, _, _, _, name, signature, input, output, _,
      extensibleType, _)
  }

  /** Holds if `endpoint` is reported as a positive example of type `endpointType`. */
  predicate isPositiveExample(
    Characteristics::Endpoint endpoint, string endpointType, string name, string signature,
    string input, string output, string extensibleType
  ) {
    Characteristics::isPositiveExample(endpoint, endpointType, _, _, _, name, signature, input,
      output, _, extensibleType)
  }

  /** Holds if `endpoint` is reported as a negative example with the given metadata. */
  predicate isNegativeExample(
    Characteristics::Endpoint endpoint, string name, string signature, string input, string output,
    string extensibleType
  ) {
    Characteristics::isNegativeExample(endpoint, _, _, _, _, _, name, signature, input, output, _,
      extensibleType)
  }
}
|
||||
|
||||
import MakeTest<Extraction<Characteristics::ApplicationCandidatesImpl, TestHelper>>
|
|
@ -1,8 +0,0 @@
|
|||
import hudson.Plugin;
|
||||
|
||||
// Test fixture: a subclass of `hudson.Plugin` that overrides `configure`. The inline
// expectation on the method line marks both parameters as source model candidates.
public class PluginImpl extends Plugin {
|
||||
@Override
|
||||
public void configure(String name, String value) { // $ sourceModelCandidate=configure(String,String):Parameter[0] sourceModelCandidate=configure(String,String):Parameter[1]
|
||||
// ...
|
||||
}
|
||||
}
|
|
@ -1,112 +0,0 @@
|
|||
package com.github.codeql.test;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.nio.file.CopyOption;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.function.Supplier;
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.nio.file.FileVisitOption;
|
||||
import java.net.URLConnection;
|
||||
import java.util.concurrent.FutureTask;
|
||||
|
||||
class Test {
|
||||
public static void main(String[] args) throws Exception {
|
||||
AtomicReference<String> reference = new AtomicReference<>(); // uninteresting (parameterless constructor)
|
||||
reference.set( // $ sinkModelCandidate=set(Object):Argument[this]
|
||||
args[0] // $ negativeSinkExample=set(Object):Argument[0] // modeled as a flow step
|
||||
); // not a source candidate (return type is void)
|
||||
}
|
||||
|
||||
public static void callSupplier(Supplier<String> supplier) {
|
||||
supplier.get(); // not a source candidate (lambda flow)
|
||||
}
|
||||
|
||||
public static void copyFiles(Path source, Path target, CopyOption option) throws Exception {
|
||||
Files.copy(
|
||||
source, // $ positiveSinkExample=copy(Path,Path,CopyOption[]):Argument[0](path-injection)
|
||||
target, // $ positiveSinkExample=copy(Path,Path,CopyOption[]):Argument[1](path-injection)
|
||||
option // no candidate (not modeled, but source and target are modeled)
|
||||
); // $ sourceModelCandidate=copy(Path,Path,CopyOption[]):ReturnValue
|
||||
}
|
||||
|
||||
public static InputStream getInputStream(Path openPath) throws Exception {
|
||||
return Files.newInputStream(
|
||||
openPath // $ sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[0] positiveSinkExample=newInputStream(Path,OpenOption[]):Argument[0](path-injection) // sink candidate because "only" ai-modeled, and useful as a candidate in regression testing
|
||||
); // $ sourceModelCandidate=newInputStream(Path,OpenOption[]):ReturnValue
|
||||
}
|
||||
|
||||
public static InputStream getInputStream(String openPath, String otherPath) throws Exception {
|
||||
return Test.getInputStream( // the call is not a source candidate (argument to local call)
|
||||
Paths.get(
|
||||
openPath, // $ negativeSinkExample=get(String,String[]):Argument[0] // modeled as a flow step
|
||||
otherPath
|
||||
) // $ sourceModelCandidate=get(String,String[]):ReturnValue negativeSinkExample=get(String,String[]):Argument[1]
|
||||
);
|
||||
}
|
||||
|
||||
public static int compareFiles(File f1, File f2) {
|
||||
return f1.compareTo( // $ negativeSinkExample=compareTo(File):Argument[this]
|
||||
f2 // $ negativeSinkExample=compareTo(File):Argument[0] // modeled as not a sink
|
||||
); // not a source candidate (return type is int)
|
||||
}
|
||||
|
||||
public static void FilesWalkExample(Path p, FileVisitOption o) throws Exception {
|
||||
Files.walk(
|
||||
p, // $ negativeSinkExample=walk(Path,FileVisitOption[]):Argument[0] // modeled as a flow step
|
||||
o, // the implicit varargs array is a candidate, annotated on the last line of the call
|
||||
o // not a candidate (only the first arg corresponding to a varargs array
|
||||
// is extracted)
|
||||
); // $ sourceModelCandidate=walk(Path,FileVisitOption[]):ReturnValue sinkModelCandidate=walk(Path,FileVisitOption[]):Argument[1]
|
||||
}
|
||||
|
||||
public static void WebSocketExample(URLConnection c) throws Exception {
|
||||
c.getInputStream(); // $ sinkModelCandidate=getInputStream():Argument[this] positiveSourceExample=getInputStream():ReturnValue(remote) // not a source candidate (manual modeling)
|
||||
c.connect(); // $ sinkModelCandidate=connect():Argument[this] // not a source candidate (return type is void)
|
||||
}
|
||||
|
||||
public static void fileFilterExample(File f, FileFilter ff) {
|
||||
f.listFiles( // $ sinkModelCandidate=listFiles(FileFilter):Argument[this]
|
||||
ff
|
||||
); // $ sourceModelCandidate=listFiles(FileFilter):ReturnValue
|
||||
}
|
||||
}
|
||||
|
||||
class OverrideTest extends Exception {
|
||||
public void printStackTrace(PrintWriter writer) { // $ sourceModelCandidate=printStackTrace(PrintWriter):Parameter[0]
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class TaskUtils {
|
||||
public FutureTask getTask() {
|
||||
FutureTask ft = new FutureTask(() -> {
|
||||
// ^-- no sink candidate for the `this` qualifier of a constructor
|
||||
return 42;
|
||||
});
|
||||
return ft;
|
||||
}
|
||||
}
|
||||
|
||||
class MoreTests {
|
||||
public static void FilesListExample(Path p) throws Exception {
|
||||
Files.list(
|
||||
Files.createDirectories(
|
||||
p // $ positiveSinkExample=createDirectories(Path,FileAttribute[]):Argument[0](path-injection)
|
||||
) // $ sourceModelCandidate=createDirectories(Path,FileAttribute[]):ReturnValue negativeSinkExample=list(Path):Argument[0] // modeled as a flow step
|
||||
); // $ sourceModelCandidate=list(Path):ReturnValue
|
||||
|
||||
Files.delete(
|
||||
p // $ sinkModelCandidate=delete(Path):Argument[0] positiveSinkExample=delete(Path):Argument[0](path-injection)
|
||||
); // not a source candidate (return type is void)
|
||||
|
||||
Files.deleteIfExists(
|
||||
p // $ sinkModelCandidate=deleteIfExists(Path):Argument[0] positiveSinkExample=deleteIfExists(Path):Argument[0](path-injection)
|
||||
); // not a source candidate (return type is boolean)
|
||||
}
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
package hudson;
|
||||
|
||||
/** Plugin doc */
|
||||
public class Plugin {
|
||||
/** Configure method doc */
|
||||
public void configure(String name, String value) {}
|
||||
}
|
|
@ -1,77 +0,0 @@
|
|||
import java
|
||||
import TestUtilities.InlineExpectationsTest
|
||||
import AutomodelSharedCharacteristics
|
||||
|
||||
/**
 * The predicates that a mode-specific test helper must provide so that the shared
 * `Extraction` test module can report candidates and positive/negative examples.
 */
signature module TestHelperSig<CandidateSig Candidate> {
  /** Gets the location at which results for endpoint `e` are reported. */
  Location getEndpointLocation(Candidate::Endpoint e);

  /** Holds if `e` is a candidate with the given metadata. */
  predicate isCandidate(
    Candidate::Endpoint e, string name, string signature, string input, string output,
    string extensibleType
  );

  /** Holds if `e` is a positive example of type `endpointType` with the given metadata. */
  predicate isPositiveExample(
    Candidate::Endpoint e, string endpointType, string name, string signature, string input,
    string output, string extensibleType
  );

  /** Holds if `e` is a negative example with the given metadata. */
  predicate isNegativeExample(
    Candidate::Endpoint e, string name, string signature, string input, string output,
    string extensibleType
  );
}
|
||||
|
||||
/**
 * An inline-expectations test that reports candidates, positive examples and negative
 * examples for the given `Candidate` implementation, using `TestHelper` to obtain them.
 */
module Extraction<CandidateSig Candidate, TestHelperSig<Candidate> TestHelper> implements TestSig {
  string getARelevantTag() {
    result =
      [
        "sourceModelCandidate", "sinkModelCandidate", // a candidate source/sink
        "positiveSourceExample", "positiveSinkExample", // a known source/sink
        "negativeSourceExample", "negativeSinkExample" // a known non-source/non-sink
      ]
  }

  /**
   * Gets `ifSource` if `extensibleType` is `sourceModel`, and `ifSink` if it is
   * `sinkModel`.
   */
  bindingset[extensibleType, ifSource, ifSink]
  private string ifSource(string extensibleType, string ifSource, string ifSink) {
    extensibleType = "sinkModel" and result = ifSink
    or
    extensibleType = "sourceModel" and result = ifSource
  }

  /**
   * Holds if `endpoint` should be reported under `tag`, with `suffix` appended to the
   * reported value. The suffix is the parenthesized endpoint type for positive examples
   * and empty otherwise.
   */
  additional predicate selectEndpoint(
    Candidate::Endpoint endpoint, string name, string signature, string input, string output,
    string extensibleType, string tag, string suffix
  ) {
    // known sources/sinks, annotated with their endpoint type
    exists(string endpointType |
      suffix = "(" + endpointType + ")" and
      tag = "positive" + ifSource(extensibleType, "Source", "Sink") + "Example" and
      TestHelper::isPositiveExample(endpoint, endpointType, name, signature, input, output,
        extensibleType)
    )
    or
    // known non-sources/non-sinks
    suffix = "" and
    tag = "negative" + ifSource(extensibleType, "Source", "Sink") + "Example" and
    TestHelper::isNegativeExample(endpoint, name, signature, input, output, extensibleType)
    or
    // candidates
    suffix = "" and
    tag = ifSource(extensibleType, "sourceModelCandidate", "sinkModelCandidate") and
    TestHelper::isCandidate(endpoint, name, signature, input, output, extensibleType)
  }

  predicate hasActualResult(Location location, string element, string tag, string value) {
    exists(
      Candidate::Endpoint endpoint, string name, string signature, string input, string output,
      string extensibleType, string suffix
    |
      selectEndpoint(endpoint, name, signature, input, output, extensibleType, tag, suffix) and
      location = TestHelper::getEndpointLocation(endpoint) and
      element = endpoint.toString() and
      // for source models only the output is relevant, and vice versa for sink models
      value = name + signature + ":" + ifSource(extensibleType, output, input) + suffix
    )
  }
}
|
|
@ -1,2 +0,0 @@
|
|||
testFailures
|
||||
failures
|
|
@ -1,35 +0,0 @@
|
|||
import java
|
||||
import AutomodelFrameworkModeCharacteristics as Characteristics
|
||||
import AutomodelExtractionTests
|
||||
|
||||
/**
 * Adapts the framework-mode characteristics to the `TestHelperSig` signature by
 * forwarding to the corresponding `Characteristics` predicates and ignoring the columns
 * that the extraction tests do not use.
 */
module TestHelper implements TestHelperSig<Characteristics::FrameworkCandidatesImpl> {
  /** Gets the location of the program element underlying `endpoint`. */
  Location getEndpointLocation(Characteristics::Endpoint endpoint) {
    endpoint.asTop().getLocation() = result
  }

  /** Holds if `endpoint` is reported as a candidate with the given metadata. */
  predicate isCandidate(
    Characteristics::Endpoint endpoint, string name, string signature, string input, string output,
    string extensibleType
  ) {
    Characteristics::isCandidate(endpoint, _, _, _, name, signature, input, output, _,
      extensibleType, _)
  }

  /** Holds if `endpoint` is reported as a positive example of type `endpointType`. */
  predicate isPositiveExample(
    Characteristics::Endpoint endpoint, string endpointType, string name, string signature,
    string input, string output, string extensibleType
  ) {
    Characteristics::isPositiveExample(endpoint, endpointType, _, _, _, name, signature, input,
      output, _, extensibleType)
  }

  /** Holds if `endpoint` is reported as a negative example with the given metadata. */
  predicate isNegativeExample(
    Characteristics::Endpoint endpoint, string name, string signature, string input, string output,
    string extensibleType
  ) {
    Characteristics::isNegativeExample(endpoint, _, _, _, _, _, name, signature, input, output, _,
      extensibleType)
  }
}
|
||||
|
||||
import MakeTest<Extraction<Characteristics::FrameworkCandidatesImpl, TestHelper>>
|
|
@ -1,15 +0,0 @@
|
|||
package com.github.codeql.test;
|
||||
|
||||
public class MyWriter extends java.io.Writer {
|
||||
@Override
|
||||
public void write(char[] cbuf, int off, int len) { // $ sinkModelCandidate=write(char[],int,int):Argument[this] positiveSinkExample=write(char[],int,int):Argument[0](file-content-store) sourceModelCandidate=write(char[],int,int):Parameter[this] sourceModelCandidate=write(char[],int,int):Parameter[0]
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() { // $ sinkModelCandidate=close():Argument[this] sourceModelCandidate=close():Parameter[this]
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flush() { // $ sinkModelCandidate=flush():Argument[this] sourceModelCandidate=flush():Parameter[this]
|
||||
}
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
package com.github.codeql.test;
|
||||
|
||||
/**
|
||||
* No candidates in this class, as it's not public!
|
||||
*/
|
||||
class NonPublicClass {
|
||||
public void noCandidates(String here) {
|
||||
System.out.println(here);
|
||||
}
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
package com.github.codeql.test;
|
||||
|
||||
public class PublicClass {
|
||||
public void stuff(String arg) { // $ sinkModelCandidate=stuff(String):Argument[this] sourceModelCandidate=stuff(String):Parameter[this] sinkModelCandidate=stuff(String):Argument[0] sourceModelCandidate=stuff(String):Parameter[0] // source candidates because it is an overrideable method
|
||||
System.out.println(arg);
|
||||
}
|
||||
|
||||
public static void staticStuff(String arg) { // $ sinkModelCandidate=staticStuff(String):Argument[0] // `arg` is not a source candidate (not overridable); `this` is not a candidate (static method)
|
||||
System.out.println(arg);
|
||||
}
|
||||
|
||||
protected void nonPublicStuff(String arg) { // $ sinkModelCandidate=nonPublicStuff(String):Argument[this] sourceModelCandidate=nonPublicStuff(String):Parameter[this] sinkModelCandidate=nonPublicStuff(String):Argument[0] sourceModelCandidate=nonPublicStuff(String):Parameter[0]
|
||||
System.out.println(arg);
|
||||
}
|
||||
|
||||
void packagePrivateStuff(String arg) { // no candidates because the method is not public
|
||||
System.out.println(arg);
|
||||
}
|
||||
|
||||
public PublicClass(Object input) { // $ sourceModelCandidate=PublicClass(Object):ReturnValue sinkModelCandidate=PublicClass(Object):Argument[0] // `this` is not a candidate because it is a constructor
|
||||
}
|
||||
|
||||
// `this` and `input` are source candidates, but not sink candidates (is-style method)
|
||||
public Boolean isIgnored(Object input) { // $ negativeSinkExample=isIgnored(Object):Argument[this] sourceModelCandidate=isIgnored(Object):Parameter[this] negativeSinkExample=isIgnored(Object):Argument[0] sourceModelCandidate=isIgnored(Object):Parameter[0]
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -1,9 +0,0 @@
|
|||
package com.github.codeql.test;
|
||||
|
||||
public interface PublicInterface {
|
||||
public int stuff(String arg); // $ sinkModelCandidate=stuff(String):Argument[this] sourceModelCandidate=stuff(String):Parameter[this] sinkModelCandidate=stuff(String):Argument[0] sourceModelCandidate=stuff(String):Parameter[0] // result is _not_ a source candidate (primitive return type)
|
||||
|
||||
public static void staticStuff(String arg) { // $ sinkModelCandidate=staticStuff(String):Argument[0] // not a source candidate (static method)
|
||||
System.out.println(arg);
|
||||
}
|
||||
}
|
|
@ -1,13 +0,0 @@
|
|||
package java.io;
|
||||
|
||||
public class File {
|
||||
public int compareTo( // $ negativeSinkExample=compareTo(File):Argument[this] sourceModelCandidate=compareTo(File):Parameter[this] // modeled as neutral for sinks
|
||||
File pathname // $ negativeSinkExample=compareTo(File):Argument[0] sourceModelCandidate=compareTo(File):Parameter[0] // modeled as neutral for sinks
|
||||
) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public boolean setLastModified(long time) { // $ sinkModelCandidate=setLastModified(long):Argument[this] sourceModelCandidate=setLastModified(long):Parameter[this] // time is not a candidate (primitive type)
|
||||
return false;
|
||||
} // return value is not a source candidate because it's a primitive
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
package java.nio.file;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.OpenOption;
|
||||
|
||||
public class Files {
|
||||
public static void copy( // method result is not a candidate source (void)
|
||||
Path source, // $ positiveSinkExample=copy(Path,OutputStream):Argument[0](path-injection) // manual model exists
|
||||
OutputStream out // $ sinkModelCandidate=copy(Path,OutputStream):Argument[1]
|
||||
/* NB: may be worthwhile to implement the
|
||||
same behavior as in application mode where out would not be a
|
||||
candidate because there already is a model for another parameter of
|
||||
the same method and we assume that methods are always modeled
|
||||
completely.
|
||||
*/
|
||||
) throws IOException {
|
||||
// ...
|
||||
}
|
||||
|
||||
public static InputStream newInputStream( // $ sourceModelCandidate=newInputStream(Path,OpenOption[]):ReturnValue
|
||||
Path openPath, // $ positiveSinkExample=newInputStream(Path,OpenOption[]):Argument[0](path-injection) sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[0] // known sink, but still a candidate (ai-modeled, and useful as a candidate in regression testing)
|
||||
OpenOption... options // $ sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[1]
|
||||
) throws IOException {
|
||||
return new FileInputStream(openPath.toFile());
|
||||
}
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
---
|
||||
category: breaking
|
||||
---
|
||||
* CodeQL package management is now generally available, and all GitHub-produced CodeQL packages have had their version numbers increased to 1.0.0.
|
|
@ -1,13 +0,0 @@
|
|||
name: codeql/java-automodel-tests
|
||||
version: 1.0.0-dev
|
||||
groups:
|
||||
- java
|
||||
- automodel
|
||||
- test
|
||||
dependencies:
|
||||
codeql/java-all: ${workspace}
|
||||
codeql/java-automodel-queries: ${workspace}
|
||||
codeql/java-tests: ${workspace}
|
||||
extractor: java
|
||||
tests: .
|
||||
warnOnImplicitThis: true
|
Загрузка…
Ссылка в новой задаче