Merge pull request #13928 from asgerf/js/ignore-huge-files

JS: Ignore files larger than 10 MB during extraction
This commit is contained in:
Asger F 2023-08-23 15:09:58 +02:00 коммит произвёл GitHub
Родитель d2fca1b804 b93e404441
Коммит d146514275
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 49 добавлений и 24 удалений

Просмотреть файл

@ -222,6 +222,7 @@ public class AutoBuild {
private boolean installDependencies = false;
private final VirtualSourceRoot virtualSourceRoot;
private ExtractorState state;
private final long maximumFileSizeInMegabytes;
/** The default timeout when installing dependencies, in milliseconds. */
public static final int INSTALL_DEPENDENCIES_DEFAULT_TIMEOUT = 10 * 60 * 1000; // 10 minutes
@ -236,6 +237,7 @@ public class AutoBuild {
this.defaultEncoding = getEnvVar("LGTM_INDEX_DEFAULT_ENCODING");
this.installDependencies = Boolean.valueOf(getEnvVar("LGTM_INDEX_TYPESCRIPT_INSTALL_DEPS"));
this.virtualSourceRoot = makeVirtualSourceRoot();
this.maximumFileSizeInMegabytes = EnvironmentVariables.getMegabyteCountFromPrefixedEnv("MAX_FILE_SIZE", 10);
setupFileTypes();
setupXmlMode();
setupMatchers();
@ -446,8 +448,8 @@ public class AutoBuild {
}
/**
* Returns whether the autobuilder has seen code.
* This is overridden in tests.
* Returns whether the autobuilder has seen code.
* This is overridden in tests.
*/
protected boolean hasSeenCode() {
return seenCode;
@ -741,12 +743,12 @@ public class AutoBuild {
dependencyInstallationResult = this.preparePackagesAndDependencies(filesToExtract);
}
Set<Path> extractedFiles = new LinkedHashSet<>();
// Extract HTML files as they may contain TypeScript
CompletableFuture<?> htmlFuture = extractFiles(
filesToExtract, extractedFiles, extractors,
f -> extractors.fileType(f) == FileType.HTML);
htmlFuture.join(); // Wait for HTML extraction to be finished.
// extract TypeScript projects and files
@ -1229,6 +1231,11 @@ protected DependencyInstallationResult preparePackagesAndDependencies(Set<Path>
warn("Skipping " + file + ", which does not exist.");
return;
}
long fileSize = f.length();
if (fileSize > 1_000_000L * this.maximumFileSizeInMegabytes) {
warn("Skipping " + file + " because it is too large (" + StringUtil.printFloat(fileSize / 1_000_000.0) + " MB). The limit is " + this.maximumFileSizeInMegabytes + " MB.");
return;
}
try {
long start = logBeginProcess("Extracting " + file);

Просмотреть файл

@ -1,5 +1,6 @@
package com.semmle.js.extractor;
import com.semmle.util.data.UnitParser;
import com.semmle.util.exception.UserError;
import com.semmle.util.process.Env;
import com.semmle.util.process.Env.Var;
@ -7,7 +8,7 @@ import com.semmle.util.process.Env.Var;
public class EnvironmentVariables {
public static final String CODEQL_EXTRACTOR_JAVASCRIPT_ROOT_ENV_VAR =
"CODEQL_EXTRACTOR_JAVASCRIPT_ROOT";
public static final String CODEQL_EXTRACTOR_JAVASCRIPT_SCRATCH_DIR_ENV_VAR =
"CODEQL_EXTRACTOR_JAVASCRIPT_SCRATCH_DIR";
@ -19,6 +20,36 @@ public class EnvironmentVariables {
public static final String CODEQL_DIST_ENV_VAR = "CODEQL_DIST";
/**
 * Looks up a megabyte count in the environment, falling back to {@code defaultValue} when no
 * candidate variable holds a non-empty value.
 * <p>
 * The variable name is built by prepending each of the following prefixes to {@code suffix},
 * in this order, and the first non-empty value found is the one that gets parsed:
 * <code>CODEQL_EXTRACTOR_JAVASCRIPT_</code>,
 * <code>LGTM_</code>,
 * <code>SEMMLE_</code>.
 *
 * @param suffix the part of the variable name that follows the prefix
 * @param defaultValue the value returned when none of the variables is set to a non-empty value
 * @return the parsed amount, or {@code defaultValue}
 * @throws UserError if <code>UnitParser.parseOpt</code> returns <code>null</code> for the value
 */
public static int getMegabyteCountFromPrefixedEnv(String suffix, int defaultValue) {
  String[] prefixes = {"CODEQL_EXTRACTOR_JAVASCRIPT_", "LGTM_", "SEMMLE_"};
  for (String prefix : prefixes) {
    String name = prefix + suffix;
    String raw = Env.systemEnv().get(name);
    if (raw == null || raw.isEmpty()) {
      // Unset or blank: move on to the next prefix.
      continue;
    }
    // The first non-empty value wins; later prefixes are not consulted even if it is invalid.
    Integer megabytes = UnitParser.parseOpt(raw, UnitParser.MEGABYTES);
    if (megabytes == null) {
      throw new UserError("Invalid value for " + name + ": '" + raw + "'");
    }
    return megabytes;
  }
  return defaultValue;
}
/**
* Gets the extractor root based on the <code>CODEQL_EXTRACTOR_JAVASCRIPT_ROOT</code> or
* <code>SEMMLE_DIST</code> environment variable, or <code>null</code> if neither is set.

Просмотреть файл

@ -273,23 +273,6 @@ public class TypeScriptParser {
return result;
}
/**
 * Reads a megabyte count from the <code>SEMMLE_</code>- or <code>LGTM_</code>-prefixed
 * environment variable (tried in that order), returning {@code defaultValue} when neither
 * holds a non-empty value.
 */
private static int getMegabyteCountFromPrefixedEnv(String suffix, int defaultValue) {
  String[] prefixes = {"SEMMLE_", "LGTM_"};
  for (String prefix : prefixes) {
    String name = prefix + suffix;
    String raw = Env.systemEnv().get(name);
    if (raw == null || raw.isEmpty()) {
      // Unset or blank: try the next prefix.
      continue;
    }
    // The first non-empty value is the one parsed; a parse failure is reported against it.
    Integer megabytes = UnitParser.parseOpt(raw, UnitParser.MEGABYTES);
    if (megabytes == null) {
      throw new UserError("Invalid value for " + name + ": '" + raw + "'");
    }
    return megabytes;
  }
  return defaultValue;
}
/** Start the Node.js parser wrapper process. */
private void setupParserWrapper() {
verifyNodeInstallation();
@ -297,8 +280,8 @@ public class TypeScriptParser {
int mainMemoryMb =
typescriptRam != 0
? typescriptRam
: getMegabyteCountFromPrefixedEnv(TYPESCRIPT_RAM_SUFFIX, 2000);
int reserveMemoryMb = getMegabyteCountFromPrefixedEnv(TYPESCRIPT_RAM_RESERVE_SUFFIX, 400);
: EnvironmentVariables.getMegabyteCountFromPrefixedEnv(TYPESCRIPT_RAM_SUFFIX, 2000);
int reserveMemoryMb = EnvironmentVariables.getMegabyteCountFromPrefixedEnv(TYPESCRIPT_RAM_RESERVE_SUFFIX, 400);
System.out.println("Memory for TypeScript process: " + mainMemoryMb + " MB, and " + reserveMemoryMb + " MB reserve");

Просмотреть файл

@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Files larger than 10 MB are no longer extracted or analyzed.