зеркало из https://github.com/github/codeql.git
Merge pull request #551 from asger-semmle/js-extractor-shebang
Approved by xiemaisi
This commit is contained in:
Коммит
e66691a90c
|
@ -5,6 +5,7 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
@ -38,6 +39,11 @@ public class FileExtractor {
|
|||
*/
|
||||
public static final Pattern JSON_OBJECT_START = Pattern.compile("^(?s)\\s*\\{\\s*\"([^\"]|\\\\.)*\"\\s*:.*");
|
||||
|
||||
/**
|
||||
* The charset for decoding UTF-8 strings.
|
||||
*/
|
||||
private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
|
||||
|
||||
/**
|
||||
* Information about supported file types.
|
||||
*/
|
||||
|
@ -169,6 +175,11 @@ public class FileExtractor {
|
|||
if (isXml(bytes, length))
|
||||
return true;
|
||||
|
||||
// Avoid files with an unrecognized shebang header.
|
||||
if (hasUnrecognizedShebang(bytes, length)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
} catch (IOException e) {
|
||||
Exceptions.ignore(e, "Let extractor handle this one.");
|
||||
|
@ -249,6 +260,38 @@ public class FileExtractor {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the byte sequence starts with a shebang line that is not
|
||||
* recognized as a JavaScript interpreter.
|
||||
*/
|
||||
private boolean hasUnrecognizedShebang(byte[] bytes, int length) {
|
||||
// Shebangs preceded by a BOM aren't recognized in UNIX, but the BOM might only
|
||||
// be present in the source file, to be stripped out in the build process.
|
||||
int startIndex = skipBOM(bytes, length);
|
||||
if (startIndex + 2 >= length) return false;
|
||||
if (bytes[startIndex] != '#' || bytes[startIndex + 1] != '!') {
|
||||
return false;
|
||||
}
|
||||
int endOfLine = -1;
|
||||
for (int i = startIndex; i < length; ++i) {
|
||||
if (bytes[i] == '\r' || bytes[i] == '\n') {
|
||||
endOfLine = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (endOfLine == -1) {
|
||||
// The shebang is either very long or there are no other lines in the file.
|
||||
// Treat this as unrecognized.
|
||||
return true;
|
||||
}
|
||||
// Extract the shebang text
|
||||
int startOfText = startIndex + "#!".length();
|
||||
int lengthOfText = endOfLine - startOfText;
|
||||
String text = new String(bytes, startOfText, lengthOfText, UTF8_CHARSET);
|
||||
// Check if the shebang is a recognized JavaScript intepreter.
|
||||
return !NODE_INVOCATION.matcher(text).find();
|
||||
}
|
||||
|
||||
@Override
|
||||
public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) {
|
||||
return new TypeScriptExtractor(config, state.getTypeScriptParser());
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
#!/usr/bin/env perl
|
||||
|
||||
use strict;
|
||||
|
||||
exit 0;
|
|
@ -0,0 +1,4 @@
|
|||
#!/usr/bin/env node
|
||||
interface Foo {
|
||||
x: number;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
interface Foo {
|
||||
x: number;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"typescript": true
|
||||
}
|
|
@ -0,0 +1,129 @@
|
|||
#10000=@"/typescript-with-shebang.ts;sourcefile"
|
||||
files(#10000,"/typescript-with-shebang.ts","typescript-with-shebang","ts",0)
|
||||
#10001=@"/;folder"
|
||||
folders(#10001,"/","")
|
||||
containerparent(#10001,#10000)
|
||||
#10002=@"loc,{#10000},0,0,0,0"
|
||||
locations_default(#10002,#10000,0,0,0,0)
|
||||
hasLocation(#10000,#10002)
|
||||
#20000=@"global_scope"
|
||||
scopes(#20000,0)
|
||||
#20001=@"script;{#10000},1,1"
|
||||
toplevels(#20001,0)
|
||||
#20002=@"loc,{#10000},1,1,5,0"
|
||||
locations_default(#20002,#10000,1,1,5,0)
|
||||
hasLocation(#20001,#20002)
|
||||
#20003=@"local_type_name;{Foo};{#20000}"
|
||||
local_type_names(#20003,"Foo",#20000)
|
||||
#20004=*
|
||||
stmts(#20004,34,#20001,0,"#!/usr/ ... mber;\n}")
|
||||
#20005=@"loc,{#10000},1,1,4,1"
|
||||
locations_default(#20005,#10000,1,1,4,1)
|
||||
hasLocation(#20004,#20005)
|
||||
stmtContainers(#20004,#20001)
|
||||
#20006=*
|
||||
typeexprs(#20006,1,#20004,0,"Foo")
|
||||
#20007=@"loc,{#10000},2,11,2,13"
|
||||
locations_default(#20007,#10000,2,11,2,13)
|
||||
hasLocation(#20006,#20007)
|
||||
enclosingStmt(#20006,#20004)
|
||||
exprContainers(#20006,#20001)
|
||||
literals("Foo","Foo",#20006)
|
||||
typedecl(#20006,#20003)
|
||||
#20008=*
|
||||
properties(#20008,#20004,2,8,"x: number;")
|
||||
#20009=@"loc,{#10000},3,3,3,12"
|
||||
locations_default(#20009,#10000,3,3,3,12)
|
||||
hasLocation(#20008,#20009)
|
||||
#20010=*
|
||||
exprs(#20010,0,#20008,0,"x")
|
||||
#20011=@"loc,{#10000},3,3,3,3"
|
||||
locations_default(#20011,#10000,3,3,3,3)
|
||||
hasLocation(#20010,#20011)
|
||||
enclosingStmt(#20010,#20004)
|
||||
exprContainers(#20010,#20001)
|
||||
literals("x","x",#20010)
|
||||
isAbstractMember(#20008)
|
||||
#20012=*
|
||||
typeexprs(#20012,2,#20008,2,"number")
|
||||
#20013=@"loc,{#10000},3,6,3,11"
|
||||
locations_default(#20013,#10000,3,6,3,11)
|
||||
hasLocation(#20012,#20013)
|
||||
enclosingStmt(#20012,#20004)
|
||||
exprContainers(#20012,#20001)
|
||||
literals("number","number",#20012)
|
||||
#20014=*
|
||||
lines(#20014,#20001,"#!/usr/bin/env node","
|
||||
")
|
||||
#20015=@"loc,{#10000},1,1,1,19"
|
||||
locations_default(#20015,#10000,1,1,1,19)
|
||||
hasLocation(#20014,#20015)
|
||||
#20016=*
|
||||
lines(#20016,#20001,"interface Foo {","
|
||||
")
|
||||
#20017=@"loc,{#10000},2,1,2,15"
|
||||
locations_default(#20017,#10000,2,1,2,15)
|
||||
hasLocation(#20016,#20017)
|
||||
#20018=*
|
||||
lines(#20018,#20001," x: number;","
|
||||
")
|
||||
#20019=@"loc,{#10000},3,1,3,12"
|
||||
locations_default(#20019,#10000,3,1,3,12)
|
||||
hasLocation(#20018,#20019)
|
||||
indentation(#10000,3," ",2)
|
||||
#20020=*
|
||||
lines(#20020,#20001,"}","
|
||||
")
|
||||
#20021=@"loc,{#10000},4,1,4,1"
|
||||
locations_default(#20021,#10000,4,1,4,1)
|
||||
hasLocation(#20020,#20021)
|
||||
numlines(#20001,4,3,0)
|
||||
#20022=*
|
||||
tokeninfo(#20022,7,#20001,0,"interface")
|
||||
#20023=@"loc,{#10000},2,1,2,9"
|
||||
locations_default(#20023,#10000,2,1,2,9)
|
||||
hasLocation(#20022,#20023)
|
||||
#20024=*
|
||||
tokeninfo(#20024,6,#20001,1,"Foo")
|
||||
hasLocation(#20024,#20007)
|
||||
#20025=*
|
||||
tokeninfo(#20025,8,#20001,2,"{")
|
||||
#20026=@"loc,{#10000},2,15,2,15"
|
||||
locations_default(#20026,#10000,2,15,2,15)
|
||||
hasLocation(#20025,#20026)
|
||||
#20027=*
|
||||
tokeninfo(#20027,6,#20001,3,"x")
|
||||
hasLocation(#20027,#20011)
|
||||
#20028=*
|
||||
tokeninfo(#20028,8,#20001,4,":")
|
||||
#20029=@"loc,{#10000},3,4,3,4"
|
||||
locations_default(#20029,#10000,3,4,3,4)
|
||||
hasLocation(#20028,#20029)
|
||||
#20030=*
|
||||
tokeninfo(#20030,7,#20001,5,"number")
|
||||
hasLocation(#20030,#20013)
|
||||
#20031=*
|
||||
tokeninfo(#20031,8,#20001,6,";")
|
||||
#20032=@"loc,{#10000},3,12,3,12"
|
||||
locations_default(#20032,#10000,3,12,3,12)
|
||||
hasLocation(#20031,#20032)
|
||||
#20033=*
|
||||
tokeninfo(#20033,8,#20001,7,"}")
|
||||
hasLocation(#20033,#20021)
|
||||
#20034=*
|
||||
tokeninfo(#20034,0,#20001,8,"")
|
||||
#20035=@"loc,{#10000},5,1,5,0"
|
||||
locations_default(#20035,#10000,5,1,5,0)
|
||||
hasLocation(#20034,#20035)
|
||||
#20036=*
|
||||
entry_cfg_node(#20036,#20001)
|
||||
#20037=@"loc,{#10000},1,1,1,0"
|
||||
locations_default(#20037,#10000,1,1,1,0)
|
||||
hasLocation(#20036,#20037)
|
||||
#20038=*
|
||||
exit_cfg_node(#20038,#20001)
|
||||
hasLocation(#20038,#20035)
|
||||
successor(#20004,#20038)
|
||||
successor(#20036,#20004)
|
||||
numlines(#10000,4,3,0)
|
||||
filetype(#10000,"typescript")
|
|
@ -0,0 +1,123 @@
|
|||
#10000=@"/typescript.ts;sourcefile"
|
||||
files(#10000,"/typescript.ts","typescript","ts",0)
|
||||
#10001=@"/;folder"
|
||||
folders(#10001,"/","")
|
||||
containerparent(#10001,#10000)
|
||||
#10002=@"loc,{#10000},0,0,0,0"
|
||||
locations_default(#10002,#10000,0,0,0,0)
|
||||
hasLocation(#10000,#10002)
|
||||
#20000=@"global_scope"
|
||||
scopes(#20000,0)
|
||||
#20001=@"script;{#10000},1,1"
|
||||
toplevels(#20001,0)
|
||||
#20002=@"loc,{#10000},1,1,4,0"
|
||||
locations_default(#20002,#10000,1,1,4,0)
|
||||
hasLocation(#20001,#20002)
|
||||
#20003=@"local_type_name;{Foo};{#20000}"
|
||||
local_type_names(#20003,"Foo",#20000)
|
||||
#20004=*
|
||||
stmts(#20004,34,#20001,0,"interfa ... mber;\n}")
|
||||
#20005=@"loc,{#10000},1,1,3,1"
|
||||
locations_default(#20005,#10000,1,1,3,1)
|
||||
hasLocation(#20004,#20005)
|
||||
stmtContainers(#20004,#20001)
|
||||
#20006=*
|
||||
typeexprs(#20006,1,#20004,0,"Foo")
|
||||
#20007=@"loc,{#10000},1,11,1,13"
|
||||
locations_default(#20007,#10000,1,11,1,13)
|
||||
hasLocation(#20006,#20007)
|
||||
enclosingStmt(#20006,#20004)
|
||||
exprContainers(#20006,#20001)
|
||||
literals("Foo","Foo",#20006)
|
||||
typedecl(#20006,#20003)
|
||||
#20008=*
|
||||
properties(#20008,#20004,2,8,"x: number;")
|
||||
#20009=@"loc,{#10000},2,3,2,12"
|
||||
locations_default(#20009,#10000,2,3,2,12)
|
||||
hasLocation(#20008,#20009)
|
||||
#20010=*
|
||||
exprs(#20010,0,#20008,0,"x")
|
||||
#20011=@"loc,{#10000},2,3,2,3"
|
||||
locations_default(#20011,#10000,2,3,2,3)
|
||||
hasLocation(#20010,#20011)
|
||||
enclosingStmt(#20010,#20004)
|
||||
exprContainers(#20010,#20001)
|
||||
literals("x","x",#20010)
|
||||
isAbstractMember(#20008)
|
||||
#20012=*
|
||||
typeexprs(#20012,2,#20008,2,"number")
|
||||
#20013=@"loc,{#10000},2,6,2,11"
|
||||
locations_default(#20013,#10000,2,6,2,11)
|
||||
hasLocation(#20012,#20013)
|
||||
enclosingStmt(#20012,#20004)
|
||||
exprContainers(#20012,#20001)
|
||||
literals("number","number",#20012)
|
||||
#20014=*
|
||||
lines(#20014,#20001,"interface Foo {","
|
||||
")
|
||||
#20015=@"loc,{#10000},1,1,1,15"
|
||||
locations_default(#20015,#10000,1,1,1,15)
|
||||
hasLocation(#20014,#20015)
|
||||
#20016=*
|
||||
lines(#20016,#20001," x: number;","
|
||||
")
|
||||
#20017=@"loc,{#10000},2,1,2,12"
|
||||
locations_default(#20017,#10000,2,1,2,12)
|
||||
hasLocation(#20016,#20017)
|
||||
indentation(#10000,2," ",2)
|
||||
#20018=*
|
||||
lines(#20018,#20001,"}","
|
||||
")
|
||||
#20019=@"loc,{#10000},3,1,3,1"
|
||||
locations_default(#20019,#10000,3,1,3,1)
|
||||
hasLocation(#20018,#20019)
|
||||
numlines(#20001,3,3,0)
|
||||
#20020=*
|
||||
tokeninfo(#20020,7,#20001,0,"interface")
|
||||
#20021=@"loc,{#10000},1,1,1,9"
|
||||
locations_default(#20021,#10000,1,1,1,9)
|
||||
hasLocation(#20020,#20021)
|
||||
#20022=*
|
||||
tokeninfo(#20022,6,#20001,1,"Foo")
|
||||
hasLocation(#20022,#20007)
|
||||
#20023=*
|
||||
tokeninfo(#20023,8,#20001,2,"{")
|
||||
#20024=@"loc,{#10000},1,15,1,15"
|
||||
locations_default(#20024,#10000,1,15,1,15)
|
||||
hasLocation(#20023,#20024)
|
||||
#20025=*
|
||||
tokeninfo(#20025,6,#20001,3,"x")
|
||||
hasLocation(#20025,#20011)
|
||||
#20026=*
|
||||
tokeninfo(#20026,8,#20001,4,":")
|
||||
#20027=@"loc,{#10000},2,4,2,4"
|
||||
locations_default(#20027,#10000,2,4,2,4)
|
||||
hasLocation(#20026,#20027)
|
||||
#20028=*
|
||||
tokeninfo(#20028,7,#20001,5,"number")
|
||||
hasLocation(#20028,#20013)
|
||||
#20029=*
|
||||
tokeninfo(#20029,8,#20001,6,";")
|
||||
#20030=@"loc,{#10000},2,12,2,12"
|
||||
locations_default(#20030,#10000,2,12,2,12)
|
||||
hasLocation(#20029,#20030)
|
||||
#20031=*
|
||||
tokeninfo(#20031,8,#20001,7,"}")
|
||||
hasLocation(#20031,#20019)
|
||||
#20032=*
|
||||
tokeninfo(#20032,0,#20001,8,"")
|
||||
#20033=@"loc,{#10000},4,1,4,0"
|
||||
locations_default(#20033,#10000,4,1,4,0)
|
||||
hasLocation(#20032,#20033)
|
||||
#20034=*
|
||||
entry_cfg_node(#20034,#20001)
|
||||
#20035=@"loc,{#10000},1,1,1,0"
|
||||
locations_default(#20035,#10000,1,1,1,0)
|
||||
hasLocation(#20034,#20035)
|
||||
#20036=*
|
||||
exit_cfg_node(#20036,#20001)
|
||||
hasLocation(#20036,#20033)
|
||||
successor(#20004,#20036)
|
||||
successor(#20034,#20004)
|
||||
numlines(#10000,3,3,0)
|
||||
filetype(#10000,"typescript")
|
Загрузка…
Ссылка в новой задаче