Merge pull request #551 from asger-semmle/js-extractor-shebang

Approved by xiemaisi
This commit is contained in:
semmle-qlci 2018-11-28 08:49:44 +00:00 коммит произвёл GitHub
Родитель 31ac33e723 623a80fe90
Коммит e66691a90c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 310 добавлений и 0 удалений

Просмотреть файл

@ -5,6 +5,7 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Pattern;
@ -38,6 +39,11 @@ public class FileExtractor {
*/
public static final Pattern JSON_OBJECT_START = Pattern.compile("^(?s)\\s*\\{\\s*\"([^\"]|\\\\.)*\"\\s*:.*");
/**
* The UTF-8 charset, used when decoding raw file bytes into text
* (for example, the interpreter portion of a shebang line).
*/
private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
/**
* Information about supported file types.
*/
@ -169,6 +175,11 @@ public class FileExtractor {
if (isXml(bytes, length))
return true;
// Avoid files with an unrecognized shebang header.
if (hasUnrecognizedShebang(bytes, length)) {
return true;
}
return false;
} catch (IOException e) {
Exceptions.ignore(e, "Let extractor handle this one.");
@ -249,6 +260,38 @@ public class FileExtractor {
return false;
}
/**
 * Decides whether a file header opens with a shebang ({@code #!}) line whose
 * contents do not match {@code NODE_INVOCATION}.
 *
 * @param bytes  buffer holding the leading bytes of the file
 * @param length number of bytes in {@code bytes} that are to be inspected
 * @return {@code true} if a shebang is present and either its line has no
 *         terminator within the first {@code length} bytes or its text does not
 *         match {@code NODE_INVOCATION}; {@code false} if there is no shebang or
 *         the shebang is recognized
 */
private boolean hasUnrecognizedShebang(byte[] bytes, int length) {
  // UNIX does not honour a shebang that follows a BOM, but the BOM may exist
  // only in the source file and be removed by a build step, so look past it.
  final int begin = skipBOM(bytes, length);

  // Require the "#!" marker plus at least one further byte after it.
  boolean tooShort = begin + 2 >= length;
  if (tooShort || !(bytes[begin] == '#' && bytes[begin + 1] == '!')) {
    return false;
  }

  // Advance to the first line terminator (CR or LF), if any.
  int cursor = begin;
  while (cursor < length && bytes[cursor] != '\r' && bytes[cursor] != '\n') {
    cursor++;
  }
  if (cursor >= length) {
    // No terminator was found: the shebang line is very long, or it is the
    // only line in the file. Either way it is treated as unrecognized.
    return true;
  }

  // Decode everything between "#!" and the line terminator.
  final int interpreterStart = begin + 2;
  String interpreter =
      new String(bytes, interpreterStart, cursor - interpreterStart, UTF8_CHARSET);

  // The shebang is recognized only if it looks like a JavaScript interpreter invocation.
  boolean recognized = NODE_INVOCATION.matcher(interpreter).find();
  return !recognized;
}
@Override
public IExtractor mkExtractor(ExtractorConfig config, ExtractorState state) {
return new TypeScriptExtractor(config, state.getTypeScriptParser());

Просмотреть файл

@ -0,0 +1,5 @@
#!/usr/bin/env perl
use strict;
exit 0;

Просмотреть файл

@ -0,0 +1,4 @@
#!/usr/bin/env node
interface Foo {
x: number;
}

Просмотреть файл

@ -0,0 +1,3 @@
interface Foo {
x: number;
}

Просмотреть файл

@ -0,0 +1,3 @@
{
"typescript": true
}

Просмотреть файл

@ -0,0 +1,129 @@
#10000=@"/typescript-with-shebang.ts;sourcefile"
files(#10000,"/typescript-with-shebang.ts","typescript-with-shebang","ts",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=@"global_scope"
scopes(#20000,0)
#20001=@"script;{#10000},1,1"
toplevels(#20001,0)
#20002=@"loc,{#10000},1,1,5,0"
locations_default(#20002,#10000,1,1,5,0)
hasLocation(#20001,#20002)
#20003=@"local_type_name;{Foo};{#20000}"
local_type_names(#20003,"Foo",#20000)
#20004=*
stmts(#20004,34,#20001,0,"#!/usr/ ... mber;\n}")
#20005=@"loc,{#10000},1,1,4,1"
locations_default(#20005,#10000,1,1,4,1)
hasLocation(#20004,#20005)
stmtContainers(#20004,#20001)
#20006=*
typeexprs(#20006,1,#20004,0,"Foo")
#20007=@"loc,{#10000},2,11,2,13"
locations_default(#20007,#10000,2,11,2,13)
hasLocation(#20006,#20007)
enclosingStmt(#20006,#20004)
exprContainers(#20006,#20001)
literals("Foo","Foo",#20006)
typedecl(#20006,#20003)
#20008=*
properties(#20008,#20004,2,8,"x: number;")
#20009=@"loc,{#10000},3,3,3,12"
locations_default(#20009,#10000,3,3,3,12)
hasLocation(#20008,#20009)
#20010=*
exprs(#20010,0,#20008,0,"x")
#20011=@"loc,{#10000},3,3,3,3"
locations_default(#20011,#10000,3,3,3,3)
hasLocation(#20010,#20011)
enclosingStmt(#20010,#20004)
exprContainers(#20010,#20001)
literals("x","x",#20010)
isAbstractMember(#20008)
#20012=*
typeexprs(#20012,2,#20008,2,"number")
#20013=@"loc,{#10000},3,6,3,11"
locations_default(#20013,#10000,3,6,3,11)
hasLocation(#20012,#20013)
enclosingStmt(#20012,#20004)
exprContainers(#20012,#20001)
literals("number","number",#20012)
#20014=*
lines(#20014,#20001,"#!/usr/bin/env node","
")
#20015=@"loc,{#10000},1,1,1,19"
locations_default(#20015,#10000,1,1,1,19)
hasLocation(#20014,#20015)
#20016=*
lines(#20016,#20001,"interface Foo {","
")
#20017=@"loc,{#10000},2,1,2,15"
locations_default(#20017,#10000,2,1,2,15)
hasLocation(#20016,#20017)
#20018=*
lines(#20018,#20001," x: number;","
")
#20019=@"loc,{#10000},3,1,3,12"
locations_default(#20019,#10000,3,1,3,12)
hasLocation(#20018,#20019)
indentation(#10000,3," ",2)
#20020=*
lines(#20020,#20001,"}","
")
#20021=@"loc,{#10000},4,1,4,1"
locations_default(#20021,#10000,4,1,4,1)
hasLocation(#20020,#20021)
numlines(#20001,4,3,0)
#20022=*
tokeninfo(#20022,7,#20001,0,"interface")
#20023=@"loc,{#10000},2,1,2,9"
locations_default(#20023,#10000,2,1,2,9)
hasLocation(#20022,#20023)
#20024=*
tokeninfo(#20024,6,#20001,1,"Foo")
hasLocation(#20024,#20007)
#20025=*
tokeninfo(#20025,8,#20001,2,"{")
#20026=@"loc,{#10000},2,15,2,15"
locations_default(#20026,#10000,2,15,2,15)
hasLocation(#20025,#20026)
#20027=*
tokeninfo(#20027,6,#20001,3,"x")
hasLocation(#20027,#20011)
#20028=*
tokeninfo(#20028,8,#20001,4,":")
#20029=@"loc,{#10000},3,4,3,4"
locations_default(#20029,#10000,3,4,3,4)
hasLocation(#20028,#20029)
#20030=*
tokeninfo(#20030,7,#20001,5,"number")
hasLocation(#20030,#20013)
#20031=*
tokeninfo(#20031,8,#20001,6,";")
#20032=@"loc,{#10000},3,12,3,12"
locations_default(#20032,#10000,3,12,3,12)
hasLocation(#20031,#20032)
#20033=*
tokeninfo(#20033,8,#20001,7,"}")
hasLocation(#20033,#20021)
#20034=*
tokeninfo(#20034,0,#20001,8,"")
#20035=@"loc,{#10000},5,1,5,0"
locations_default(#20035,#10000,5,1,5,0)
hasLocation(#20034,#20035)
#20036=*
entry_cfg_node(#20036,#20001)
#20037=@"loc,{#10000},1,1,1,0"
locations_default(#20037,#10000,1,1,1,0)
hasLocation(#20036,#20037)
#20038=*
exit_cfg_node(#20038,#20001)
hasLocation(#20038,#20035)
successor(#20004,#20038)
successor(#20036,#20004)
numlines(#10000,4,3,0)
filetype(#10000,"typescript")

Просмотреть файл

@ -0,0 +1,123 @@
#10000=@"/typescript.ts;sourcefile"
files(#10000,"/typescript.ts","typescript","ts",0)
#10001=@"/;folder"
folders(#10001,"/","")
containerparent(#10001,#10000)
#10002=@"loc,{#10000},0,0,0,0"
locations_default(#10002,#10000,0,0,0,0)
hasLocation(#10000,#10002)
#20000=@"global_scope"
scopes(#20000,0)
#20001=@"script;{#10000},1,1"
toplevels(#20001,0)
#20002=@"loc,{#10000},1,1,4,0"
locations_default(#20002,#10000,1,1,4,0)
hasLocation(#20001,#20002)
#20003=@"local_type_name;{Foo};{#20000}"
local_type_names(#20003,"Foo",#20000)
#20004=*
stmts(#20004,34,#20001,0,"interfa ... mber;\n}")
#20005=@"loc,{#10000},1,1,3,1"
locations_default(#20005,#10000,1,1,3,1)
hasLocation(#20004,#20005)
stmtContainers(#20004,#20001)
#20006=*
typeexprs(#20006,1,#20004,0,"Foo")
#20007=@"loc,{#10000},1,11,1,13"
locations_default(#20007,#10000,1,11,1,13)
hasLocation(#20006,#20007)
enclosingStmt(#20006,#20004)
exprContainers(#20006,#20001)
literals("Foo","Foo",#20006)
typedecl(#20006,#20003)
#20008=*
properties(#20008,#20004,2,8,"x: number;")
#20009=@"loc,{#10000},2,3,2,12"
locations_default(#20009,#10000,2,3,2,12)
hasLocation(#20008,#20009)
#20010=*
exprs(#20010,0,#20008,0,"x")
#20011=@"loc,{#10000},2,3,2,3"
locations_default(#20011,#10000,2,3,2,3)
hasLocation(#20010,#20011)
enclosingStmt(#20010,#20004)
exprContainers(#20010,#20001)
literals("x","x",#20010)
isAbstractMember(#20008)
#20012=*
typeexprs(#20012,2,#20008,2,"number")
#20013=@"loc,{#10000},2,6,2,11"
locations_default(#20013,#10000,2,6,2,11)
hasLocation(#20012,#20013)
enclosingStmt(#20012,#20004)
exprContainers(#20012,#20001)
literals("number","number",#20012)
#20014=*
lines(#20014,#20001,"interface Foo {","
")
#20015=@"loc,{#10000},1,1,1,15"
locations_default(#20015,#10000,1,1,1,15)
hasLocation(#20014,#20015)
#20016=*
lines(#20016,#20001," x: number;","
")
#20017=@"loc,{#10000},2,1,2,12"
locations_default(#20017,#10000,2,1,2,12)
hasLocation(#20016,#20017)
indentation(#10000,2," ",2)
#20018=*
lines(#20018,#20001,"}","
")
#20019=@"loc,{#10000},3,1,3,1"
locations_default(#20019,#10000,3,1,3,1)
hasLocation(#20018,#20019)
numlines(#20001,3,3,0)
#20020=*
tokeninfo(#20020,7,#20001,0,"interface")
#20021=@"loc,{#10000},1,1,1,9"
locations_default(#20021,#10000,1,1,1,9)
hasLocation(#20020,#20021)
#20022=*
tokeninfo(#20022,6,#20001,1,"Foo")
hasLocation(#20022,#20007)
#20023=*
tokeninfo(#20023,8,#20001,2,"{")
#20024=@"loc,{#10000},1,15,1,15"
locations_default(#20024,#10000,1,15,1,15)
hasLocation(#20023,#20024)
#20025=*
tokeninfo(#20025,6,#20001,3,"x")
hasLocation(#20025,#20011)
#20026=*
tokeninfo(#20026,8,#20001,4,":")
#20027=@"loc,{#10000},2,4,2,4"
locations_default(#20027,#10000,2,4,2,4)
hasLocation(#20026,#20027)
#20028=*
tokeninfo(#20028,7,#20001,5,"number")
hasLocation(#20028,#20013)
#20029=*
tokeninfo(#20029,8,#20001,6,";")
#20030=@"loc,{#10000},2,12,2,12"
locations_default(#20030,#10000,2,12,2,12)
hasLocation(#20029,#20030)
#20031=*
tokeninfo(#20031,8,#20001,7,"}")
hasLocation(#20031,#20019)
#20032=*
tokeninfo(#20032,0,#20001,8,"")
#20033=@"loc,{#10000},4,1,4,0"
locations_default(#20033,#10000,4,1,4,0)
hasLocation(#20032,#20033)
#20034=*
entry_cfg_node(#20034,#20001)
#20035=@"loc,{#10000},1,1,1,0"
locations_default(#20035,#10000,1,1,1,0)
hasLocation(#20034,#20035)
#20036=*
exit_cfg_node(#20036,#20001)
hasLocation(#20036,#20033)
successor(#20004,#20036)
successor(#20034,#20004)
numlines(#10000,3,3,0)
filetype(#10000,"typescript")