diff --git a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java index e850e9e3b34..664d898d5e1 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java +++ b/javascript/extractor/src/com/semmle/js/extractor/HTMLExtractor.java @@ -89,18 +89,23 @@ public class HTMLExtractor implements IExtractor { } } else { Attributes attributes = elt.getAttributes(); + boolean attributesAreExtracted = shouldExtractAttributes(elt); // attributes can be null for directives if (attributes != null) for (Attribute attr : attributes) { // ignore empty attributes if (attr.getValue() == null || attr.getValue().isEmpty()) continue; + // If attributes are not extracted we can't use the attribute as the parent node. + // In this case, use the enclosing element as the node. + Segment parentSegment = attributesAreExtracted ? attr : elt; + extractTemplateTags( textualExtractor, attr.getSource(), attr.getBegin(), attr.getEnd(), - () -> context.getNodeLabel(attr)); + () -> context.getNodeLabel(parentSegment)); String source = attr.getValue(); int valueStart = attr.getValueSegment().getBegin(); @@ -113,7 +118,7 @@ public class HTMLExtractor implements IExtractor { source, valueStart, false /* isTypeScript */, - context.getNodeLabel(attr)); + context.getNodeLabel(parentSegment)); } else if (isAngularTemplateAttributeName(attr.getName())) { // For an attribute *ngFor="let var of EXPR", start parsing at EXPR int offset = 0; @@ -133,7 +138,7 @@ public class HTMLExtractor implements IExtractor { source, valueStart + offset, false /* isTypeScript */, - context.getNodeLabel(attr)); + context.getNodeLabel(parentSegment)); } else if (source.startsWith("javascript:")) { source = source.substring(11); extractSnippet( @@ -144,7 +149,7 @@ public class HTMLExtractor implements IExtractor { source, valueStart + 11, false /* isTypeScript */, - context.getNodeLabel(attr)); + context.getNodeLabel(parentSegment)); } } } diff --git a/javascript/extractor/src/com/semmle/js/extractor/Main.java b/javascript/extractor/src/com/semmle/js/extractor/Main.java index 201f918f3e4..7132ba9cc2b 100644 --- a/javascript/extractor/src/com/semmle/js/extractor/Main.java +++ b/javascript/extractor/src/com/semmle/js/extractor/Main.java @@ -43,7 +43,7 @@ public class Main { * A version identifier that should be updated every time the extractor changes in such a way that * it may produce different tuples for the same file under the same {@link ExtractorConfig}. */ - public static final String EXTRACTOR_VERSION = "2021-09-01"; + public static final String EXTRACTOR_VERSION = "2021-10-25"; public static final Pattern NEWLINE = Pattern.compile("\n"); diff --git a/javascript/extractor/tests/generatedcode/input/attributes.html b/javascript/extractor/tests/generatedcode/input/attributes.html new file mode 100644 index 00000000000..8735b988cb4 --- /dev/null +++ b/javascript/extractor/tests/generatedcode/input/attributes.html @@ -0,0 +1,10 @@ + + + +
+
+
+
+
+ + diff --git a/javascript/extractor/tests/generatedcode/output/trap/attributes.html.trap b/javascript/extractor/tests/generatedcode/output/trap/attributes.html.trap new file mode 100644 index 00000000000..cb5eb79b1ca --- /dev/null +++ b/javascript/extractor/tests/generatedcode/output/trap/attributes.html.trap @@ -0,0 +1,392 @@ +#10000=@"/attributes.html;sourcefile" +files(#10000,"/attributes.html") +#10001=@"/;folder" +folders(#10001,"/") +containerparent(#10001,#10000) +#10002=@"loc,{#10000},0,0,0,0" +locations_default(#10002,#10000,0,0,0,0) +hasLocation(#10000,#10002) +#20000=@"global_scope" +scopes(#20000,0) +#20001=* +#20002=* +template_placeholder_tag_info(#20001,#20002,"{{foo}}") +#20003=@"loc,{#10000},4,14,4,20" +locations_default(#20003,#10000,4,14,4,20) +hasLocation(#20001,#20003) +scopes(#20000,0) +#20004=@"script;{#10000},4,16" +#20005=* +lines(#20005,#20004,"foo","") +#20006=@"loc,{#10000},4,16,4,18" +locations_default(#20006,#10000,4,16,4,18) +hasLocation(#20005,#20006) +numlines(#20004,1,1,0) +#20007=* +tokeninfo(#20007,6,#20004,0,"foo") +hasLocation(#20007,#20006) +#20008=* +tokeninfo(#20008,0,#20004,1,"") +#20009=@"loc,{#10000},4,19,4,18" +locations_default(#20009,#10000,4,19,4,18) +hasLocation(#20008,#20009) +toplevels(#20004,4) +hasLocation(#20004,#20006) +#20010=@"module;{#10000},4,16" +scopes(#20010,3) +scopenodes(#20004,#20010) +scopenesting(#20010,#20000) +is_module(#20004) +#20011=* +stmts(#20011,2,#20004,0,"foo") +hasLocation(#20011,#20006) +stmt_containers(#20011,#20004) +#20012=* +exprs(#20012,79,#20011,0,"foo") +hasLocation(#20012,#20006) +enclosing_stmt(#20012,#20011) +expr_containers(#20012,#20004) +literals("foo","foo",#20012) +#20013=@"var;{foo};{#20010}" +variables(#20013,"foo",#20010) +bind(#20012,#20013) +#20014=* +entry_cfg_node(#20014,#20004) +#20015=@"loc,{#10000},4,16,4,15" +locations_default(#20015,#10000,4,16,4,15) +hasLocation(#20014,#20015) +#20016=* +exit_cfg_node(#20016,#20004) +hasLocation(#20016,#20009) +successor(#20011,#20012) +successor(#20012,#20016) +successor(#20014,#20011) +toplevel_parent_xml_node(#20004,#20001) +#20017=* +#20018=* +template_placeholder_tag_info(#20017,#20018,"{{{foo}}}") +#20019=@"loc,{#10000},5,14,5,22" +locations_default(#20019,#10000,5,14,5,22) +hasLocation(#20017,#20019) +scopes(#20000,0) +#20020=@"script;{#10000},5,17" +#20021=* +lines(#20021,#20020,"foo","") +#20022=@"loc,{#10000},5,17,5,19" +locations_default(#20022,#10000,5,17,5,19) +hasLocation(#20021,#20022) +numlines(#20020,1,1,0) +#20023=* +tokeninfo(#20023,6,#20020,0,"foo") +hasLocation(#20023,#20022) +#20024=* +tokeninfo(#20024,0,#20020,1,"") +#20025=@"loc,{#10000},5,20,5,19" +locations_default(#20025,#10000,5,20,5,19) +hasLocation(#20024,#20025) +toplevels(#20020,4) +hasLocation(#20020,#20022) +#20026=@"module;{#10000},5,17" +scopes(#20026,3) +scopenodes(#20020,#20026) +scopenesting(#20026,#20000) +is_module(#20020) +#20027=* +stmts(#20027,2,#20020,0,"foo") +hasLocation(#20027,#20022) +stmt_containers(#20027,#20020) +#20028=* +exprs(#20028,79,#20027,0,"foo") +hasLocation(#20028,#20022) +enclosing_stmt(#20028,#20027) +expr_containers(#20028,#20020) +literals("foo","foo",#20028) +#20029=@"var;{foo};{#20026}" +variables(#20029,"foo",#20026) +bind(#20028,#20029) +#20030=* +entry_cfg_node(#20030,#20020) +#20031=@"loc,{#10000},5,17,5,16" +locations_default(#20031,#10000,5,17,5,16) +hasLocation(#20030,#20031) +#20032=* +exit_cfg_node(#20032,#20020) +hasLocation(#20032,#20025) +successor(#20027,#20028) +successor(#20028,#20032) +successor(#20030,#20027) +toplevel_parent_xml_node(#20020,#20017) +#20033=* +template_placeholder_tag_info(#20033,#20018,"{{/foo}}") +#20034=@"loc,{#10000},5,23,5,30" +locations_default(#20034,#10000,5,23,5,30) +hasLocation(#20033,#20034) +scopes(#20000,0) +#20035=@"script;{#10000},5,25" +toplevels(#20035,4) +#20036=@"loc,{#10000},5,25,5,25" +locations_default(#20036,#10000,5,25,5,25) +hasLocation(#20035,#20036) +#20037=* +js_parse_errors(#20037,#20035,"Error: Unterminated regular expression","/foo") +#20038=@"loc,{#10000},5,26,5,26" +locations_default(#20038,#10000,5,26,5,26) +hasLocation(#20037,#20038) +#20039=* +lines(#20039,#20035,"/foo","") +#20040=@"loc,{#10000},5,25,5,28" +locations_default(#20040,#10000,5,25,5,28) +hasLocation(#20039,#20040) +numlines(#20035,1,0,0) +toplevel_parent_xml_node(#20035,#20033) +#20041=* +#20042=* +template_placeholder_tag_info(#20041,#20042,"{{#foo}}") +#20043=@"loc,{#10000},6,14,6,21" +locations_default(#20043,#10000,6,14,6,21) +hasLocation(#20041,#20043) +scopes(#20000,0) +#20044=@"script;{#10000},6,16" +toplevels(#20044,4) +#20045=@"loc,{#10000},6,16,6,16" +locations_default(#20045,#10000,6,16,6,16) +hasLocation(#20044,#20045) +#20046=* +js_parse_errors(#20046,#20044,"Error: Unexpected token","#foo") +hasLocation(#20046,#20045) +#20047=* +lines(#20047,#20044,"#foo","") +#20048=@"loc,{#10000},6,16,6,19" +locations_default(#20048,#10000,6,16,6,19) +hasLocation(#20047,#20048) +numlines(#20044,1,0,0) +toplevel_parent_xml_node(#20044,#20041) +#20049=* +template_placeholder_tag_info(#20049,#20042,"{{/foo}}") +#20050=@"loc,{#10000},6,22,6,29" +locations_default(#20050,#10000,6,22,6,29) +hasLocation(#20049,#20050) +scopes(#20000,0) +#20051=@"script;{#10000},6,24" +toplevels(#20051,4) +#20052=@"loc,{#10000},6,24,6,24" +locations_default(#20052,#10000,6,24,6,24) +hasLocation(#20051,#20052) +#20053=* +js_parse_errors(#20053,#20051,"Error: Unterminated regular expression","/foo") +#20054=@"loc,{#10000},6,25,6,25" +locations_default(#20054,#10000,6,25,6,25) +hasLocation(#20053,#20054) +#20055=* +lines(#20055,#20051,"/foo","") +#20056=@"loc,{#10000},6,24,6,27" +locations_default(#20056,#10000,6,24,6,27) +hasLocation(#20055,#20056) +numlines(#20051,1,0,0) +toplevel_parent_xml_node(#20051,#20049) +#20057=* +#20058=* +template_placeholder_tag_info(#20057,#20058,"{{#foo}}") +#20059=@"loc,{#10000},8,18,8,25" +locations_default(#20059,#10000,8,18,8,25) +hasLocation(#20057,#20059) +scopes(#20000,0) +#20060=@"script;{#10000},8,20" +toplevels(#20060,4) +#20061=@"loc,{#10000},8,20,8,20" +locations_default(#20061,#10000,8,20,8,20) +hasLocation(#20060,#20061) +#20062=* +js_parse_errors(#20062,#20060,"Error: Unexpected token","#foo") +hasLocation(#20062,#20061) +#20063=* +lines(#20063,#20060,"#foo","") +#20064=@"loc,{#10000},8,20,8,23" +locations_default(#20064,#10000,8,20,8,23) +hasLocation(#20063,#20064) +numlines(#20060,1,0,0) +toplevel_parent_xml_node(#20060,#20057) +#20065=* +template_placeholder_tag_info(#20065,#20058,"{{baz}}") +#20066=@"loc,{#10000},8,30,8,36" +locations_default(#20066,#10000,8,30,8,36) +hasLocation(#20065,#20066) +scopes(#20000,0) +#20067=@"script;{#10000},8,32" +#20068=* +lines(#20068,#20067,"baz","") +#20069=@"loc,{#10000},8,32,8,34" +locations_default(#20069,#10000,8,32,8,34) +hasLocation(#20068,#20069) +numlines(#20067,1,1,0) +#20070=* +tokeninfo(#20070,6,#20067,0,"baz") +hasLocation(#20070,#20069) +#20071=* +tokeninfo(#20071,0,#20067,1,"") +#20072=@"loc,{#10000},8,35,8,34" +locations_default(#20072,#10000,8,35,8,34) +hasLocation(#20071,#20072) +toplevels(#20067,4) +hasLocation(#20067,#20069) +#20073=@"module;{#10000},8,32" +scopes(#20073,3) +scopenodes(#20067,#20073) +scopenesting(#20073,#20000) +is_module(#20067) +#20074=* +stmts(#20074,2,#20067,0,"baz") +hasLocation(#20074,#20069) +stmt_containers(#20074,#20067) +#20075=* +exprs(#20075,79,#20074,0,"baz") +hasLocation(#20075,#20069) +enclosing_stmt(#20075,#20074) +expr_containers(#20075,#20067) +literals("baz","baz",#20075) +#20076=@"var;{baz};{#20073}" +variables(#20076,"baz",#20073) +bind(#20075,#20076) +#20077=* +entry_cfg_node(#20077,#20067) +#20078=@"loc,{#10000},8,32,8,31" +locations_default(#20078,#10000,8,32,8,31) +hasLocation(#20077,#20078) +#20079=* +exit_cfg_node(#20079,#20067) +hasLocation(#20079,#20072) +successor(#20074,#20075) +successor(#20075,#20079) +successor(#20077,#20074) +toplevel_parent_xml_node(#20067,#20065) +#20080=* +template_placeholder_tag_info(#20080,#20058,"{{/foo}}") +#20081=@"loc,{#10000},8,37,8,44" +locations_default(#20081,#10000,8,37,8,44) +hasLocation(#20080,#20081) +scopes(#20000,0) +#20082=@"script;{#10000},8,39" +toplevels(#20082,4) +#20083=@"loc,{#10000},8,39,8,39" +locations_default(#20083,#10000,8,39,8,39) +hasLocation(#20082,#20083) +#20084=* +js_parse_errors(#20084,#20082,"Error: Unterminated regular expression","/foo") +#20085=@"loc,{#10000},8,40,8,40" +locations_default(#20085,#10000,8,40,8,40) +hasLocation(#20084,#20085) +#20086=* +lines(#20086,#20082,"/foo","") +#20087=@"loc,{#10000},8,39,8,42" +locations_default(#20087,#10000,8,39,8,42) +hasLocation(#20086,#20087) +numlines(#20082,1,0,0) +toplevel_parent_xml_node(#20082,#20080) +#20088=* +xmlChars(#20088," +",#10000,0,0,#10000) +#20089=@"loc,{#10000},1,16,1,16" +locations_default(#20089,#10000,1,16,1,16) +xmllocations(#20088,#20089) +#20090=* +xmlChars(#20090," +",#10000,2,0,#10000) +#20091=@"loc,{#10000},10,8,10,8" +locations_default(#20091,#10000,10,8,10,8) +xmllocations(#20090,#20091) +#20092=* +xmlElements(#20092,"html",#10000,1,#10000) +#20093=@"loc,{#10000},2,1,10,7" +locations_default(#20093,#10000,2,1,10,7) +xmllocations(#20092,#20093) +#20094=* +xmlChars(#20094," + ",#20092,0,0,#10000) +#20095=@"loc,{#10000},2,7,3,2" +locations_default(#20095,#10000,2,7,3,2) +xmllocations(#20094,#20095) +#20096=* +xmlChars(#20096," +",#20092,2,0,#10000) +#20097=@"loc,{#10000},9,10,9,10" +locations_default(#20097,#10000,9,10,9,10) +xmllocations(#20096,#20097) +#20098=* +xmlElements(#20098,"body",#20092,1,#10000) +#20099=@"loc,{#10000},3,3,9,9" +locations_default(#20099,#10000,3,3,9,9) +xmllocations(#20098,#20099) +#20100=* +xmlChars(#20100," + ",#20098,0,0,#10000) +#20101=@"loc,{#10000},3,9,4,4" +locations_default(#20101,#10000,3,9,4,4) +xmllocations(#20100,#20101) +#20102=* +xmlChars(#20102," + ",#20098,2,0,#10000) +#20103=@"loc,{#10000},4,28,5,4" +locations_default(#20103,#10000,4,28,5,4) +xmllocations(#20102,#20103) +#20104=* +xmlChars(#20104," + ",#20098,4,0,#10000) +#20105=@"loc,{#10000},5,33,6,4" +locations_default(#20105,#10000,5,33,6,4) +xmllocations(#20104,#20105) +#20106=* +xmlChars(#20106," + ",#20098,6,0,#10000) +#20107=@"loc,{#10000},6,32,7,4" +locations_default(#20107,#10000,6,32,7,4) +xmllocations(#20106,#20107) +#20108=* +xmlChars(#20108," + ",#20098,8,0,#10000) +#20109=@"loc,{#10000},7,36,8,4" +locations_default(#20109,#10000,7,36,8,4) +xmllocations(#20108,#20109) +#20110=* +xmlChars(#20110," + ",#20098,10,0,#10000) +#20111=@"loc,{#10000},8,47,9,2" +locations_default(#20111,#10000,8,47,9,2) +xmllocations(#20110,#20111) +xmlElements(#20058,"div",#20098,9,#10000) +#20112=@"loc,{#10000},8,5,8,46" +locations_default(#20112,#10000,8,5,8,46) +xmllocations(#20058,#20112) +#20113=* +xmlElements(#20113,"div",#20098,7,#10000) +#20114=@"loc,{#10000},7,5,7,35" +locations_default(#20114,#10000,7,5,7,35) +xmllocations(#20113,#20114) +#20115=* +xmlElements(#20115,"div",#20098,5,#10000) +#20116=@"loc,{#10000},6,5,6,31" +locations_default(#20116,#10000,6,5,6,31) +xmllocations(#20115,#20116) +xmlAttrs(#20042,#20115,"foo","{{#foo}}{{/foo}}/",0,#10000) +#20117=@"loc,{#10000},6,10,6,30" +locations_default(#20117,#10000,6,10,6,30) +xmllocations(#20042,#20117) +#20118=* +xmlElements(#20118,"div",#20098,3,#10000) +#20119=@"loc,{#10000},5,5,5,32" +locations_default(#20119,#10000,5,5,5,32) +xmllocations(#20118,#20119) +xmlAttrs(#20018,#20118,"foo","{{{foo}}}{{/foo}}/",0,#10000) +#20120=@"loc,{#10000},5,10,5,31" +locations_default(#20120,#10000,5,10,5,31) +xmllocations(#20018,#20120) +#20121=* +xmlElements(#20121,"div",#20098,1,#10000) +#20122=@"loc,{#10000},4,5,4,27" +locations_default(#20122,#10000,4,5,4,27) +xmllocations(#20121,#20122) +xmlAttrs(#20002,#20121,"foo","{{foo}}",0,#10000) +#20123=@"loc,{#10000},4,10,4,20" +locations_default(#20123,#10000,4,10,4,20) +xmllocations(#20002,#20123) +numlines(#10000,10,3,0) +filetype(#10000,"html")