зеркало из https://github.com/mozilla/gecko-dev.git
servo: Merge #7449 - Issue #7393: Properly sniff mislabeled feeds (from simartin:issue_7393); r=metajack
Hi, This patch is an attempt to fix https://github.com/servo/servo/issues/7393, where the code detecting mislabeled feeds (see https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-feed) had spurious spaces in the URLs we need to match. Note that my testing (in particular rdf_rss_ko_2.xml) highlighted a flaw in "matches", which failed to check that there were more bytes in the string being checked than in the string we're checking against; this flaw completely broke the whole step 5.2.7. Thanks in advance for your review. Cheers, Simon Source-Repo: https://github.com/servo/servo Source-Revision: 9f85370885c84ebb58cd7f4a72a6e78948f468dc
This commit is contained in:
Родитель
9d89ba65ed
Коммит
b1a37d6095
|
@ -125,6 +125,10 @@ impl <'a, T: Iterator<Item=&'a u8> + Clone> Matches for T {
|
|||
// Side effects
|
||||
// moves the iterator when match is found
|
||||
fn matches(&mut self, matches: &[u8]) -> bool {
|
||||
if self.clone().nth(matches.len()).is_none() {
|
||||
// there are at most matches.len() elements left in self
|
||||
return false
|
||||
}
|
||||
let result = self.clone().zip(matches).all(|(s, m)| *s == *m);
|
||||
if result {
|
||||
self.nth(matches.len());
|
||||
|
@ -381,9 +385,10 @@ where T: Iterator<Item=&'a u8> + Clone {
|
|||
|
||||
struct FeedsClassifier;
|
||||
impl FeedsClassifier {
|
||||
// Implements sniffing for mislabeled feeds (https://mimesniff.spec.whatwg.org/#sniffing-a-mislabeled-feed)
|
||||
fn classify_impl(&self, data: &[u8]) -> Option<(&'static str, &'static str)> {
|
||||
|
||||
// can not be feed unless length is > 3
|
||||
// Step 4: can not be a feed unless length is >= 3
|
||||
if data.len() < 3 {
|
||||
return None;
|
||||
}
|
||||
|
@ -403,6 +408,7 @@ impl FeedsClassifier {
|
|||
return None;
|
||||
}
|
||||
|
||||
// Steps 5.2.1 to 5.2.4
|
||||
match eats_until(&mut matcher, b"?", b"?>")
|
||||
.chain(|| eats_until(&mut matcher, b"!--", b"-->"))
|
||||
.chain(|| eats_until(&mut matcher, b"!", b">")) {
|
||||
|
@ -411,20 +417,23 @@ impl FeedsClassifier {
|
|||
Match::Start => return None
|
||||
}
|
||||
|
||||
// Step 5.2.5
|
||||
if matcher.matches(b"rss") {
|
||||
return Some(("application", "rss+xml"));
|
||||
}
|
||||
// Step 5.2.6
|
||||
if matcher.matches(b"feed") {
|
||||
return Some(("application", "atom+xml"));
|
||||
}
|
||||
if matcher.matches(b"rdf: RDF") {
|
||||
// Step 5.2.7
|
||||
if matcher.matches(b"rdf:RDF") {
|
||||
while matcher.next().is_some() {
|
||||
match eats_until(&mut matcher,
|
||||
b"http: //purl.org/rss/1.0/",
|
||||
b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||
b"http://purl.org/rss/1.0/",
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#")
|
||||
.chain(|| eats_until(&mut matcher,
|
||||
b"http: //www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
b"http: //purl.org/rss/1.0/")) {
|
||||
b"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
||||
b"http://purl.org/rss/1.0/")) {
|
||||
Match::StartAndEnd => return Some(("application", "rss+xml")),
|
||||
Match::DidNotMatch => {},
|
||||
Match::Start => return None
|
||||
|
|
|
@ -434,7 +434,14 @@ fn test_sniff_utf_8_bom() {
|
|||
|
||||
#[test]
|
||||
fn test_sniff_rss_feed() {
|
||||
// RSS feeds
|
||||
test_sniff_full(&PathBuf::from("text/xml/feed.rss"), "application", "rss+xml", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss.xml"), "application", "rss+xml", Some(("text", "html")));
|
||||
// Not RSS feeds
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_1.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_2.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_3.xml"), "text", "html", Some(("text", "html")));
|
||||
test_sniff_full(&PathBuf::from("text/xml/rdf_rss_ko_4.xml"), "text", "html", Some(("text", "html")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
<!-- Good format for a "RDF feed" -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
|
@ -0,0 +1,7 @@
|
|||
<!-- Bad format for a "RDF feed" (space between "rdf:" and "RDF") -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf: RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
|
@ -0,0 +1,3 @@
|
|||
<!-- Bad format for a "RDF feed" (2 missing URLs) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF/>
|
|
@ -0,0 +1,6 @@
|
|||
<!-- Bad format for a "RDF feed" (one missing URL) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
>
|
||||
</rdf:RDF>
|
|
@ -0,0 +1,7 @@
|
|||
<!-- Bad format for a "RDF feed" (unexpected space in first URL) -->
|
||||
<?xml version="1.0"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http: //www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
</rdf:RDF>
|
Загрузка…
Ссылка в новой задаче