зеркало из https://github.com/mozilla/gecko-dev.git
servo: Merge #7447 - Issue #7382 Use descriptive enums instead of booleans for MIMEClassifier::classify (from ddrmanxbxfr:master); r=jdm
Hi everyone, I've done a small refactoring pass over the MIMEClassifier implementation (see issue #7382): - Moved the predicates into separate functions - Added a media type enum so the types can be compared easily after calling MIMEClassifier::get_media_type. I hope it follows good Rust practices (bear in mind that this is my first time writing Rust). Improvements and tips are welcome :). Thanks for looking at it. Source-Repo: https://github.com/servo/servo Source-Revision: 8a8204ffc8fa287dde2321c40d12b191b51960da
This commit is contained in:
Родитель
1b09c3bfc2
Коммит
3db547ce58
|
@ -6,45 +6,58 @@ use std::borrow::ToOwned;
|
|||
|
||||
pub struct MIMEClassifier {
|
||||
image_classifier: GroupedClassifier,
|
||||
audio_video_classifer: GroupedClassifier,
|
||||
audio_video_classifier: GroupedClassifier,
|
||||
scriptable_classifier: GroupedClassifier,
|
||||
plaintext_classifier: GroupedClassifier,
|
||||
archive_classifer: GroupedClassifier,
|
||||
archive_classifier: GroupedClassifier,
|
||||
binary_or_plaintext: BinaryOrPlaintextClassifier,
|
||||
feeds_classifier: FeedsClassifier
|
||||
}
|
||||
|
||||
/// Coarse category of a supplied MIME type, as computed by
/// `MIMEClassifier::get_media_type`. `MIMEClassifier::classify` matches on it
/// to pick the sniffing rule for a supplied type.
///
/// `PartialEq` is derived so that categories can be compared directly.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum MediaType {
    /// The supplied type satisfies `MIMEClassifier::is_xml`.
    Xml,
    /// The supplied type is `text/html`.
    Html,
    /// The supplied type is `audio/*`, `video/*` or `application/ogg`.
    AudioVideo,
    /// The supplied top-level type is `image`.
    Image,
}
|
||||
|
||||
/// Whether the "check for Apache bug" step of MIME sniffing applies.
///
/// When it is `ON` (and sniffing is allowed), `MIMEClassifier::classify`
/// ignores the supplied type's category and only distinguishes text from
/// binary data via `sniff_text_or_data`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum ApacheBugFlag {
    /// The resource must be checked for the Apache bug.
    ON,
    /// No Apache bug check; classification follows the supplied type.
    OFF
}
|
||||
|
||||
/// Whether content sniffing has been disabled for a resource.
///
/// When it is `ON`, `MIMEClassifier::classify` returns a known supplied type
/// unchanged, and `sniff_unknown_type` skips the scriptable classifier.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum NoSniffFlag {
    /// Sniffing is disabled: trust the supplied type.
    ON,
    /// Sniffing is allowed.
    OFF
}
|
||||
|
||||
impl MIMEClassifier {
|
||||
//Performs MIME Type Sniffing Algorithm (section 7)
|
||||
pub fn classify(&self,
|
||||
no_sniff: bool,
|
||||
check_for_apache_bug: bool,
|
||||
no_sniff_flag: NoSniffFlag,
|
||||
apache_bug_flag: ApacheBugFlag,
|
||||
supplied_type: &Option<(String, String)>,
|
||||
data: &[u8]) -> Option<(String, String)> {
|
||||
|
||||
match *supplied_type {
|
||||
None => self.sniff_unknown_type(!no_sniff, data),
|
||||
None => self.sniff_unknown_type(no_sniff_flag, data),
|
||||
Some((ref media_type, ref media_subtype)) => {
|
||||
match (&**media_type, &**media_subtype) {
|
||||
("unknown", "unknown") |
|
||||
("application", "unknown") |
|
||||
("*", "*") => self.sniff_unknown_type(!no_sniff, data),
|
||||
_ => {
|
||||
if no_sniff {
|
||||
supplied_type.clone()
|
||||
} else if check_for_apache_bug {
|
||||
self.sniff_text_or_data(data)
|
||||
} else if MIMEClassifier::is_xml(media_type, media_subtype) {
|
||||
supplied_type.clone()
|
||||
} else if MIMEClassifier::is_html(media_type, media_subtype) {
|
||||
//Implied in section 7.3, but flow is not clear
|
||||
self.feeds_classifier.classify(data).or(supplied_type.clone())
|
||||
} else {
|
||||
match (&**media_type, &**media_subtype) {
|
||||
("image", _) => self.image_classifier.classify(data),
|
||||
("audio", _) | ("video", _) | ("application", "ogg") =>
|
||||
self.audio_video_classifer.classify(data),
|
||||
_ => None
|
||||
if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) {
|
||||
self.sniff_unknown_type(no_sniff_flag, data)
|
||||
} else {
|
||||
match no_sniff_flag {
|
||||
NoSniffFlag::ON => supplied_type.clone(),
|
||||
NoSniffFlag::OFF => match apache_bug_flag {
|
||||
ApacheBugFlag::ON => self.sniff_text_or_data(data),
|
||||
ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type,
|
||||
media_subtype) {
|
||||
Some(MediaType::Xml) => supplied_type.clone(),
|
||||
Some(MediaType::Html) =>
|
||||
//Implied in section 7.3, but flow is not clear
|
||||
self.feeds_classifier.classify(data).or(supplied_type.clone()),
|
||||
Some(MediaType::Image) => self.image_classifier.classify(data),
|
||||
Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
|
||||
None => None
|
||||
}.or(supplied_type.clone())
|
||||
}
|
||||
}
|
||||
|
@ -56,26 +69,30 @@ impl MIMEClassifier {
|
|||
pub fn new() -> MIMEClassifier {
|
||||
MIMEClassifier {
|
||||
image_classifier: GroupedClassifier::image_classifer(),
|
||||
audio_video_classifer: GroupedClassifier::audio_video_classifer(),
|
||||
audio_video_classifier: GroupedClassifier::audio_video_classifier(),
|
||||
scriptable_classifier: GroupedClassifier::scriptable_classifier(),
|
||||
plaintext_classifier: GroupedClassifier::plaintext_classifier(),
|
||||
archive_classifer: GroupedClassifier::archive_classifier(),
|
||||
archive_classifier: GroupedClassifier::archive_classifier(),
|
||||
binary_or_plaintext: BinaryOrPlaintextClassifier,
|
||||
feeds_classifier: FeedsClassifier
|
||||
}
|
||||
}
|
||||
|
||||
//some sort of iterator over the classifiers might be better?
|
||||
fn sniff_unknown_type(&self, sniff_scriptable: bool, data: &[u8]) ->
|
||||
fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) ->
|
||||
Option<(String, String)> {
|
||||
if sniff_scriptable {
|
||||
let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::OFF;
|
||||
let sniffed = if should_sniff_scriptable {
|
||||
self.scriptable_classifier.classify(data)
|
||||
} else {
|
||||
None
|
||||
}.or_else(|| self.plaintext_classifier.classify(data))
|
||||
.or_else(|| self.image_classifier.classify(data))
|
||||
.or_else(|| self.audio_video_classifer.classify(data))
|
||||
.or_else(|| self.archive_classifer.classify(data))
|
||||
.or_else(|| self.binary_or_plaintext.classify(data))
|
||||
};
|
||||
|
||||
sniffed.or_else(|| self.plaintext_classifier.classify(data))
|
||||
.or_else(|| self.image_classifier.classify(data))
|
||||
.or_else(|| self.audio_video_classifier.classify(data))
|
||||
.or_else(|| self.archive_classifier.classify(data))
|
||||
.or_else(|| self.binary_or_plaintext.classify(data))
|
||||
}
|
||||
|
||||
fn sniff_text_or_data(&self, data: &[u8]) -> Option<(String, String)> {
|
||||
|
@ -93,6 +110,40 @@ impl MIMEClassifier {
|
|||
/// Returns `true` only for the exact MIME type `text/html`.
///
/// The comparison is case-sensitive on both `tp` (top-level type) and
/// `sub_tp` (subtype).
fn is_html(tp: &str, sub_tp: &str) -> bool {
    tp == "text" && sub_tp == "html"
}
|
||||
|
||||
/// Returns `true` when the top-level type `tp` is exactly `image`.
///
/// Only the top-level type matters, so any subtype counts as an image.
fn is_image(tp: &str) -> bool {
    tp == "image"
}
|
||||
|
||||
/// Returns `true` for the types handled by the audio/video classifier:
/// any `audio/*`, any `video/*`, and the single type `application/ogg`.
fn is_audio_video(tp: &str, sub_tp: &str) -> bool {
    match (tp, sub_tp) {
        ("audio", _) |
        ("video", _) |
        ("application", "ogg") => true,
        _ => false
    }
}
|
||||
|
||||
/// Returns `true` when the supplied type explicitly says "unknown":
/// `unknown/unknown`, `application/unknown` or `*/*`.
///
/// `MIMEClassifier::classify` treats such a type like a missing one and
/// sniffs the data from scratch.
fn is_explicit_unknown(tp: &str, sub_tp: &str) -> bool {
    match (tp, sub_tp) {
        ("unknown", "unknown") |
        ("application", "unknown") |
        ("*", "*") => true,
        _ => false
    }
}
|
||||
|
||||
fn get_media_type(media_type: &String,
|
||||
media_subtype: &String) -> Option<MediaType> {
|
||||
if MIMEClassifier::is_xml(media_type, media_subtype) {
|
||||
Some(MediaType::Xml)
|
||||
} else if MIMEClassifier::is_html(media_type, media_subtype) {
|
||||
Some(MediaType::Html)
|
||||
} else if MIMEClassifier::is_image(media_type) {
|
||||
Some(MediaType::Image)
|
||||
} else if MIMEClassifier::is_audio_video(media_type, media_subtype) {
|
||||
Some(MediaType::AudioVideo)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_string_option(tup: Option<(&'static str, &'static str)>) -> Option<(String, String)> {
|
||||
|
@ -227,8 +278,8 @@ struct BinaryOrPlaintextClassifier;
|
|||
|
||||
impl BinaryOrPlaintextClassifier {
|
||||
fn classify_impl(&self, data: &[u8]) -> (&'static str, &'static str) {
|
||||
if data == &[0xFFu8, 0xFEu8] ||
|
||||
data == &[0xFEu8, 0xFFu8] ||
|
||||
if data.starts_with(&[0xFFu8, 0xFEu8]) ||
|
||||
data.starts_with(&[0xFEu8, 0xFFu8]) ||
|
||||
data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
|
||||
{
|
||||
("text", "plain")
|
||||
|
@ -265,7 +316,7 @@ impl GroupedClassifier {
|
|||
]
|
||||
}
|
||||
}
|
||||
fn audio_video_classifer() -> GroupedClassifier {
|
||||
fn audio_video_classifier() -> GroupedClassifier {
|
||||
GroupedClassifier {
|
||||
byte_matchers: vec![
|
||||
box ByteMatcher::video_webm(),
|
||||
|
|
|
@ -10,8 +10,7 @@ use cookie_storage::CookieStorage;
|
|||
use data_loader;
|
||||
use file_loader;
|
||||
use http_loader::{self, create_http_connector, Connector};
|
||||
use mime_classifier::MIMEClassifier;
|
||||
|
||||
use mime_classifier::{ApacheBugFlag, MIMEClassifier, NoSniffFlag};
|
||||
use net_traits::ProgressMsg::Done;
|
||||
use net_traits::{ControlMsg, LoadData, LoadResponse, LoadConsumer, CookieSource};
|
||||
use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, ResponseAction};
|
||||
|
@ -29,7 +28,9 @@ use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
|
|||
|
||||
use std::borrow::ToOwned;
|
||||
use std::boxed::FnBox;
|
||||
use std::sync::Arc;
|
||||
|
||||
use std::sync::{Arc};
|
||||
|
||||
use std::sync::mpsc::{channel, Sender};
|
||||
|
||||
pub enum ProgressSender {
|
||||
|
@ -72,21 +73,20 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
|
|||
-> Result<ProgressSender, ()> {
|
||||
if opts::get().sniff_mime_types {
|
||||
// TODO: should be calculated in the resource loader, from pull request #4094
|
||||
let mut nosniff = false;
|
||||
let mut check_for_apache_bug = false;
|
||||
let mut no_sniff = NoSniffFlag::OFF;
|
||||
let mut check_for_apache_bug = ApacheBugFlag::OFF;
|
||||
|
||||
if let Some(ref headers) = metadata.headers {
|
||||
if let Some(ref raw_content_type) = headers.get_raw("content-type") {
|
||||
if raw_content_type.len() > 0 {
|
||||
let ref last_raw_content_type = raw_content_type[raw_content_type.len() - 1];
|
||||
check_for_apache_bug = last_raw_content_type == b"text/plain"
|
||||
|| last_raw_content_type == b"text/plain; charset=ISO-8859-1"
|
||||
|| last_raw_content_type == b"text/plain; charset=iso-8859-1"
|
||||
|| last_raw_content_type == b"text/plain; charset=UTF-8";
|
||||
check_for_apache_bug = apache_bug_predicate(last_raw_content_type)
|
||||
}
|
||||
}
|
||||
if let Some(ref raw_content_type_options) = headers.get_raw("X-content-type-options") {
|
||||
nosniff = raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff");
|
||||
if raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff") {
|
||||
no_sniff = NoSniffFlag::ON
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,7 +94,7 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
|
|||
metadata.content_type.map(|ContentType(Mime(toplevel, sublevel, _))| {
|
||||
(format!("{}", toplevel), format!("{}", sublevel))
|
||||
});
|
||||
metadata.content_type = classifier.classify(nosniff, check_for_apache_bug, &supplied_type,
|
||||
metadata.content_type = classifier.classify(no_sniff, check_for_apache_bug, &supplied_type,
|
||||
&partial_body).map(|(toplevel, sublevel)| {
|
||||
let mime_tp: TopLevel = toplevel.parse().unwrap();
|
||||
let mime_sb: SubLevel = sublevel.parse().unwrap();
|
||||
|
@ -106,6 +106,17 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
|
|||
start_sending_opt(start_chan, metadata)
|
||||
}
|
||||
|
||||
fn apache_bug_predicate(last_raw_content_type: &[u8]) -> ApacheBugFlag {
|
||||
if last_raw_content_type == b"text/plain"
|
||||
|| last_raw_content_type == b"text/plain; charset=ISO-8859-1"
|
||||
|| last_raw_content_type == b"text/plain; charset=iso-8859-1"
|
||||
|| last_raw_content_type == b"text/plain; charset=UTF-8" {
|
||||
ApacheBugFlag::ON
|
||||
} else {
|
||||
ApacheBugFlag::OFF
|
||||
}
|
||||
}
|
||||
|
||||
/// For use by loaders in responding to a Load message.
|
||||
pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result<ProgressSender, ()> {
|
||||
match start_chan {
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use net::mime_classifier::as_string_option;
|
||||
use net::mime_classifier::{Mp4Matcher, MIMEClassifier};
|
||||
use net::mime_classifier::{Mp4Matcher, MIMEClassifier, ApacheBugFlag, NoSniffFlag};
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
|
@ -37,8 +37,12 @@ fn test_sniff_mp4_matcher() {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
|
||||
supplied_type: Option<(&'static str, &'static str)>) {
|
||||
fn test_sniff_with_flags(filename_orig: &path::Path,
|
||||
type_string: &str,
|
||||
subtype_string: &str,
|
||||
supplied_type: Option<(&'static str, &'static str)>,
|
||||
no_sniff_flag: NoSniffFlag,
|
||||
apache_bug_flag: ApacheBugFlag) {
|
||||
let current_working_directory = env::current_dir().unwrap();
|
||||
println!("The current directory is {}", current_working_directory.display());
|
||||
|
||||
|
@ -51,7 +55,7 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
|
|||
|
||||
match read_result {
|
||||
Ok(data) => {
|
||||
match classifier.classify(false, false, &as_string_option(supplied_type), &data) {
|
||||
match classifier.classify(no_sniff_flag, apache_bug_flag, &as_string_option(supplied_type), &data) {
|
||||
Some((parsed_type, parsed_subtp)) => {
|
||||
if (&parsed_type[..] != type_string) ||
|
||||
(&parsed_subtp[..] != subtype_string) {
|
||||
|
@ -69,6 +73,17 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
|
|||
}
|
||||
}
|
||||
|
||||
/// Test helper: sniffs the file at `filename_orig` with sniffing enabled and
/// no Apache bug check, and expects `type_string/subtype_string`.
///
/// It forwards to `test_sniff_with_flags` with both flags `OFF`, so older
/// tests keep the behaviour they had before the flags were introduced.
#[cfg(test)]
fn test_sniff_full(filename_orig: &path::Path,
                   type_string: &str,
                   subtype_string: &str,
                   supplied_type: Option<(&'static str, &'static str)>) {
    test_sniff_with_flags(filename_orig,
                          type_string,
                          subtype_string,
                          supplied_type,
                          NoSniffFlag::OFF,
                          ApacheBugFlag::OFF)
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn test_sniff_classification(file: &str, type_string: &str, subtype_string: &str,
|
||||
supplied_type: Option<(&'static str, &'static str)>) {
|
||||
|
@ -448,3 +463,79 @@ fn test_sniff_rss_feed() {
|
|||
fn test_sniff_atom_feed() {
|
||||
test_sniff_full(&PathBuf::from("text/xml/feed.atom"), "application", "atom+xml", Some(("text", "html")));
|
||||
}
|
||||
|
||||
/// With no supplied type, the binary fixture is expected to be classified
/// as `application/octet-stream`.
#[test]
fn test_sniff_binary_file() {
    test_sniff_full(&PathBuf::from("unknown/binary_file"), "application", "octet-stream", None);
}
|
||||
|
||||
/// With the no-sniff flag on, the supplied `text/html` must be returned
/// unchanged even though the file is an Atom feed.
#[test]
fn test_sniff_atom_feed_with_no_sniff_flag_on() {
    test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
                          "text",
                          "html",
                          Some(("text", "html")),
                          NoSniffFlag::ON,
                          ApacheBugFlag::OFF);
}
|
||||
|
||||
/// The no-sniff flag takes precedence over the Apache bug flag: with both
/// on, the supplied `text/html` is still returned unchanged.
#[test]
fn test_sniff_with_no_sniff_flag_on_and_apache_flag_on() {
    test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
                          "text",
                          "html",
                          Some(("text", "html")),
                          NoSniffFlag::ON,
                          ApacheBugFlag::ON);
}
|
||||
|
||||
/// With the Apache bug flag on, the UTF-8 BOM fixture is expected to be
/// classified as `text/plain`, whatever the supplied type says.
#[test]
fn test_sniff_utf_8_bom_with_apache_flag_on() {
    test_sniff_with_flags(&PathBuf::from("text/plain/utf8bom.txt"),
                          "text",
                          "plain",
                          Some(("dummy", "text")),
                          NoSniffFlag::OFF,
                          ApacheBugFlag::ON);
}
|
||||
|
||||
/// With the Apache bug flag on, the UTF-16 big-endian BOM fixture is
/// expected to be classified as `text/plain`.
#[test]
fn test_sniff_utf_16be_bom_with_apache_flag_on() {
    test_sniff_with_flags(&PathBuf::from("text/plain/utf16bebom.txt"),
                          "text",
                          "plain",
                          Some(("dummy", "text")),
                          NoSniffFlag::OFF,
                          ApacheBugFlag::ON);
}
|
||||
|
||||
/// With the Apache bug flag on, the UTF-16 little-endian BOM fixture is
/// expected to be classified as `text/plain`.
#[test]
fn test_sniff_utf_16le_bom_with_apache_flag_on() {
    test_sniff_with_flags(&PathBuf::from("text/plain/utf16lebom.txt"),
                          "text",
                          "plain",
                          Some(("dummy", "text")),
                          NoSniffFlag::OFF,
                          ApacheBugFlag::ON);
}
|
||||
|
||||
/// With the Apache bug flag on, the binary fixture is expected to be
/// classified as `application/octet-stream`.
#[test]
fn test_sniff_octet_stream_apache_flag_on() {
    test_sniff_with_flags(&PathBuf::from("unknown/binary_file"),
                          "application",
                          "octet-stream",
                          Some(("dummy", "binary")),
                          NoSniffFlag::OFF,
                          ApacheBugFlag::ON);
}
|
||||
|
||||
/// With the Apache bug flag on, the supplied `video/mp4` does not route the
/// data to the audio/video classifier; the MP4 fixture is expected to come
/// back as `application/octet-stream`.
#[test]
fn test_sniff_mp4_video_apache_flag_on() {
    test_sniff_with_flags(&PathBuf::from("video/mp4/test.mp4"),
                          "application",
                          "octet-stream",
                          Some(("video", "mp4")),
                          NoSniffFlag::OFF,
                          ApacheBugFlag::ON);
}
|
||||
|
||||
|
|
Двоичный файл не отображается.
Загрузка…
Ссылка в новой задаче