servo: Merge #7447 - Issue #7382 Use descriptive enums instead of booleans for MIMEClassifier::classifer (from ddrmanxbxfr:master); r=jdm

Hi guys i've done a small pass of refactor in the MIMEClassifier implementation. (See issue #7382 )
   - Moved the predicates to separate functions
   - Added a mimetype enum so we can compare them easily after calling MIMEClassifier::get_media_type

I hope it follows rust good pratices (care it's my first time doing rust).

Improvements and tips are welcome :).

Thanks for looking at it.

Source-Repo: https://github.com/servo/servo
Source-Revision: 8a8204ffc8fa287dde2321c40d12b191b51960da
This commit is contained in:
Mathieu Rheaume 2015-09-18 07:22:59 -06:00
Родитель 1b09c3bfc2
Коммит 3db547ce58
4 изменённых файлов: 206 добавлений и 53 удалений

Просмотреть файл

@ -6,45 +6,58 @@ use std::borrow::ToOwned;
pub struct MIMEClassifier {
image_classifier: GroupedClassifier,
audio_video_classifer: GroupedClassifier,
audio_video_classifier: GroupedClassifier,
scriptable_classifier: GroupedClassifier,
plaintext_classifier: GroupedClassifier,
archive_classifer: GroupedClassifier,
archive_classifier: GroupedClassifier,
binary_or_plaintext: BinaryOrPlaintextClassifier,
feeds_classifier: FeedsClassifier
}
pub enum MediaType {
Xml,
Html,
AudioVideo,
Image,
}
pub enum ApacheBugFlag {
ON,
OFF
}
#[derive(PartialEq)]
pub enum NoSniffFlag {
ON,
OFF
}
impl MIMEClassifier {
//Performs MIME Type Sniffing Algorithm (section 7)
pub fn classify(&self,
no_sniff: bool,
check_for_apache_bug: bool,
no_sniff_flag: NoSniffFlag,
apache_bug_flag: ApacheBugFlag,
supplied_type: &Option<(String, String)>,
data: &[u8]) -> Option<(String, String)> {
match *supplied_type {
None => self.sniff_unknown_type(!no_sniff, data),
None => self.sniff_unknown_type(no_sniff_flag, data),
Some((ref media_type, ref media_subtype)) => {
match (&**media_type, &**media_subtype) {
("unknown", "unknown") |
("application", "unknown") |
("*", "*") => self.sniff_unknown_type(!no_sniff, data),
_ => {
if no_sniff {
supplied_type.clone()
} else if check_for_apache_bug {
self.sniff_text_or_data(data)
} else if MIMEClassifier::is_xml(media_type, media_subtype) {
supplied_type.clone()
} else if MIMEClassifier::is_html(media_type, media_subtype) {
//Implied in section 7.3, but flow is not clear
self.feeds_classifier.classify(data).or(supplied_type.clone())
} else {
match (&**media_type, &**media_subtype) {
("image", _) => self.image_classifier.classify(data),
("audio", _) | ("video", _) | ("application", "ogg") =>
self.audio_video_classifer.classify(data),
_ => None
if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) {
self.sniff_unknown_type(no_sniff_flag, data)
} else {
match no_sniff_flag {
NoSniffFlag::ON => supplied_type.clone(),
NoSniffFlag::OFF => match apache_bug_flag {
ApacheBugFlag::ON => self.sniff_text_or_data(data),
ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type,
media_subtype) {
Some(MediaType::Xml) => supplied_type.clone(),
Some(MediaType::Html) =>
//Implied in section 7.3, but flow is not clear
self.feeds_classifier.classify(data).or(supplied_type.clone()),
Some(MediaType::Image) => self.image_classifier.classify(data),
Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
None => None
}.or(supplied_type.clone())
}
}
@ -56,26 +69,30 @@ impl MIMEClassifier {
pub fn new() -> MIMEClassifier {
MIMEClassifier {
image_classifier: GroupedClassifier::image_classifer(),
audio_video_classifer: GroupedClassifier::audio_video_classifer(),
audio_video_classifier: GroupedClassifier::audio_video_classifier(),
scriptable_classifier: GroupedClassifier::scriptable_classifier(),
plaintext_classifier: GroupedClassifier::plaintext_classifier(),
archive_classifer: GroupedClassifier::archive_classifier(),
archive_classifier: GroupedClassifier::archive_classifier(),
binary_or_plaintext: BinaryOrPlaintextClassifier,
feeds_classifier: FeedsClassifier
}
}
//some sort of iterator over the classifiers might be better?
fn sniff_unknown_type(&self, sniff_scriptable: bool, data: &[u8]) ->
fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) ->
Option<(String, String)> {
if sniff_scriptable {
let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::OFF;
let sniffed = if should_sniff_scriptable {
self.scriptable_classifier.classify(data)
} else {
None
}.or_else(|| self.plaintext_classifier.classify(data))
.or_else(|| self.image_classifier.classify(data))
.or_else(|| self.audio_video_classifer.classify(data))
.or_else(|| self.archive_classifer.classify(data))
.or_else(|| self.binary_or_plaintext.classify(data))
};
sniffed.or_else(|| self.plaintext_classifier.classify(data))
.or_else(|| self.image_classifier.classify(data))
.or_else(|| self.audio_video_classifier.classify(data))
.or_else(|| self.archive_classifier.classify(data))
.or_else(|| self.binary_or_plaintext.classify(data))
}
fn sniff_text_or_data(&self, data: &[u8]) -> Option<(String, String)> {
@ -93,6 +110,40 @@ impl MIMEClassifier {
fn is_html(tp: &str, sub_tp: &str) -> bool {
tp == "text" && sub_tp == "html"
}
fn is_image(tp: &str) -> bool {
tp == "image"
}
fn is_audio_video(tp: &str, sub_tp: &str) -> bool {
tp == "audio" ||
tp == "video" ||
(tp == "application" && sub_tp == "ogg")
}
fn is_explicit_unknown(tp: &str, sub_tp: &str) -> bool {
match(tp, sub_tp) {
("unknown", "unknown") |
("application", "unknown") |
("*", "*") => true,
_ => false
}
}
fn get_media_type(media_type: &String,
media_subtype: &String) -> Option<MediaType> {
if MIMEClassifier::is_xml(media_type, media_subtype) {
Some(MediaType::Xml)
} else if MIMEClassifier::is_html(media_type, media_subtype) {
Some(MediaType::Html)
} else if MIMEClassifier::is_image(media_type) {
Some(MediaType::Image)
} else if MIMEClassifier::is_audio_video(media_type, media_subtype) {
Some(MediaType::AudioVideo)
} else {
None
}
}
}
pub fn as_string_option(tup: Option<(&'static str, &'static str)>) -> Option<(String, String)> {
@ -227,8 +278,8 @@ struct BinaryOrPlaintextClassifier;
impl BinaryOrPlaintextClassifier {
fn classify_impl(&self, data: &[u8]) -> (&'static str, &'static str) {
if data == &[0xFFu8, 0xFEu8] ||
data == &[0xFEu8, 0xFFu8] ||
if data.starts_with(&[0xFFu8, 0xFEu8]) ||
data.starts_with(&[0xFEu8, 0xFFu8]) ||
data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
{
("text", "plain")
@ -265,7 +316,7 @@ impl GroupedClassifier {
]
}
}
fn audio_video_classifer() -> GroupedClassifier {
fn audio_video_classifier() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::video_webm(),

Просмотреть файл

@ -10,8 +10,7 @@ use cookie_storage::CookieStorage;
use data_loader;
use file_loader;
use http_loader::{self, create_http_connector, Connector};
use mime_classifier::MIMEClassifier;
use mime_classifier::{ApacheBugFlag, MIMEClassifier, NoSniffFlag};
use net_traits::ProgressMsg::Done;
use net_traits::{ControlMsg, LoadData, LoadResponse, LoadConsumer, CookieSource};
use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, ResponseAction};
@ -29,7 +28,9 @@ use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
use std::borrow::ToOwned;
use std::boxed::FnBox;
use std::sync::Arc;
use std::sync::{Arc};
use std::sync::mpsc::{channel, Sender};
pub enum ProgressSender {
@ -72,21 +73,20 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
-> Result<ProgressSender, ()> {
if opts::get().sniff_mime_types {
// TODO: should be calculated in the resource loader, from pull requeset #4094
let mut nosniff = false;
let mut check_for_apache_bug = false;
let mut no_sniff = NoSniffFlag::OFF;
let mut check_for_apache_bug = ApacheBugFlag::OFF;
if let Some(ref headers) = metadata.headers {
if let Some(ref raw_content_type) = headers.get_raw("content-type") {
if raw_content_type.len() > 0 {
let ref last_raw_content_type = raw_content_type[raw_content_type.len() - 1];
check_for_apache_bug = last_raw_content_type == b"text/plain"
|| last_raw_content_type == b"text/plain; charset=ISO-8859-1"
|| last_raw_content_type == b"text/plain; charset=iso-8859-1"
|| last_raw_content_type == b"text/plain; charset=UTF-8";
check_for_apache_bug = apache_bug_predicate(last_raw_content_type)
}
}
if let Some(ref raw_content_type_options) = headers.get_raw("X-content-type-options") {
nosniff = raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff");
if raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff") {
no_sniff = NoSniffFlag::ON
}
}
}
@ -94,7 +94,7 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
metadata.content_type.map(|ContentType(Mime(toplevel, sublevel, _))| {
(format!("{}", toplevel), format!("{}", sublevel))
});
metadata.content_type = classifier.classify(nosniff, check_for_apache_bug, &supplied_type,
metadata.content_type = classifier.classify(no_sniff, check_for_apache_bug, &supplied_type,
&partial_body).map(|(toplevel, sublevel)| {
let mime_tp: TopLevel = toplevel.parse().unwrap();
let mime_sb: SubLevel = sublevel.parse().unwrap();
@ -106,6 +106,17 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
start_sending_opt(start_chan, metadata)
}
fn apache_bug_predicate(last_raw_content_type: &[u8]) -> ApacheBugFlag {
if last_raw_content_type == b"text/plain"
|| last_raw_content_type == b"text/plain; charset=ISO-8859-1"
|| last_raw_content_type == b"text/plain; charset=iso-8859-1"
|| last_raw_content_type == b"text/plain; charset=UTF-8" {
ApacheBugFlag::ON
} else {
ApacheBugFlag::OFF
}
}
/// For use by loaders in responding to a Load message.
pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result<ProgressSender, ()> {
match start_chan {

Просмотреть файл

@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use net::mime_classifier::as_string_option;
use net::mime_classifier::{Mp4Matcher, MIMEClassifier};
use net::mime_classifier::{Mp4Matcher, MIMEClassifier, ApacheBugFlag, NoSniffFlag};
use std::env;
use std::fs::File;
use std::io::{self, Read};
@ -37,8 +37,12 @@ fn test_sniff_mp4_matcher() {
}
#[cfg(test)]
fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>) {
fn test_sniff_with_flags(filename_orig: &path::Path,
type_string: &str,
subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>,
no_sniff_flag: NoSniffFlag,
apache_bug_flag: ApacheBugFlag) {
let current_working_directory = env::current_dir().unwrap();
println!("The current directory is {}", current_working_directory.display());
@ -51,7 +55,7 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
match read_result {
Ok(data) => {
match classifier.classify(false, false, &as_string_option(supplied_type), &data) {
match classifier.classify(no_sniff_flag, apache_bug_flag, &as_string_option(supplied_type), &data) {
Some((parsed_type, parsed_subtp)) => {
if (&parsed_type[..] != type_string) ||
(&parsed_subtp[..] != subtype_string) {
@ -69,6 +73,17 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
}
}
#[cfg(test)]
fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>) {
test_sniff_with_flags(filename_orig,
type_string,
subtype_string,
supplied_type,
NoSniffFlag::OFF,
ApacheBugFlag::OFF)
}
#[cfg(test)]
fn test_sniff_classification(file: &str, type_string: &str, subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>) {
@ -448,3 +463,79 @@ fn test_sniff_rss_feed() {
fn test_sniff_atom_feed() {
test_sniff_full(&PathBuf::from("text/xml/feed.atom"), "application", "atom+xml", Some(("text", "html")));
}
#[test]
fn test_sniff_binary_file() {
test_sniff_full(&PathBuf::from("unknown/binary_file"), "application", "octet-stream", None);
}
#[test]
fn test_sniff_atom_feed_with_no_sniff_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
"text",
"html",
Some(("text", "html")),
NoSniffFlag::ON,
ApacheBugFlag::OFF);
}
#[test]
fn test_sniff_with_no_sniff_flag_on_and_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
"text",
"html",
Some(("text", "html")),
NoSniffFlag::ON,
ApacheBugFlag::ON);
}
#[test]
fn test_sniff_utf_8_bom_with_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/plain/utf8bom.txt"),
"text",
"plain",
Some(("dummy", "text")),
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}
#[test]
fn test_sniff_utf_16be_bom_with_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/plain/utf16bebom.txt"),
"text",
"plain",
Some(("dummy", "text")),
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}
#[test]
fn test_sniff_utf_16le_bom_with_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("text/plain/utf16lebom.txt"),
"text",
"plain",
Some(("dummy", "text")),
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}
#[test]
fn test_sniff_octet_stream_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("unknown/binary_file"),
"application",
"octet-stream",
Some(("dummy", "binary")),
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}
#[test]
fn test_sniff_mp4_video_apache_flag_on() {
test_sniff_with_flags(&PathBuf::from("video/mp4/test.mp4"),
"application",
"octet-stream",
Some(("video", "mp4")),
NoSniffFlag::OFF,
ApacheBugFlag::ON);
}

Двоичные данные
servo/tests/unit/net/parsable_mime/unknown/binary_file Normal file

Двоичный файл не отображается.