Bug 1615974 - avoid memmapping CRLite filters in cert_storage r=jschanck,robwu

Differential Revision: https://phabricator.services.mozilla.com/D140266
This commit is contained in:
Dana Keeler 2022-03-09 22:46:15 +00:00
Родитель 89abef6e8a
Коммит 23c938c2f3
8 изменённых файлов: 159 добавлений и 175 удалений

8
Cargo.lock сгенерированный
Просмотреть файл

@ -611,7 +611,6 @@ version = "0.1.0"
dependencies = [ dependencies = [
"nserror", "nserror",
"nsstring", "nsstring",
"rental",
"rust_cascade", "rust_cascade",
"thin-vec", "thin-vec",
"xpcom", "xpcom",
@ -632,12 +631,9 @@ dependencies = [
"crossbeam-utils 0.8.6", "crossbeam-utils 0.8.6",
"cstr", "cstr",
"log", "log",
"malloc_size_of_derive",
"memmap2 0.3.1",
"moz_task", "moz_task",
"nserror", "nserror",
"nsstring", "nsstring",
"rental",
"rkv", "rkv",
"rust_cascade", "rust_cascade",
"sha2", "sha2",
@ -4431,9 +4427,9 @@ checksum = "8a654c5bda722c699be6b0fe4c0d90de218928da5b724c3e467fc48865c37263"
[[package]] [[package]]
name = "rust_cascade" name = "rust_cascade"
version = "0.6.0" version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a5b9bba8f5b985e4923dadd273a987f83669083f3355d65c699e02b9d3d854d" checksum = "d09c17a9310f1eb79a67d307adffa7fa1c5943eaadcc21d4fb7f611536d66c4f"
dependencies = [ dependencies = [
"byteorder", "byteorder",
"digest", "digest",

Просмотреть файл

@ -9,18 +9,15 @@ byteorder = "1.2.7"
crossbeam-utils = "0.8" crossbeam-utils = "0.8"
cstr = "0.2" cstr = "0.2"
log = "0.4" log = "0.4"
memmap2 = "0.3"
moz_task = { path = "../../../../xpcom/rust/moz_task" } moz_task = { path = "../../../../xpcom/rust/moz_task" }
nserror = { path = "../../../../xpcom/rust/nserror" } nserror = { path = "../../../../xpcom/rust/nserror" }
nsstring = { path = "../../../../xpcom/rust/nsstring" } nsstring = { path = "../../../../xpcom/rust/nsstring" }
rental = "0.5.5"
rkv = { version = "0.17", default-features = false } rkv = { version = "0.17", default-features = false }
rust_cascade = "0.6.0" rust_cascade = "1.2.0"
sha2 = "^0.8" sha2 = "^0.8"
storage_variant = { path = "../../../../storage/variant" } storage_variant = { path = "../../../../storage/variant" }
tempfile = "3" tempfile = "3"
thin-vec = { version = "0.2.1", features = ["gecko-ffi"] } thin-vec = { version = "0.2.1", features = ["gecko-ffi"] }
time = "0.1" time = "0.1"
xpcom = { path = "../../../../xpcom/rust/xpcom" } xpcom = { path = "../../../../xpcom/rust/xpcom" }
malloc_size_of_derive = "0.1"
wr_malloc_size_of = { path = "../../../../gfx/wr/wr_malloc_size_of" } wr_malloc_size_of = { path = "../../../../gfx/wr/wr_malloc_size_of" }

Просмотреть файл

@ -9,12 +9,9 @@ extern crate crossbeam_utils;
extern crate cstr; extern crate cstr;
#[macro_use] #[macro_use]
extern crate log; extern crate log;
extern crate memmap2;
extern crate moz_task; extern crate moz_task;
extern crate nserror; extern crate nserror;
extern crate nsstring; extern crate nsstring;
#[macro_use]
extern crate rental;
extern crate rkv; extern crate rkv;
extern crate rust_cascade; extern crate rust_cascade;
extern crate sha2; extern crate sha2;
@ -22,8 +19,6 @@ extern crate thin_vec;
extern crate time; extern crate time;
#[macro_use] #[macro_use]
extern crate xpcom; extern crate xpcom;
#[macro_use]
extern crate malloc_size_of_derive;
extern crate storage_variant; extern crate storage_variant;
extern crate tempfile; extern crate tempfile;
@ -33,7 +28,6 @@ use wr_malloc_size_of as malloc_size_of;
use byteorder::{LittleEndian, NetworkEndian, ReadBytesExt, WriteBytesExt}; use byteorder::{LittleEndian, NetworkEndian, ReadBytesExt, WriteBytesExt};
use crossbeam_utils::atomic::AtomicCell; use crossbeam_utils::atomic::AtomicCell;
use malloc_size_of::{MallocSizeOf, MallocSizeOfOps}; use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
use memmap2::Mmap;
use moz_task::{create_background_task_queue, is_main_thread, Task, TaskRunnable}; use moz_task::{create_background_task_queue, is_main_thread, Task, TaskRunnable};
use nserror::{ use nserror::{
nsresult, NS_ERROR_FAILURE, NS_ERROR_NOT_SAME_THREAD, NS_ERROR_NO_AGGREGATION, nsresult, NS_ERROR_FAILURE, NS_ERROR_NOT_SAME_THREAD, NS_ERROR_NO_AGGREGATION,
@ -132,30 +126,12 @@ impl MallocSizeOf for EnvAndStore {
} }
} }
// In Rust, a struct cannot normally hold a reference into itself: if the struct gets moved, the
// compiler has no guarantee that the reference is still valid. In our case the memmapped data is
// at a particular place in memory (and that's what we're referencing), so we can use the rental
// crate to create a struct that both owns the mapping and references it.
rental! {
mod holding {
use super::{Cascade, Mmap};
#[rental]
pub struct CRLiteFilter {
// The memory-mapped filter file; this is the data that `cascade` borrows from.
backing_file: Box<Mmap>,
// The cascade built over the bytes of `backing_file` (note the `'backing_file` lifetime).
cascade: Box<Cascade<'backing_file>>,
}
}
}
/// `SecurityState` /// `SecurityState`
#[derive(MallocSizeOf)]
struct SecurityState { struct SecurityState {
profile_path: PathBuf, profile_path: PathBuf,
env_and_store: Option<EnvAndStore>, env_and_store: Option<EnvAndStore>,
int_prefs: HashMap<String, u32>, int_prefs: HashMap<String, u32>,
#[ignore_malloc_size_of = "rental crate does not allow impls for rental structs"] crlite_filter: Option<Cascade>,
crlite_filter: Option<holding::CRLiteFilter>,
/// Maps issuer spki hashes to sets of serial numbers. /// Maps issuer spki hashes to sets of serial numbers.
crlite_stash: Option<HashMap<Vec<u8>, HashSet<Vec<u8>>>>, crlite_stash: Option<HashMap<Vec<u8>, HashSet<Vec<u8>>>>,
/// Maps an RFC 6962 LogID to a pair of 64 bit unix timestamps /// Maps an RFC 6962 LogID to a pair of 64 bit unix timestamps
@ -529,15 +505,12 @@ impl SecurityState {
if !path.exists() { if !path.exists() {
return Ok(()); return Ok(());
} }
let filter_file = File::open(path)?; let mut filter_file = File::open(path)?;
let mmap = unsafe { Mmap::map(&filter_file)? }; let mut filter_bytes = Vec::new();
let crlite_filter = holding::CRLiteFilter::try_new(Box::new(mmap), |mmap| { let _ = filter_file.read_to_end(&mut filter_bytes)?;
match Cascade::from_bytes(mmap)? { let crlite_filter = *Cascade::from_bytes(filter_bytes)
Some(cascade) => Ok(cascade), .map_err(|_| SecurityStateError::from("invalid CRLite filter"))?
None => Err(SecurityStateError::from("invalid CRLite filter")), .ok_or(SecurityStateError::from("expecting non-empty filter"))?;
}
})
.map_err(|_| SecurityStateError::from("unable to initialize CRLite filter"))?;
let mut path = get_store_path(&self.profile_path)?; let mut path = get_store_path(&self.profile_path)?;
path.push("crlite.coverage"); path.push("crlite.coverage");
@ -672,7 +645,7 @@ impl SecurityState {
lookup_key.extend_from_slice(serial_number); lookup_key.extend_from_slice(serial_number);
debug!("CRLite lookup key: {:?}", lookup_key); debug!("CRLite lookup key: {:?}", lookup_key);
let result = match &self.crlite_filter { let result = match &self.crlite_filter {
Some(crlite_filter) => crlite_filter.rent(|filter| filter.has(&lookup_key)), Some(crlite_filter) => crlite_filter.has(&lookup_key),
// This can only happen if the backing file was deleted or if it or our database has // This can only happen if the backing file was deleted or if it or our database has
// become corrupted. In any case, we have no information. // become corrupted. In any case, we have no information.
None => return nsICertStorage::STATE_NOT_COVERED, None => return nsICertStorage::STATE_NOT_COVERED,
@ -881,6 +854,21 @@ impl SecurityState {
} }
} }
impl MallocSizeOf for SecurityState {
    /// Reports the memory attributed to this `SecurityState` as the sum of its fields' sizes.
    ///
    /// Every field except `crlite_filter` is measured through its own `size_of(ops)`. The
    /// filter is counted via `approximate_size_of()` and contributes nothing when no filter is
    /// loaded.
    fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
        // The filter estimate does not use `ops`, so it can be computed up front.
        let filter_size = match self.crlite_filter.as_ref() {
            Some(filter) => filter.approximate_size_of(),
            None => 0,
        };

        // The remaining fields are visited in declaration order, each sharing `ops`.
        let mut total = self.profile_path.size_of(ops);
        total += self.env_and_store.size_of(ops);
        total += self.int_prefs.size_of(ops);
        total += filter_size;
        total += self.crlite_stash.size_of(ops);
        total += self.crlite_coverage.size_of(ops);
        total += self.remaining_ops.size_of(ops);
        total
    }
}
const CERT_SERIALIZATION_VERSION_1: u8 = 1; const CERT_SERIALIZATION_VERSION_1: u8 = 1;
// A Cert consists of its DER encoding, its DER-encoded subject, and its trust (currently // A Cert consists of its DER encoding, its DER-encoded subject, and its trust (currently

Просмотреть файл

@ -1 +1 @@
{"files":{"Cargo.toml":"411cb740d6be8346206164df646ac9df304e9a84bb9f10eb4b07d2ef2f6566ec","README.md":"a4396d1adf63a77ae9aa0d1d850d02d09eec4a92810a52d675163688f312b3e8","license.txt":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","src/lib.rs":"2c6d1e01ae3a39baad99cd4567b0164dec4dcf77688bc2c3b43798215c857943","test_data/make-sample-data.py":"68bcb106c3ac1929da52e1abb71cd2a6d59eb79549f6e40042368161baa920e0","test_data/requirements.txt":"cb9372b33ed2774e0d5040459fd63a2f9abae2be599869be43a2a077b2c08aa3","test_data/test_v1_murmur_mlbf":"243df0b7f2f55bfe3cefbba2d4be5eb7957c0a063559c9f284ca4c1ee4211eb5","test_data/test_v1_murmur_short_mlbf":"3d4f03dc0a65cf5800efed6ac0b3c73e5b61e5d62bc82ac42744abc67f4c30fa","test_data/test_v2_murmur_inverted_mlbf":"efdd0ab309883f6a3148ec2ddaf0dcb768790e6f130e4e0556994202b1fd7cc4","test_data/test_v2_murmur_mlbf":"80e8e148fbf95aed39783f1fcc2d4576074f8c487656ca2d53571da4b17e20a9","test_data/test_v2_sha256_inverted_mlbf":"e5148cabb45c4899f8220ca51f96a6c76c688e39dfd340ae56bf9dc5226eada2","test_data/test_v2_sha256_mlbf":"08986847b8b2f3bdf4d2df51e465938f88f7a7c401b1740094fc40b033e80b51","test_data/test_v2_sha256_salt_mlbf":"d7b9bf88872162a1917eb14d0340a88b61b574fb1a7120fa54d061e43a9f5460"},"package":"9a5b9bba8f5b985e4923dadd273a987f83669083f3355d65c699e02b9d3d854d"} 
{"files":{"Cargo.toml":"001e85e1a2fb801d92db560e0c6abbdfbff246c0bce600e1908f674819acb1d7","README.md":"a4396d1adf63a77ae9aa0d1d850d02d09eec4a92810a52d675163688f312b3e8","license.txt":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","src/lib.rs":"ea99597d605feb5a33fbe678ae86fae525042ce48704383e61ee54dd95e0e854","test_data/make-sample-data.py":"68bcb106c3ac1929da52e1abb71cd2a6d59eb79549f6e40042368161baa920e0","test_data/requirements.txt":"cb9372b33ed2774e0d5040459fd63a2f9abae2be599869be43a2a077b2c08aa3","test_data/test_v1_murmur_mlbf":"243df0b7f2f55bfe3cefbba2d4be5eb7957c0a063559c9f284ca4c1ee4211eb5","test_data/test_v1_murmur_short_mlbf":"3d4f03dc0a65cf5800efed6ac0b3c73e5b61e5d62bc82ac42744abc67f4c30fa","test_data/test_v2_murmur_inverted_mlbf":"efdd0ab309883f6a3148ec2ddaf0dcb768790e6f130e4e0556994202b1fd7cc4","test_data/test_v2_murmur_mlbf":"80e8e148fbf95aed39783f1fcc2d4576074f8c487656ca2d53571da4b17e20a9","test_data/test_v2_sha256_inverted_mlbf":"e5148cabb45c4899f8220ca51f96a6c76c688e39dfd340ae56bf9dc5226eada2","test_data/test_v2_sha256_mlbf":"08986847b8b2f3bdf4d2df51e465938f88f7a7c401b1740094fc40b033e80b51","test_data/test_v2_sha256_salt_mlbf":"d7b9bf88872162a1917eb14d0340a88b61b574fb1a7120fa54d061e43a9f5460"},"package":"d09c17a9310f1eb79a67d307adffa7fa1c5943eaadcc21d4fb7f611536d66c4f"}

13
third_party/rust/rust_cascade/Cargo.toml поставляемый
Просмотреть файл

@ -3,18 +3,17 @@
# When uploading crates to the registry Cargo will automatically # When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility # "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies # with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies # to registry (e.g., crates.io) dependencies.
# #
# If you believe there's an error in this file please file an # If you are reading this file be aware that the original Cargo.toml
# issue against the rust-lang/cargo repository. If you're # will likely look very different (and much more reasonable).
# editing this file be aware that the upstream Cargo.toml # See Cargo.toml.orig for the original contents.
# will likely look very different (and much more reasonable)
[package] [package]
name = "rust_cascade" name = "rust_cascade"
version = "0.6.0" version = "1.2.0"
authors = ["Mark Goodwin <mgoodwin@mozilla.com>", "Dana Keeler <dkeeler@mozilla.com>", "J.C. Jones <jc@mozilla.com>"] authors = ["Mark Goodwin <mgoodwin@mozilla.com>", "Dana Keeler <dkeeler@mozilla.com>", "J.C. Jones <jc@mozilla.com>"]
description = "A simple mmh3 based bloom filter cascade implementation in Rust." description = "A simple bloom filter cascade implementation in Rust."
homepage = "https://github.com/mozilla/rust-cascade" homepage = "https://github.com/mozilla/rust-cascade"
documentation = "https://docs.rs/rust_cascade/" documentation = "https://docs.rs/rust_cascade/"
license = "MPL-2.0" license = "MPL-2.0"

214
third_party/rust/rust_cascade/src/lib.rs поставляемый
Просмотреть файл

@ -8,25 +8,26 @@ use murmurhash3::murmurhash3_x86_32;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::convert::{TryFrom, TryInto}; use std::convert::{TryFrom, TryInto};
use std::fmt; use std::fmt;
use std::io::{Error, ErrorKind}; use std::io::{Error, ErrorKind, Read};
use std::mem::size_of;
/// Helper struct to provide read-only bit access to a slice of bytes. /// Helper struct to provide read-only bit access to a vector of bytes.
struct BitSlice<'a> { struct BitVector {
/// The slice of bytes we're interested in. /// The bytes we're interested in.
bytes: &'a [u8], bytes: Vec<u8>,
/// The number of bits that are valid to access in the slice. /// The number of bits that are valid to access in the vector.
/// Not necessarily equal to `bytes.len() * 8`, but it will not be greater than that. /// Not necessarily equal to `bytes.len() * 8`, but it will not be greater than that.
bit_len: usize, bit_len: usize,
} }
impl<'a> BitSlice<'a> { impl BitVector {
/// Creates a new `BitSlice` of the given bit length over the given slice of data. /// Creates a new `BitVector` of the given bit length over the given data.
/// Panics if the indicated bit length is larger than fits in the slice. /// Panics if the indicated bit length is larger than fits in the vector.
/// ///
/// # Arguments /// # Arguments
/// * `bytes` - The slice of bytes we need bit-access to /// * `bytes` - The bytes we need bit-access to
/// * `bit_len` - The number of bits that are valid to access in the slice /// * `bit_len` - The number of bits that are valid to access in the vector
fn new(bytes: &'a [u8], bit_len: usize) -> BitSlice<'a> { fn new(bytes: Vec<u8>, bit_len: usize) -> BitVector {
if bit_len > bytes.len() * 8 { if bit_len > bytes.len() * 8 {
panic!( panic!(
"bit_len too large for given data: {} > {} * 8", "bit_len too large for given data: {} > {} * 8",
@ -34,7 +35,7 @@ impl<'a> BitSlice<'a> {
bytes.len() bytes.len()
); );
} }
BitSlice { bytes, bit_len } BitVector { bytes, bit_len }
} }
/// Get the value of the specified bit. /// Get the value of the specified bit.
@ -45,7 +46,7 @@ impl<'a> BitSlice<'a> {
fn get(&self, bit_index: usize) -> bool { fn get(&self, bit_index: usize) -> bool {
if bit_index >= self.bit_len { if bit_index >= self.bit_len {
panic!( panic!(
"bit index out of range for bit slice: {} >= {}", "bit index out of range for bit vector: {} >= {}",
bit_index, self.bit_len bit_index, self.bit_len
); );
} }
@ -68,7 +69,7 @@ impl<'a> BitSlice<'a> {
} }
/// A Bloom filter representing a specific level in a multi-level cascading Bloom filter. /// A Bloom filter representing a specific level in a multi-level cascading Bloom filter.
struct Bloom<'a> { struct Bloom {
/// What level this filter is in /// What level this filter is in
level: u8, level: u8,
/// How many hash functions this filter uses /// How many hash functions this filter uses
@ -76,7 +77,7 @@ struct Bloom<'a> {
/// The bit length of the filter /// The bit length of the filter
size: u32, size: u32,
/// The data of the filter /// The data of the filter
bit_slice: BitSlice<'a>, bit_vector: BitVector,
/// The hash algorithm enumeration in use /// The hash algorithm enumeration in use
hash_algorithm: HashAlgorithm, hash_algorithm: HashAlgorithm,
} }
@ -108,13 +109,12 @@ impl TryFrom<u8> for HashAlgorithm {
} }
} }
impl<'a> Bloom<'a> { impl Bloom {
/// Attempts to decode and return a pair that consists of the Bloom filter represented by the /// Attempts to decode the Bloom filter represented by the bytes in the given reader.
/// given bytes and any remaining unprocessed bytes in the given bytes.
/// ///
/// # Arguments /// # Arguments
/// * `bytes` - The encoded representation of this Bloom filter. May include additional data /// * `reader` - The encoded representation of this Bloom filter. May be empty. May include
/// describing further Bloom filters. Any additional data is returned unconsumed. /// additional data describing further Bloom filters.
/// The format of an encoded Bloom filter is: /// The format of an encoded Bloom filter is:
/// [1 byte] - the hash algorithm to use in the filter /// [1 byte] - the hash algorithm to use in the filter
/// [4 little endian bytes] - the length in bits of the filter /// [4 little endian bytes] - the length in bits of the filter
@ -122,11 +122,15 @@ impl<'a> Bloom<'a> {
/// [1 byte] - which level in the cascade this filter is /// [1 byte] - which level in the cascade this filter is
/// [variable length bytes] - the filter itself (the length is determined by Ceiling(bit length /// [variable length bytes] - the filter itself (the length is determined by Ceiling(bit length
/// / 8) /// / 8)
pub fn from_bytes(bytes: &'a [u8]) -> Result<(Bloom<'a>, &'a [u8]), Error> { pub fn read<R: Read>(reader: &mut R) -> Result<Option<Bloom>, Error> {
let mut cursor = bytes;
// Load the layer metadata. bloomer.py writes size, nHashFuncs and level as little-endian // Load the layer metadata. bloomer.py writes size, nHashFuncs and level as little-endian
// unsigned ints. // unsigned ints.
let hash_algorithm_val = cursor.read_u8()?; let hash_algorithm_val = match reader.read_u8() {
Ok(val) => val,
// If reader is at EOF, there is no bloom filter.
Err(e) if e.kind() == ErrorKind::UnexpectedEof => return Ok(None),
Err(e) => return Err(e),
};
let hash_algorithm = match HashAlgorithm::try_from(hash_algorithm_val) { let hash_algorithm = match HashAlgorithm::try_from(hash_algorithm_val) {
Ok(algo) => algo, Ok(algo) => algo,
Err(()) => { Err(()) => {
@ -137,9 +141,9 @@ impl<'a> Bloom<'a> {
} }
}; };
let size = cursor.read_u32::<byteorder::LittleEndian>()?; let size = reader.read_u32::<byteorder::LittleEndian>()?;
let n_hash_funcs = cursor.read_u32::<byteorder::LittleEndian>()?; let n_hash_funcs = reader.read_u32::<byteorder::LittleEndian>()?;
let level = cursor.read_u8()?; let level = reader.read_u8()?;
let shifted_size = size.wrapping_shr(3) as usize; let shifted_size = size.wrapping_shr(3) as usize;
let byte_count = if size % 8 != 0 { let byte_count = if size % 8 != 0 {
@ -147,24 +151,19 @@ impl<'a> Bloom<'a> {
} else { } else {
shifted_size shifted_size
}; };
if byte_count > cursor.len() { let mut bits_bytes = vec![0; byte_count];
return Err(Error::new( reader.read_exact(&mut bits_bytes)?;
ErrorKind::InvalidData,
"Invalid Bloom filter: too short",
));
}
let (bits_bytes, rest_of_bytes) = cursor.split_at(byte_count);
let bloom = Bloom { let bloom = Bloom {
level, level,
n_hash_funcs, n_hash_funcs,
size, size,
bit_slice: BitSlice::new(bits_bytes, size as usize), bit_vector: BitVector::new(bits_bytes, size as usize),
hash_algorithm, hash_algorithm,
}; };
Ok((bloom, rest_of_bytes)) Ok(Some(bloom))
} }
fn hash(&self, n_fn: u32, key: &[u8], salt: Option<&[u8]>) -> u32 { fn hash(&self, n_fn: u32, key: &[u8], salt: Option<&Vec<u8>>) -> u32 {
match self.hash_algorithm { match self.hash_algorithm {
HashAlgorithm::MurmurHash3 => { HashAlgorithm::MurmurHash3 => {
if salt.is_some() { if salt.is_some() {
@ -195,9 +194,9 @@ impl<'a> Bloom<'a> {
/// ///
/// # Arguments /// # Arguments
/// `item` - The slice of bytes to test for /// `item` - The slice of bytes to test for
pub fn has(&self, item: &[u8], salt: Option<&[u8]>) -> bool { fn has(&self, item: &[u8], salt: Option<&Vec<u8>>) -> bool {
for i in 0..self.n_hash_funcs { for i in 0..self.n_hash_funcs {
if !self.bit_slice.get(self.hash(i, item, salt) as usize) { if !self.bit_vector.get(self.hash(i, item, salt) as usize) {
return false; return false;
} }
} }
@ -205,7 +204,7 @@ impl<'a> Bloom<'a> {
} }
} }
impl<'a> fmt::Display for Bloom<'a> { impl fmt::Display for Bloom {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!( write!(
f, f,
@ -216,21 +215,19 @@ impl<'a> fmt::Display for Bloom<'a> {
} }
/// A multi-level cascading Bloom filter. /// A multi-level cascading Bloom filter.
pub struct Cascade<'a> { pub struct Cascade {
/// The Bloom filter for this level in the cascade /// The Bloom filter for this level in the cascade
filter: Bloom<'a>, filter: Bloom,
/// The next (lower) level in the cascade /// The next (lower) level in the cascade
child_layer: Option<Box<Cascade<'a>>>, child_layer: Option<Box<Cascade>>,
/// The salt in use, if any /// The salt in use, if any
salt: Option<&'a [u8]>, salt: Option<Vec<u8>>,
/// Whether the logic should be inverted /// Whether the logic should be inverted
inverted: bool, inverted: bool,
} }
impl<'a> Cascade<'a> { impl Cascade {
/// Attempts to decode and return a multi-level cascading Bloom filter. NB: `Cascade` does not /// Attempts to decode and return a multi-level cascading Bloom filter.
/// take ownership of the given data. This is to facilitate decoding cascading filters
/// backed by memory-mapped files.
/// ///
/// # Arguments /// # Arguments
/// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2 /// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2
@ -239,31 +236,23 @@ impl<'a> Cascade<'a> {
/// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py /// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py
/// ///
/// May be of length 0, in which case `None` is returned. /// May be of length 0, in which case `None` is returned.
pub fn from_bytes(bytes: &'a [u8]) -> Result<Option<Box<Cascade<'a>>>, Error> { pub fn from_bytes(bytes: Vec<u8>) -> Result<Option<Box<Cascade>>, Error> {
if bytes.is_empty() { if bytes.is_empty() {
return Ok(None); return Ok(None);
} }
let mut cursor = bytes; let mut reader = bytes.as_slice();
let version = cursor.read_u16::<byteorder::LittleEndian>()?; let version = reader.read_u16::<byteorder::LittleEndian>()?;
let mut salt = None; let mut salt = None;
let mut inverted = false; let mut inverted = false;
if version >= 2 { if version >= 2 {
inverted = cursor.read_u8()? != 0; inverted = reader.read_u8()? != 0;
let salt_len = cursor.read_u8()? as usize; let salt_len = reader.read_u8()? as usize;
if salt_len > cursor.len() {
return Err(Error::new(
ErrorKind::InvalidData,
"Invalid Bloom filter: too short",
));
}
let (salt_bytes, remaining_bytes) = cursor.split_at(salt_len);
if salt_len > 0 { if salt_len > 0 {
salt = Some(salt_bytes) let mut salt_bytes = vec![0; salt_len];
reader.read_exact(&mut salt_bytes)?;
salt = Some(salt_bytes);
} }
cursor = remaining_bytes;
} }
if version > 2 { if version > 2 {
@ -273,22 +262,23 @@ impl<'a> Cascade<'a> {
)); ));
} }
Cascade::child_layer_from_bytes(cursor, salt, inverted) Cascade::child_layer_from_bytes(reader, salt, inverted)
} }
fn child_layer_from_bytes( fn child_layer_from_bytes<R: Read>(
bytes: &'a [u8], mut reader: R,
salt: Option<&'a [u8]>, salt: Option<Vec<u8>>,
inverted: bool, inverted: bool,
) -> Result<Option<Box<Cascade<'a>>>, Error> { ) -> Result<Option<Box<Cascade>>, Error> {
if bytes.is_empty() { let filter = match Bloom::read(&mut reader)? {
return Ok(None); Some(filter) => filter,
} None => return Ok(None),
let (filter, rest_of_bytes) = Bloom::from_bytes(bytes)?; };
let our_salt = salt.as_ref().cloned();
Ok(Some(Box::new(Cascade { Ok(Some(Box::new(Cascade {
filter, filter,
child_layer: Cascade::child_layer_from_bytes(rest_of_bytes, salt, inverted)?, child_layer: Cascade::child_layer_from_bytes(reader, salt, inverted)?,
salt, salt: our_salt,
inverted, inverted,
}))) })))
} }
@ -305,8 +295,8 @@ impl<'a> Cascade<'a> {
result result
} }
pub fn has_internal(&self, entry: &[u8]) -> bool { fn has_internal(&self, entry: &[u8]) -> bool {
if self.filter.has(&entry, self.salt) { if self.filter.has(entry, self.salt.as_ref()) {
match self.child_layer { match self.child_layer {
Some(ref child) => { Some(ref child) => {
let child_value = !child.has_internal(entry); let child_value = !child.has_internal(entry);
@ -319,9 +309,25 @@ impl<'a> Cascade<'a> {
} }
false false
} }
/// Estimates how many bytes of memory this Cascade occupies, including all
/// of its child layers. The figure is only approximate: this implementation
/// does not integrate with the allocator, so it cannot measure real
/// allocation sizes. The guess is reasonable provided the bloom filter data
/// is large enough to dominate the overall allocated size.
pub fn approximate_size_of(&self) -> usize {
    // Bytes backing this layer's bloom filter.
    let filter_bytes = self.filter.bit_vector.bytes.len();
    // Lower layers report their own estimate recursively; a leaf adds nothing.
    let children_bytes = match &self.child_layer {
        Some(child) => child.approximate_size_of(),
        None => 0,
    };
    // The salt, when present, is stored with each layer.
    let salt_bytes = match &self.salt {
        Some(salt) => salt.len(),
        None => 0,
    };
    size_of::<Cascade>() + filter_bytes + children_bytes + salt_bytes
}
} }
impl<'a> fmt::Display for Cascade<'a> { impl fmt::Display for Cascade {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!( write!(
f, f,
@ -345,34 +351,41 @@ mod tests {
let src: Vec<u8> = vec![ let src: Vec<u8> = vec![
0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00, 0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00,
]; ];
let mut reader = src.as_slice();
match Bloom::from_bytes(&src) { match Bloom::read(&mut reader) {
Ok((bloom, rest_of_bytes)) => { Ok(Some(bloom)) => {
assert!(rest_of_bytes.len() == 0);
assert!(bloom.has(b"this", None) == true); assert!(bloom.has(b"this", None) == true);
assert!(bloom.has(b"that", None) == true); assert!(bloom.has(b"that", None) == true);
assert!(bloom.has(b"other", None) == false); assert!(bloom.has(b"other", None) == false);
} }
Err(_) => { Ok(None) => panic!("Parsing failed"),
panic!("Parsing failed"); Err(_) => panic!("Parsing failed"),
}
}; };
assert!(reader.is_empty());
let short: Vec<u8> = vec![ let short: Vec<u8> = vec![
0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41,
]; ];
assert!(Bloom::from_bytes(&short).is_err()); assert!(Bloom::read(&mut short.as_slice()).is_err());
let empty: Vec<u8> = Vec::new();
let mut reader = empty.as_slice();
match Bloom::read(&mut reader) {
Ok(should_be_none) => assert!(should_be_none.is_none()),
Err(_) => panic!("Parsing failed"),
};
} }
#[test] #[test]
fn bloom_v3_unsupported() { fn bloom_v3_unsupported() {
let src: Vec<u8> = vec![0x03, 0x01, 0x00]; let src: Vec<u8> = vec![0x03, 0x01, 0x00];
assert!(Bloom::from_bytes(&src).is_err()); assert!(Bloom::read(&mut src.as_slice()).is_err());
} }
#[test] #[test]
fn cascade_v1_murmur_from_file_bytes_test() { fn cascade_v1_murmur_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v1_murmur_mlbf"); let v = include_bytes!("../test_data/test_v1_murmur_mlbf").to_vec();
let cascade = Cascade::from_bytes(v) let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed") .expect("parsing Cascade should succeed")
.expect("Cascade should be Some"); .expect("Cascade should be Some");
@ -401,13 +414,15 @@ mod tests {
0x77, 0x8e ]; 0x77, 0x8e ];
assert!(!cascade.has(&key_for_valid_cert)); assert!(!cascade.has(&key_for_valid_cert));
let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf"); assert_eq!(cascade.approximate_size_of(), 15632);
let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf").to_vec();
assert!(Cascade::from_bytes(v).is_err()); assert!(Cascade::from_bytes(v).is_err());
} }
#[test] #[test]
fn cascade_v2_sha256_from_file_bytes_test() { fn cascade_v2_sha256_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_sha256_mlbf"); let v = include_bytes!("../test_data/test_v2_sha256_mlbf").to_vec();
let cascade = Cascade::from_bytes(v) let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed") .expect("parsing Cascade should succeed")
.expect("Cascade should be Some"); .expect("Cascade should be Some");
@ -417,25 +432,27 @@ mod tests {
assert!(cascade.has(b"this") == true); assert!(cascade.has(b"this") == true);
assert!(cascade.has(b"that") == true); assert!(cascade.has(b"that") == true);
assert!(cascade.has(b"other") == false); assert!(cascade.has(b"other") == false);
assert_eq!(cascade.approximate_size_of(), 10247);
} }
#[test] #[test]
fn cascade_v2_sha256_with_salt_from_file_bytes_test() { fn cascade_v2_sha256_with_salt_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_sha256_salt_mlbf"); let v = include_bytes!("../test_data/test_v2_sha256_salt_mlbf").to_vec();
let cascade = Cascade::from_bytes(v) let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed") .expect("parsing Cascade should succeed")
.expect("Cascade should be Some"); .expect("Cascade should be Some");
assert!(cascade.salt == Some(b"nacl")); assert!(cascade.salt == Some(b"nacl".to_vec()));
assert!(cascade.inverted == false); assert!(cascade.inverted == false);
assert!(cascade.has(b"this") == true); assert!(cascade.has(b"this") == true);
assert!(cascade.has(b"that") == true); assert!(cascade.has(b"that") == true);
assert!(cascade.has(b"other") == false); assert!(cascade.has(b"other") == false);
assert_eq!(cascade.approximate_size_of(), 10251);
} }
#[test] #[test]
fn cascade_v2_murmur_from_file_bytes_test() { fn cascade_v2_murmur_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_murmur_mlbf"); let v = include_bytes!("../test_data/test_v2_murmur_mlbf").to_vec();
let cascade = Cascade::from_bytes(v) let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed") .expect("parsing Cascade should succeed")
.expect("Cascade should be Some"); .expect("Cascade should be Some");
@ -445,11 +462,12 @@ mod tests {
assert!(cascade.has(b"this") == true); assert!(cascade.has(b"this") == true);
assert!(cascade.has(b"that") == true); assert!(cascade.has(b"that") == true);
assert!(cascade.has(b"other") == false); assert!(cascade.has(b"other") == false);
assert_eq!(cascade.approximate_size_of(), 10247);
} }
#[test] #[test]
fn cascade_v2_murmur_inverted_from_file_bytes_test() { fn cascade_v2_murmur_inverted_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf"); let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf").to_vec();
let cascade = Cascade::from_bytes(v) let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed") .expect("parsing Cascade should succeed")
.expect("Cascade should be Some"); .expect("Cascade should be Some");
@ -459,11 +477,12 @@ mod tests {
assert!(cascade.has(b"this") == true); assert!(cascade.has(b"this") == true);
assert!(cascade.has(b"that") == true); assert!(cascade.has(b"that") == true);
assert!(cascade.has(b"other") == false); assert!(cascade.has(b"other") == false);
assert_eq!(cascade.approximate_size_of(), 10247);
} }
#[test] #[test]
fn cascade_v2_sha256_inverted_from_file_bytes_test() { fn cascade_v2_sha256_inverted_from_file_bytes_test() {
let v = include_bytes!("../test_data/test_v2_sha256_inverted_mlbf"); let v = include_bytes!("../test_data/test_v2_sha256_inverted_mlbf").to_vec();
let cascade = Cascade::from_bytes(v) let cascade = Cascade::from_bytes(v)
.expect("parsing Cascade should succeed") .expect("parsing Cascade should succeed")
.expect("Cascade should be Some"); .expect("Cascade should be Some");
@ -473,5 +492,12 @@ mod tests {
assert!(cascade.has(b"this") == true); assert!(cascade.has(b"this") == true);
assert!(cascade.has(b"that") == true); assert!(cascade.has(b"that") == true);
assert!(cascade.has(b"other") == false); assert!(cascade.has(b"other") == false);
assert_eq!(cascade.approximate_size_of(), 10247);
}
#[test]
fn cascade_empty() {
let cascade = Cascade::from_bytes(Vec::new()).expect("parsing Cascade should succeed");
assert!(cascade.is_none());
} }
} }

Просмотреть файл

@ -6,7 +6,6 @@ authors = ["Rob Wu <rob@robwu.nl>"]
[dependencies] [dependencies]
nserror = { path = "../../../xpcom/rust/nserror" } nserror = { path = "../../../xpcom/rust/nserror" }
nsstring = { path = "../../../xpcom/rust/nsstring" } nsstring = { path = "../../../xpcom/rust/nsstring" }
rental = "0.5.5" rust_cascade = "1.2.0"
rust_cascade = "0.6.0"
thin-vec = { version = "0.2.1", features = ["gecko-ffi"] } thin-vec = { version = "0.2.1", features = ["gecko-ffi"] }
xpcom = { path = "../../../xpcom/rust/xpcom" } xpcom = { path = "../../../xpcom/rust/xpcom" }

Просмотреть файл

@ -4,8 +4,6 @@
extern crate nserror; extern crate nserror;
extern crate nsstring; extern crate nsstring;
#[macro_use]
extern crate rental;
extern crate rust_cascade; extern crate rust_cascade;
extern crate thin_vec; extern crate thin_vec;
#[macro_use] #[macro_use]
@ -19,25 +17,11 @@ use thin_vec::ThinVec;
use xpcom::interfaces::nsICascadeFilter; use xpcom::interfaces::nsICascadeFilter;
use xpcom::{xpcom_method, RefPtr}; use xpcom::{xpcom_method, RefPtr};
// Cascade does not take ownership of the data it decodes, so we must own the data ourselves in
// order to pass a reference to it to Cascade. The rental crate lets a single struct hold both
// the owned bytes and the Cascade that borrows from them.
rental! {
mod rentals {
use super::Cascade;
#[rental]
pub struct CascadeWithOwnedData {
// The owned copy of the filter data.
owndata: Box<[u8]>,
// The cascade that borrows from `owndata` (note the `'owndata` lifetime).
cascade: Box<Cascade<'owndata>>,
}
}
}
#[derive(xpcom)] #[derive(xpcom)]
#[xpimplements(nsICascadeFilter)] #[xpimplements(nsICascadeFilter)]
#[refcnt = "nonatomic"] #[refcnt = "nonatomic"]
pub struct InitCascadeFilter { pub struct InitCascadeFilter {
filter: RefCell<Option<rentals::CascadeWithOwnedData>>, filter: RefCell<Option<Cascade>>,
} }
impl CascadeFilter { impl CascadeFilter {
@ -49,14 +33,9 @@ impl CascadeFilter {
xpcom_method!(set_filter_data => SetFilterData(data: *const ThinVec<u8>)); xpcom_method!(set_filter_data => SetFilterData(data: *const ThinVec<u8>));
fn set_filter_data(&self, data: &ThinVec<u8>) -> Result<(), nsresult> { fn set_filter_data(&self, data: &ThinVec<u8>) -> Result<(), nsresult> {
let filter = rentals::CascadeWithOwnedData::try_new_or_drop( let filter = *Cascade::from_bytes(data.to_vec())
Vec::from(data.as_slice()).into_boxed_slice(), .unwrap_or(None)
|data| { .ok_or(NS_ERROR_INVALID_ARG)?;
Cascade::from_bytes(data)
.unwrap_or(None)
.ok_or(NS_ERROR_INVALID_ARG)
},
)?;
self.filter.borrow_mut().replace(filter); self.filter.borrow_mut().replace(filter);
Ok(()) Ok(())
} }
@ -66,7 +45,7 @@ impl CascadeFilter {
fn has(&self, key: &nsACString) -> Result<bool, nsresult> { fn has(&self, key: &nsACString) -> Result<bool, nsresult> {
match self.filter.borrow().as_ref() { match self.filter.borrow().as_ref() {
None => Err(NS_ERROR_NOT_INITIALIZED), None => Err(NS_ERROR_NOT_INITIALIZED),
Some(filter) => Ok(filter.rent(|cascade| cascade.has(&*key))), Some(filter) => Ok(filter.has(&*key)),
} }
} }
} }