зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1615974 - avoid memmapping CRLite filters in cert_storage r=jschanck,robwu
Differential Revision: https://phabricator.services.mozilla.com/D140266
This commit is contained in:
Родитель
89abef6e8a
Коммит
23c938c2f3
|
@ -611,7 +611,6 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"nserror",
|
||||
"nsstring",
|
||||
"rental",
|
||||
"rust_cascade",
|
||||
"thin-vec",
|
||||
"xpcom",
|
||||
|
@ -632,12 +631,9 @@ dependencies = [
|
|||
"crossbeam-utils 0.8.6",
|
||||
"cstr",
|
||||
"log",
|
||||
"malloc_size_of_derive",
|
||||
"memmap2 0.3.1",
|
||||
"moz_task",
|
||||
"nserror",
|
||||
"nsstring",
|
||||
"rental",
|
||||
"rkv",
|
||||
"rust_cascade",
|
||||
"sha2",
|
||||
|
@ -4431,9 +4427,9 @@ checksum = "8a654c5bda722c699be6b0fe4c0d90de218928da5b724c3e467fc48865c37263"
|
|||
|
||||
[[package]]
|
||||
name = "rust_cascade"
|
||||
version = "0.6.0"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a5b9bba8f5b985e4923dadd273a987f83669083f3355d65c699e02b9d3d854d"
|
||||
checksum = "d09c17a9310f1eb79a67d307adffa7fa1c5943eaadcc21d4fb7f611536d66c4f"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"digest",
|
||||
|
|
|
@ -9,18 +9,15 @@ byteorder = "1.2.7"
|
|||
crossbeam-utils = "0.8"
|
||||
cstr = "0.2"
|
||||
log = "0.4"
|
||||
memmap2 = "0.3"
|
||||
moz_task = { path = "../../../../xpcom/rust/moz_task" }
|
||||
nserror = { path = "../../../../xpcom/rust/nserror" }
|
||||
nsstring = { path = "../../../../xpcom/rust/nsstring" }
|
||||
rental = "0.5.5"
|
||||
rkv = { version = "0.17", default-features = false }
|
||||
rust_cascade = "0.6.0"
|
||||
rust_cascade = "1.2.0"
|
||||
sha2 = "^0.8"
|
||||
storage_variant = { path = "../../../../storage/variant" }
|
||||
tempfile = "3"
|
||||
thin-vec = { version = "0.2.1", features = ["gecko-ffi"] }
|
||||
time = "0.1"
|
||||
xpcom = { path = "../../../../xpcom/rust/xpcom" }
|
||||
malloc_size_of_derive = "0.1"
|
||||
wr_malloc_size_of = { path = "../../../../gfx/wr/wr_malloc_size_of" }
|
||||
|
|
|
@ -9,12 +9,9 @@ extern crate crossbeam_utils;
|
|||
extern crate cstr;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memmap2;
|
||||
extern crate moz_task;
|
||||
extern crate nserror;
|
||||
extern crate nsstring;
|
||||
#[macro_use]
|
||||
extern crate rental;
|
||||
extern crate rkv;
|
||||
extern crate rust_cascade;
|
||||
extern crate sha2;
|
||||
|
@ -22,8 +19,6 @@ extern crate thin_vec;
|
|||
extern crate time;
|
||||
#[macro_use]
|
||||
extern crate xpcom;
|
||||
#[macro_use]
|
||||
extern crate malloc_size_of_derive;
|
||||
extern crate storage_variant;
|
||||
extern crate tempfile;
|
||||
|
||||
|
@ -33,7 +28,6 @@ use wr_malloc_size_of as malloc_size_of;
|
|||
use byteorder::{LittleEndian, NetworkEndian, ReadBytesExt, WriteBytesExt};
|
||||
use crossbeam_utils::atomic::AtomicCell;
|
||||
use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
|
||||
use memmap2::Mmap;
|
||||
use moz_task::{create_background_task_queue, is_main_thread, Task, TaskRunnable};
|
||||
use nserror::{
|
||||
nsresult, NS_ERROR_FAILURE, NS_ERROR_NOT_SAME_THREAD, NS_ERROR_NO_AGGREGATION,
|
||||
|
@ -132,30 +126,12 @@ impl MallocSizeOf for EnvAndStore {
|
|||
}
|
||||
}
|
||||
|
||||
// In Rust, structs cannot have self references (if a struct gets moved, the compiler has no
|
||||
// guarantees that the references are still valid). In our case, since the memmapped data is at a
|
||||
// particular place in memory (and that's what we're referencing), we can use the rental crate to
|
||||
// create a struct that does reference itself.
|
||||
rental! {
|
||||
mod holding {
|
||||
use super::{Cascade, Mmap};
|
||||
|
||||
#[rental]
|
||||
pub struct CRLiteFilter {
|
||||
backing_file: Box<Mmap>,
|
||||
cascade: Box<Cascade<'backing_file>>,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `SecurityState`
|
||||
#[derive(MallocSizeOf)]
|
||||
struct SecurityState {
|
||||
profile_path: PathBuf,
|
||||
env_and_store: Option<EnvAndStore>,
|
||||
int_prefs: HashMap<String, u32>,
|
||||
#[ignore_malloc_size_of = "rental crate does not allow impls for rental structs"]
|
||||
crlite_filter: Option<holding::CRLiteFilter>,
|
||||
crlite_filter: Option<Cascade>,
|
||||
/// Maps issuer spki hashes to sets of serial numbers.
|
||||
crlite_stash: Option<HashMap<Vec<u8>, HashSet<Vec<u8>>>>,
|
||||
/// Maps an RFC 6962 LogID to a pair of 64 bit unix timestamps
|
||||
|
@ -529,15 +505,12 @@ impl SecurityState {
|
|||
if !path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
let filter_file = File::open(path)?;
|
||||
let mmap = unsafe { Mmap::map(&filter_file)? };
|
||||
let crlite_filter = holding::CRLiteFilter::try_new(Box::new(mmap), |mmap| {
|
||||
match Cascade::from_bytes(mmap)? {
|
||||
Some(cascade) => Ok(cascade),
|
||||
None => Err(SecurityStateError::from("invalid CRLite filter")),
|
||||
}
|
||||
})
|
||||
.map_err(|_| SecurityStateError::from("unable to initialize CRLite filter"))?;
|
||||
let mut filter_file = File::open(path)?;
|
||||
let mut filter_bytes = Vec::new();
|
||||
let _ = filter_file.read_to_end(&mut filter_bytes)?;
|
||||
let crlite_filter = *Cascade::from_bytes(filter_bytes)
|
||||
.map_err(|_| SecurityStateError::from("invalid CRLite filter"))?
|
||||
.ok_or(SecurityStateError::from("expecting non-empty filter"))?;
|
||||
|
||||
let mut path = get_store_path(&self.profile_path)?;
|
||||
path.push("crlite.coverage");
|
||||
|
@ -672,7 +645,7 @@ impl SecurityState {
|
|||
lookup_key.extend_from_slice(serial_number);
|
||||
debug!("CRLite lookup key: {:?}", lookup_key);
|
||||
let result = match &self.crlite_filter {
|
||||
Some(crlite_filter) => crlite_filter.rent(|filter| filter.has(&lookup_key)),
|
||||
Some(crlite_filter) => crlite_filter.has(&lookup_key),
|
||||
// This can only happen if the backing file was deleted or if it or our database has
|
||||
// become corrupted. In any case, we have no information.
|
||||
None => return nsICertStorage::STATE_NOT_COVERED,
|
||||
|
@ -881,6 +854,21 @@ impl SecurityState {
|
|||
}
|
||||
}
|
||||
|
||||
impl MallocSizeOf for SecurityState {
|
||||
fn size_of(&self, ops: &mut MallocSizeOfOps) -> usize {
|
||||
self.profile_path.size_of(ops)
|
||||
+ self.env_and_store.size_of(ops)
|
||||
+ self.int_prefs.size_of(ops)
|
||||
+ self
|
||||
.crlite_filter
|
||||
.as_ref()
|
||||
.map_or(0, |crlite_filter| crlite_filter.approximate_size_of())
|
||||
+ self.crlite_stash.size_of(ops)
|
||||
+ self.crlite_coverage.size_of(ops)
|
||||
+ self.remaining_ops.size_of(ops)
|
||||
}
|
||||
}
|
||||
|
||||
const CERT_SERIALIZATION_VERSION_1: u8 = 1;
|
||||
|
||||
// A Cert consists of its DER encoding, its DER-encoded subject, and its trust (currently
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"files":{"Cargo.toml":"411cb740d6be8346206164df646ac9df304e9a84bb9f10eb4b07d2ef2f6566ec","README.md":"a4396d1adf63a77ae9aa0d1d850d02d09eec4a92810a52d675163688f312b3e8","license.txt":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","src/lib.rs":"2c6d1e01ae3a39baad99cd4567b0164dec4dcf77688bc2c3b43798215c857943","test_data/make-sample-data.py":"68bcb106c3ac1929da52e1abb71cd2a6d59eb79549f6e40042368161baa920e0","test_data/requirements.txt":"cb9372b33ed2774e0d5040459fd63a2f9abae2be599869be43a2a077b2c08aa3","test_data/test_v1_murmur_mlbf":"243df0b7f2f55bfe3cefbba2d4be5eb7957c0a063559c9f284ca4c1ee4211eb5","test_data/test_v1_murmur_short_mlbf":"3d4f03dc0a65cf5800efed6ac0b3c73e5b61e5d62bc82ac42744abc67f4c30fa","test_data/test_v2_murmur_inverted_mlbf":"efdd0ab309883f6a3148ec2ddaf0dcb768790e6f130e4e0556994202b1fd7cc4","test_data/test_v2_murmur_mlbf":"80e8e148fbf95aed39783f1fcc2d4576074f8c487656ca2d53571da4b17e20a9","test_data/test_v2_sha256_inverted_mlbf":"e5148cabb45c4899f8220ca51f96a6c76c688e39dfd340ae56bf9dc5226eada2","test_data/test_v2_sha256_mlbf":"08986847b8b2f3bdf4d2df51e465938f88f7a7c401b1740094fc40b033e80b51","test_data/test_v2_sha256_salt_mlbf":"d7b9bf88872162a1917eb14d0340a88b61b574fb1a7120fa54d061e43a9f5460"},"package":"9a5b9bba8f5b985e4923dadd273a987f83669083f3355d65c699e02b9d3d854d"}
|
||||
{"files":{"Cargo.toml":"001e85e1a2fb801d92db560e0c6abbdfbff246c0bce600e1908f674819acb1d7","README.md":"a4396d1adf63a77ae9aa0d1d850d02d09eec4a92810a52d675163688f312b3e8","license.txt":"1f256ecad192880510e84ad60474eab7589218784b9a50bc7ceee34c2b91f1d5","src/lib.rs":"ea99597d605feb5a33fbe678ae86fae525042ce48704383e61ee54dd95e0e854","test_data/make-sample-data.py":"68bcb106c3ac1929da52e1abb71cd2a6d59eb79549f6e40042368161baa920e0","test_data/requirements.txt":"cb9372b33ed2774e0d5040459fd63a2f9abae2be599869be43a2a077b2c08aa3","test_data/test_v1_murmur_mlbf":"243df0b7f2f55bfe3cefbba2d4be5eb7957c0a063559c9f284ca4c1ee4211eb5","test_data/test_v1_murmur_short_mlbf":"3d4f03dc0a65cf5800efed6ac0b3c73e5b61e5d62bc82ac42744abc67f4c30fa","test_data/test_v2_murmur_inverted_mlbf":"efdd0ab309883f6a3148ec2ddaf0dcb768790e6f130e4e0556994202b1fd7cc4","test_data/test_v2_murmur_mlbf":"80e8e148fbf95aed39783f1fcc2d4576074f8c487656ca2d53571da4b17e20a9","test_data/test_v2_sha256_inverted_mlbf":"e5148cabb45c4899f8220ca51f96a6c76c688e39dfd340ae56bf9dc5226eada2","test_data/test_v2_sha256_mlbf":"08986847b8b2f3bdf4d2df51e465938f88f7a7c401b1740094fc40b033e80b51","test_data/test_v2_sha256_salt_mlbf":"d7b9bf88872162a1917eb14d0340a88b61b574fb1a7120fa54d061e43a9f5460"},"package":"d09c17a9310f1eb79a67d307adffa7fa1c5943eaadcc21d4fb7f611536d66c4f"}
|
|
@ -3,18 +3,17 @@
|
|||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
name = "rust_cascade"
|
||||
version = "0.6.0"
|
||||
version = "1.2.0"
|
||||
authors = ["Mark Goodwin <mgoodwin@mozilla.com>", "Dana Keeler <dkeeler@mozilla.com>", "J.C. Jones <jc@mozilla.com>"]
|
||||
description = "A simple mmh3 based bloom filter cascade implementation in Rust."
|
||||
description = "A simple bloom filter cascade implementation in Rust."
|
||||
homepage = "https://github.com/mozilla/rust-cascade"
|
||||
documentation = "https://docs.rs/rust_cascade/"
|
||||
license = "MPL-2.0"
|
||||
|
|
|
@ -8,25 +8,26 @@ use murmurhash3::murmurhash3_x86_32;
|
|||
use sha2::{Digest, Sha256};
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::fmt;
|
||||
use std::io::{Error, ErrorKind};
|
||||
use std::io::{Error, ErrorKind, Read};
|
||||
use std::mem::size_of;
|
||||
|
||||
/// Helper struct to provide read-only bit access to a slice of bytes.
|
||||
struct BitSlice<'a> {
|
||||
/// The slice of bytes we're interested in.
|
||||
bytes: &'a [u8],
|
||||
/// The number of bits that are valid to access in the slice.
|
||||
/// Helper struct to provide read-only bit access to a vector of bytes.
|
||||
struct BitVector {
|
||||
/// The bytes we're interested in.
|
||||
bytes: Vec<u8>,
|
||||
/// The number of bits that are valid to access in the vector.
|
||||
/// Not necessarily equal to `bytes.len() * 8`, but it will not be greater than that.
|
||||
bit_len: usize,
|
||||
}
|
||||
|
||||
impl<'a> BitSlice<'a> {
|
||||
/// Creates a new `BitSlice` of the given bit length over the given slice of data.
|
||||
/// Panics if the indicated bit length is larger than fits in the slice.
|
||||
impl BitVector {
|
||||
/// Creates a new `BitVector` of the given bit length over the given data.
|
||||
/// Panics if the indicated bit length is larger than fits in the vector.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `bytes` - The slice of bytes we need bit-access to
|
||||
/// * `bit_len` - The number of bits that are valid to access in the slice
|
||||
fn new(bytes: &'a [u8], bit_len: usize) -> BitSlice<'a> {
|
||||
/// * `bytes` - The bytes we need bit-access to
|
||||
/// * `bit_len` - The number of bits that are valid to access in the vector
|
||||
fn new(bytes: Vec<u8>, bit_len: usize) -> BitVector {
|
||||
if bit_len > bytes.len() * 8 {
|
||||
panic!(
|
||||
"bit_len too large for given data: {} > {} * 8",
|
||||
|
@ -34,7 +35,7 @@ impl<'a> BitSlice<'a> {
|
|||
bytes.len()
|
||||
);
|
||||
}
|
||||
BitSlice { bytes, bit_len }
|
||||
BitVector { bytes, bit_len }
|
||||
}
|
||||
|
||||
/// Get the value of the specified bit.
|
||||
|
@ -45,7 +46,7 @@ impl<'a> BitSlice<'a> {
|
|||
fn get(&self, bit_index: usize) -> bool {
|
||||
if bit_index >= self.bit_len {
|
||||
panic!(
|
||||
"bit index out of range for bit slice: {} >= {}",
|
||||
"bit index out of range for bit vector: {} >= {}",
|
||||
bit_index, self.bit_len
|
||||
);
|
||||
}
|
||||
|
@ -68,7 +69,7 @@ impl<'a> BitSlice<'a> {
|
|||
}
|
||||
|
||||
/// A Bloom filter representing a specific level in a multi-level cascading Bloom filter.
|
||||
struct Bloom<'a> {
|
||||
struct Bloom {
|
||||
/// What level this filter is in
|
||||
level: u8,
|
||||
/// How many hash functions this filter uses
|
||||
|
@ -76,7 +77,7 @@ struct Bloom<'a> {
|
|||
/// The bit length of the filter
|
||||
size: u32,
|
||||
/// The data of the filter
|
||||
bit_slice: BitSlice<'a>,
|
||||
bit_vector: BitVector,
|
||||
/// The hash algorithm enumeration in use
|
||||
hash_algorithm: HashAlgorithm,
|
||||
}
|
||||
|
@ -108,13 +109,12 @@ impl TryFrom<u8> for HashAlgorithm {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> Bloom<'a> {
|
||||
/// Attempts to decode and return a pair that consists of the Bloom filter represented by the
|
||||
/// given bytes and any remaining unprocessed bytes in the given bytes.
|
||||
impl Bloom {
|
||||
/// Attempts to decode the Bloom filter represented by the bytes in the given reader.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `bytes` - The encoded representation of this Bloom filter. May include additional data
|
||||
/// describing further Bloom filters. Any additional data is returned unconsumed.
|
||||
/// * `reader` - The encoded representation of this Bloom filter. May be empty. May include
|
||||
/// additional data describing further Bloom filters.
|
||||
/// The format of an encoded Bloom filter is:
|
||||
/// [1 byte] - the hash algorithm to use in the filter
|
||||
/// [4 little endian bytes] - the length in bits of the filter
|
||||
|
@ -122,11 +122,15 @@ impl<'a> Bloom<'a> {
|
|||
/// [1 byte] - which level in the cascade this filter is
|
||||
/// [variable length bytes] - the filter itself (the length is determined by Ceiling(bit length
|
||||
/// / 8))
|
||||
pub fn from_bytes(bytes: &'a [u8]) -> Result<(Bloom<'a>, &'a [u8]), Error> {
|
||||
let mut cursor = bytes;
|
||||
pub fn read<R: Read>(reader: &mut R) -> Result<Option<Bloom>, Error> {
|
||||
// Load the layer metadata. bloomer.py writes size, nHashFuncs and level as little-endian
|
||||
// unsigned ints.
|
||||
let hash_algorithm_val = cursor.read_u8()?;
|
||||
let hash_algorithm_val = match reader.read_u8() {
|
||||
Ok(val) => val,
|
||||
// If reader is at EOF, there is no bloom filter.
|
||||
Err(e) if e.kind() == ErrorKind::UnexpectedEof => return Ok(None),
|
||||
Err(e) => return Err(e),
|
||||
};
|
||||
let hash_algorithm = match HashAlgorithm::try_from(hash_algorithm_val) {
|
||||
Ok(algo) => algo,
|
||||
Err(()) => {
|
||||
|
@ -137,9 +141,9 @@ impl<'a> Bloom<'a> {
|
|||
}
|
||||
};
|
||||
|
||||
let size = cursor.read_u32::<byteorder::LittleEndian>()?;
|
||||
let n_hash_funcs = cursor.read_u32::<byteorder::LittleEndian>()?;
|
||||
let level = cursor.read_u8()?;
|
||||
let size = reader.read_u32::<byteorder::LittleEndian>()?;
|
||||
let n_hash_funcs = reader.read_u32::<byteorder::LittleEndian>()?;
|
||||
let level = reader.read_u8()?;
|
||||
|
||||
let shifted_size = size.wrapping_shr(3) as usize;
|
||||
let byte_count = if size % 8 != 0 {
|
||||
|
@ -147,24 +151,19 @@ impl<'a> Bloom<'a> {
|
|||
} else {
|
||||
shifted_size
|
||||
};
|
||||
if byte_count > cursor.len() {
|
||||
return Err(Error::new(
|
||||
ErrorKind::InvalidData,
|
||||
"Invalid Bloom filter: too short",
|
||||
));
|
||||
}
|
||||
let (bits_bytes, rest_of_bytes) = cursor.split_at(byte_count);
|
||||
let mut bits_bytes = vec![0; byte_count];
|
||||
reader.read_exact(&mut bits_bytes)?;
|
||||
let bloom = Bloom {
|
||||
level,
|
||||
n_hash_funcs,
|
||||
size,
|
||||
bit_slice: BitSlice::new(bits_bytes, size as usize),
|
||||
bit_vector: BitVector::new(bits_bytes, size as usize),
|
||||
hash_algorithm,
|
||||
};
|
||||
Ok((bloom, rest_of_bytes))
|
||||
Ok(Some(bloom))
|
||||
}
|
||||
|
||||
fn hash(&self, n_fn: u32, key: &[u8], salt: Option<&[u8]>) -> u32 {
|
||||
fn hash(&self, n_fn: u32, key: &[u8], salt: Option<&Vec<u8>>) -> u32 {
|
||||
match self.hash_algorithm {
|
||||
HashAlgorithm::MurmurHash3 => {
|
||||
if salt.is_some() {
|
||||
|
@ -195,9 +194,9 @@ impl<'a> Bloom<'a> {
|
|||
///
|
||||
/// # Arguments
|
||||
/// `item` - The slice of bytes to test for
|
||||
pub fn has(&self, item: &[u8], salt: Option<&[u8]>) -> bool {
|
||||
fn has(&self, item: &[u8], salt: Option<&Vec<u8>>) -> bool {
|
||||
for i in 0..self.n_hash_funcs {
|
||||
if !self.bit_slice.get(self.hash(i, item, salt) as usize) {
|
||||
if !self.bit_vector.get(self.hash(i, item, salt) as usize) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -205,7 +204,7 @@ impl<'a> Bloom<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for Bloom<'a> {
|
||||
impl fmt::Display for Bloom {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
|
@ -216,21 +215,19 @@ impl<'a> fmt::Display for Bloom<'a> {
|
|||
}
|
||||
|
||||
/// A multi-level cascading Bloom filter.
|
||||
pub struct Cascade<'a> {
|
||||
pub struct Cascade {
|
||||
/// The Bloom filter for this level in the cascade
|
||||
filter: Bloom<'a>,
|
||||
filter: Bloom,
|
||||
/// The next (lower) level in the cascade
|
||||
child_layer: Option<Box<Cascade<'a>>>,
|
||||
child_layer: Option<Box<Cascade>>,
|
||||
/// The salt in use, if any
|
||||
salt: Option<&'a [u8]>,
|
||||
salt: Option<Vec<u8>>,
|
||||
/// Whether the logic should be inverted
|
||||
inverted: bool,
|
||||
}
|
||||
|
||||
impl<'a> Cascade<'a> {
|
||||
/// Attempts to decode and return a multi-level cascading Bloom filter. NB: `Cascade` does not
|
||||
/// take ownership of the given data. This is to facilitate decoding cascading filters
|
||||
/// backed by memory-mapped files.
|
||||
impl Cascade {
|
||||
/// Attempts to decode and return a multi-level cascading Bloom filter.
|
||||
///
|
||||
/// # Arguments
|
||||
/// `bytes` - The encoded representation of the Bloom filters in this cascade. Starts with 2
|
||||
|
@ -239,31 +236,23 @@ impl<'a> Cascade<'a> {
|
|||
/// https://github.com/mozilla/filter-cascade/blob/v0.3.0/filtercascade/fileformats.py
|
||||
///
|
||||
/// May be of length 0, in which case `None` is returned.
|
||||
pub fn from_bytes(bytes: &'a [u8]) -> Result<Option<Box<Cascade<'a>>>, Error> {
|
||||
pub fn from_bytes(bytes: Vec<u8>) -> Result<Option<Box<Cascade>>, Error> {
|
||||
if bytes.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let mut cursor = bytes;
|
||||
let version = cursor.read_u16::<byteorder::LittleEndian>()?;
|
||||
let mut reader = bytes.as_slice();
|
||||
let version = reader.read_u16::<byteorder::LittleEndian>()?;
|
||||
let mut salt = None;
|
||||
let mut inverted = false;
|
||||
|
||||
if version >= 2 {
|
||||
inverted = cursor.read_u8()? != 0;
|
||||
let salt_len = cursor.read_u8()? as usize;
|
||||
|
||||
if salt_len > cursor.len() {
|
||||
return Err(Error::new(
|
||||
ErrorKind::InvalidData,
|
||||
"Invalid Bloom filter: too short",
|
||||
));
|
||||
}
|
||||
|
||||
let (salt_bytes, remaining_bytes) = cursor.split_at(salt_len);
|
||||
inverted = reader.read_u8()? != 0;
|
||||
let salt_len = reader.read_u8()? as usize;
|
||||
if salt_len > 0 {
|
||||
salt = Some(salt_bytes)
|
||||
let mut salt_bytes = vec![0; salt_len];
|
||||
reader.read_exact(&mut salt_bytes)?;
|
||||
salt = Some(salt_bytes);
|
||||
}
|
||||
cursor = remaining_bytes;
|
||||
}
|
||||
|
||||
if version > 2 {
|
||||
|
@ -273,22 +262,23 @@ impl<'a> Cascade<'a> {
|
|||
));
|
||||
}
|
||||
|
||||
Cascade::child_layer_from_bytes(cursor, salt, inverted)
|
||||
Cascade::child_layer_from_bytes(reader, salt, inverted)
|
||||
}
|
||||
|
||||
fn child_layer_from_bytes(
|
||||
bytes: &'a [u8],
|
||||
salt: Option<&'a [u8]>,
|
||||
fn child_layer_from_bytes<R: Read>(
|
||||
mut reader: R,
|
||||
salt: Option<Vec<u8>>,
|
||||
inverted: bool,
|
||||
) -> Result<Option<Box<Cascade<'a>>>, Error> {
|
||||
if bytes.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
let (filter, rest_of_bytes) = Bloom::from_bytes(bytes)?;
|
||||
) -> Result<Option<Box<Cascade>>, Error> {
|
||||
let filter = match Bloom::read(&mut reader)? {
|
||||
Some(filter) => filter,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let our_salt = salt.as_ref().cloned();
|
||||
Ok(Some(Box::new(Cascade {
|
||||
filter,
|
||||
child_layer: Cascade::child_layer_from_bytes(rest_of_bytes, salt, inverted)?,
|
||||
salt,
|
||||
child_layer: Cascade::child_layer_from_bytes(reader, salt, inverted)?,
|
||||
salt: our_salt,
|
||||
inverted,
|
||||
})))
|
||||
}
|
||||
|
@ -305,8 +295,8 @@ impl<'a> Cascade<'a> {
|
|||
result
|
||||
}
|
||||
|
||||
pub fn has_internal(&self, entry: &[u8]) -> bool {
|
||||
if self.filter.has(&entry, self.salt) {
|
||||
fn has_internal(&self, entry: &[u8]) -> bool {
|
||||
if self.filter.has(entry, self.salt.as_ref()) {
|
||||
match self.child_layer {
|
||||
Some(ref child) => {
|
||||
let child_value = !child.has_internal(entry);
|
||||
|
@ -319,9 +309,25 @@ impl<'a> Cascade<'a> {
|
|||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Determine the approximate amount of memory in bytes used by this
|
||||
/// Cascade. Because this implementation does not integrate with the
|
||||
/// allocator, it can't get an accurate measurement of how much memory it
|
||||
/// uses. However, it can make a reasonable guess, assuming the sizes of
|
||||
/// the bloom filters are large enough to dominate the overall allocated
|
||||
/// size.
|
||||
pub fn approximate_size_of(&self) -> usize {
|
||||
size_of::<Cascade>()
|
||||
+ self.filter.bit_vector.bytes.len()
|
||||
+ self
|
||||
.child_layer
|
||||
.as_ref()
|
||||
.map_or(0, |child_layer| child_layer.approximate_size_of())
|
||||
+ self.salt.as_ref().map_or(0, |salt| salt.len())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Display for Cascade<'a> {
|
||||
impl fmt::Display for Cascade {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
|
@ -345,34 +351,41 @@ mod tests {
|
|||
let src: Vec<u8> = vec![
|
||||
0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41, 0x00,
|
||||
];
|
||||
let mut reader = src.as_slice();
|
||||
|
||||
match Bloom::from_bytes(&src) {
|
||||
Ok((bloom, rest_of_bytes)) => {
|
||||
assert!(rest_of_bytes.len() == 0);
|
||||
match Bloom::read(&mut reader) {
|
||||
Ok(Some(bloom)) => {
|
||||
assert!(bloom.has(b"this", None) == true);
|
||||
assert!(bloom.has(b"that", None) == true);
|
||||
assert!(bloom.has(b"other", None) == false);
|
||||
}
|
||||
Err(_) => {
|
||||
panic!("Parsing failed");
|
||||
}
|
||||
Ok(None) => panic!("Parsing failed"),
|
||||
Err(_) => panic!("Parsing failed"),
|
||||
};
|
||||
assert!(reader.is_empty());
|
||||
|
||||
let short: Vec<u8> = vec![
|
||||
0x01, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x41,
|
||||
];
|
||||
assert!(Bloom::from_bytes(&short).is_err());
|
||||
assert!(Bloom::read(&mut short.as_slice()).is_err());
|
||||
|
||||
let empty: Vec<u8> = Vec::new();
|
||||
let mut reader = empty.as_slice();
|
||||
match Bloom::read(&mut reader) {
|
||||
Ok(should_be_none) => assert!(should_be_none.is_none()),
|
||||
Err(_) => panic!("Parsing failed"),
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bloom_v3_unsupported() {
|
||||
let src: Vec<u8> = vec![0x03, 0x01, 0x00];
|
||||
assert!(Bloom::from_bytes(&src).is_err());
|
||||
assert!(Bloom::read(&mut src.as_slice()).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cascade_v1_murmur_from_file_bytes_test() {
|
||||
let v = include_bytes!("../test_data/test_v1_murmur_mlbf");
|
||||
let v = include_bytes!("../test_data/test_v1_murmur_mlbf").to_vec();
|
||||
let cascade = Cascade::from_bytes(v)
|
||||
.expect("parsing Cascade should succeed")
|
||||
.expect("Cascade should be Some");
|
||||
|
@ -401,13 +414,15 @@ mod tests {
|
|||
0x77, 0x8e ];
|
||||
assert!(!cascade.has(&key_for_valid_cert));
|
||||
|
||||
let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf");
|
||||
assert_eq!(cascade.approximate_size_of(), 15632);
|
||||
|
||||
let v = include_bytes!("../test_data/test_v1_murmur_short_mlbf").to_vec();
|
||||
assert!(Cascade::from_bytes(v).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cascade_v2_sha256_from_file_bytes_test() {
|
||||
let v = include_bytes!("../test_data/test_v2_sha256_mlbf");
|
||||
let v = include_bytes!("../test_data/test_v2_sha256_mlbf").to_vec();
|
||||
let cascade = Cascade::from_bytes(v)
|
||||
.expect("parsing Cascade should succeed")
|
||||
.expect("Cascade should be Some");
|
||||
|
@ -417,25 +432,27 @@ mod tests {
|
|||
assert!(cascade.has(b"this") == true);
|
||||
assert!(cascade.has(b"that") == true);
|
||||
assert!(cascade.has(b"other") == false);
|
||||
assert_eq!(cascade.approximate_size_of(), 10247);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cascade_v2_sha256_with_salt_from_file_bytes_test() {
|
||||
let v = include_bytes!("../test_data/test_v2_sha256_salt_mlbf");
|
||||
let v = include_bytes!("../test_data/test_v2_sha256_salt_mlbf").to_vec();
|
||||
let cascade = Cascade::from_bytes(v)
|
||||
.expect("parsing Cascade should succeed")
|
||||
.expect("Cascade should be Some");
|
||||
|
||||
assert!(cascade.salt == Some(b"nacl"));
|
||||
assert!(cascade.salt == Some(b"nacl".to_vec()));
|
||||
assert!(cascade.inverted == false);
|
||||
assert!(cascade.has(b"this") == true);
|
||||
assert!(cascade.has(b"that") == true);
|
||||
assert!(cascade.has(b"other") == false);
|
||||
assert_eq!(cascade.approximate_size_of(), 10251);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cascade_v2_murmur_from_file_bytes_test() {
|
||||
let v = include_bytes!("../test_data/test_v2_murmur_mlbf");
|
||||
let v = include_bytes!("../test_data/test_v2_murmur_mlbf").to_vec();
|
||||
let cascade = Cascade::from_bytes(v)
|
||||
.expect("parsing Cascade should succeed")
|
||||
.expect("Cascade should be Some");
|
||||
|
@ -445,11 +462,12 @@ mod tests {
|
|||
assert!(cascade.has(b"this") == true);
|
||||
assert!(cascade.has(b"that") == true);
|
||||
assert!(cascade.has(b"other") == false);
|
||||
assert_eq!(cascade.approximate_size_of(), 10247);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cascade_v2_murmur_inverted_from_file_bytes_test() {
|
||||
let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf");
|
||||
let v = include_bytes!("../test_data/test_v2_murmur_inverted_mlbf").to_vec();
|
||||
let cascade = Cascade::from_bytes(v)
|
||||
.expect("parsing Cascade should succeed")
|
||||
.expect("Cascade should be Some");
|
||||
|
@ -459,11 +477,12 @@ mod tests {
|
|||
assert!(cascade.has(b"this") == true);
|
||||
assert!(cascade.has(b"that") == true);
|
||||
assert!(cascade.has(b"other") == false);
|
||||
assert_eq!(cascade.approximate_size_of(), 10247);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cascade_v2_sha256_inverted_from_file_bytes_test() {
|
||||
let v = include_bytes!("../test_data/test_v2_sha256_inverted_mlbf");
|
||||
let v = include_bytes!("../test_data/test_v2_sha256_inverted_mlbf").to_vec();
|
||||
let cascade = Cascade::from_bytes(v)
|
||||
.expect("parsing Cascade should succeed")
|
||||
.expect("Cascade should be Some");
|
||||
|
@ -473,5 +492,12 @@ mod tests {
|
|||
assert!(cascade.has(b"this") == true);
|
||||
assert!(cascade.has(b"that") == true);
|
||||
assert!(cascade.has(b"other") == false);
|
||||
assert_eq!(cascade.approximate_size_of(), 10247);
|
||||
}
|
||||
|
||||
#[test]
fn cascade_empty() {
    // A zero-length input is not an error: decoding succeeds and yields no cascade.
    let no_bytes = Vec::new();
    let parsed = Cascade::from_bytes(no_bytes).expect("parsing Cascade should succeed");
    assert!(parsed.is_none());
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ authors = ["Rob Wu <rob@robwu.nl>"]
|
|||
[dependencies]
|
||||
nserror = { path = "../../../xpcom/rust/nserror" }
|
||||
nsstring = { path = "../../../xpcom/rust/nsstring" }
|
||||
rental = "0.5.5"
|
||||
rust_cascade = "0.6.0"
|
||||
rust_cascade = "1.2.0"
|
||||
thin-vec = { version = "0.2.1", features = ["gecko-ffi"] }
|
||||
xpcom = { path = "../../../xpcom/rust/xpcom" }
|
||||
|
|
|
@ -4,8 +4,6 @@
|
|||
|
||||
extern crate nserror;
|
||||
extern crate nsstring;
|
||||
#[macro_use]
|
||||
extern crate rental;
|
||||
extern crate rust_cascade;
|
||||
extern crate thin_vec;
|
||||
#[macro_use]
|
||||
|
@ -19,25 +17,11 @@ use thin_vec::ThinVec;
|
|||
use xpcom::interfaces::nsICascadeFilter;
|
||||
use xpcom::{xpcom_method, RefPtr};
|
||||
|
||||
// Cascade does not take ownership of the data, so we must own the data in order to pass its
|
||||
// reference to Cascade.
|
||||
rental! {
|
||||
mod rentals {
|
||||
use super::Cascade;
|
||||
|
||||
#[rental]
|
||||
pub struct CascadeWithOwnedData {
|
||||
owndata: Box<[u8]>,
|
||||
cascade: Box<Cascade<'owndata>>,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(xpcom)]
|
||||
#[xpimplements(nsICascadeFilter)]
|
||||
#[refcnt = "nonatomic"]
|
||||
pub struct InitCascadeFilter {
|
||||
filter: RefCell<Option<rentals::CascadeWithOwnedData>>,
|
||||
filter: RefCell<Option<Cascade>>,
|
||||
}
|
||||
|
||||
impl CascadeFilter {
|
||||
|
@ -49,14 +33,9 @@ impl CascadeFilter {
|
|||
xpcom_method!(set_filter_data => SetFilterData(data: *const ThinVec<u8>));
|
||||
|
||||
fn set_filter_data(&self, data: &ThinVec<u8>) -> Result<(), nsresult> {
|
||||
let filter = rentals::CascadeWithOwnedData::try_new_or_drop(
|
||||
Vec::from(data.as_slice()).into_boxed_slice(),
|
||||
|data| {
|
||||
Cascade::from_bytes(data)
|
||||
let filter = *Cascade::from_bytes(data.to_vec())
|
||||
.unwrap_or(None)
|
||||
.ok_or(NS_ERROR_INVALID_ARG)
|
||||
},
|
||||
)?;
|
||||
.ok_or(NS_ERROR_INVALID_ARG)?;
|
||||
self.filter.borrow_mut().replace(filter);
|
||||
Ok(())
|
||||
}
|
||||
|
@ -66,7 +45,7 @@ impl CascadeFilter {
|
|||
fn has(&self, key: &nsACString) -> Result<bool, nsresult> {
|
||||
match self.filter.borrow().as_ref() {
|
||||
None => Err(NS_ERROR_NOT_INITIALIZED),
|
||||
Some(filter) => Ok(filter.rent(|cascade| cascade.has(&*key))),
|
||||
Some(filter) => Ok(filter.has(&*key)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче