diff --git a/Cargo.lock b/Cargo.lock index e1834fbf74d8..d4692a60f568 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1275,7 +1275,7 @@ checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" name = "fallible" version = "0.0.1" dependencies = [ - "hashbrown", + "hashglobe", "smallvec", ] @@ -2064,6 +2064,13 @@ dependencies = [ "autocfg 1.0.0", ] +[[package]] +name = "hashglobe" +version = "0.1.0" +dependencies = [ + "libc", +] + [[package]] name = "hawk" version = "3.1.1" @@ -2709,7 +2716,7 @@ dependencies = [ "app_units", "cssparser", "euclid", - "hashbrown", + "hashglobe", "selectors", "servo_arc", "smallbitvec", @@ -4529,6 +4536,7 @@ dependencies = [ "fallible", "fxhash", "hashbrown", + "hashglobe", "indexmap", "itertools", "itoa", diff --git a/servo/components/fallible/Cargo.toml b/servo/components/fallible/Cargo.toml index f3cf1e9640b2..27390149993c 100644 --- a/servo/components/fallible/Cargo.toml +++ b/servo/components/fallible/Cargo.toml @@ -10,5 +10,16 @@ name = "fallible" path = "lib.rs" [dependencies] -hashbrown = "0.7" +hashglobe = { path = "../hashglobe" } smallvec = "1.0" + +# This crate effectively does nothing except if the `known_system_malloc` +# feature is specified. +# +# In that case, we actually call the system malloc functions to reserve space, +# otherwise we just let rust do its thing (aborting on OOM). +# +# This is effectively a stop-gap measure until we can do this properly in +# stable rust. +[features] +known_system_malloc = [] diff --git a/servo/components/fallible/lib.rs b/servo/components/fallible/lib.rs index d2e3d9254b7d..1e01e8f80e9f 100644 --- a/servo/components/fallible/lib.rs +++ b/servo/components/fallible/lib.rs @@ -2,61 +2,20 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ -extern crate hashbrown; +extern crate hashglobe; extern crate smallvec; -use hashbrown::hash_map::Entry; -use hashbrown::CollectionAllocErr; +#[cfg(feature = "known_system_malloc")] +use hashglobe::alloc; +use hashglobe::FailedAllocationError; use smallvec::Array; use smallvec::SmallVec; -use std::alloc::{self, Layout}; -use std::mem; -use std::ptr::copy_nonoverlapping; use std::vec::Vec; pub trait FallibleVec { /// Append |val| to the end of |vec|. Returns Ok(()) on success, /// Err(reason) if it fails, with |reason| describing the failure. - fn try_push(&mut self, value: T) -> Result<(), CollectionAllocErr>; -} - -pub trait FallibleHashMap { - fn try_insert(&mut self, k: K, v: V) -> Result, CollectionAllocErr>; - fn try_entry(&mut self, k: K) -> Result, CollectionAllocErr>; -} - -pub trait FallibleHashSet { - fn try_insert(&mut self, x: T) -> Result; -} - -impl FallibleHashMap for hashbrown::HashMap -where - K: Eq + std::hash::Hash, - H: std::hash::BuildHasher, -{ - #[inline] - fn try_insert(&mut self, k: K, v: V) -> Result, CollectionAllocErr> { - self.try_reserve(1)?; - Ok(self.insert(k, v)) - } - - #[inline] - fn try_entry(&mut self, k: K) -> Result, CollectionAllocErr> { - self.try_reserve(1)?; - Ok(self.entry(k)) - } -} - -impl FallibleHashSet for hashbrown::HashSet -where - T: Eq + std::hash::Hash, - H: std::hash::BuildHasher, -{ - #[inline] - fn try_insert(&mut self, x: T) -> Result { - self.try_reserve(1)?; - Ok(self.insert(x)) - } + fn try_push(&mut self, value: T) -> Result<(), FailedAllocationError>; } ///////////////////////////////////////////////////////////////// @@ -64,30 +23,26 @@ where impl FallibleVec for Vec { #[inline(always)] - fn try_push(&mut self, val: T) -> Result<(), CollectionAllocErr> { - if self.capacity() == self.len() { - try_double_vec(self)?; - debug_assert!(self.capacity() > self.len()); + fn try_push(&mut self, val: T) -> Result<(), FailedAllocationError> { + #[cfg(feature = "known_system_malloc")] + { + if self.capacity() 
== self.len() { + try_double_vec(self)?; + debug_assert!(self.capacity() > self.len()); + } } self.push(val); Ok(()) } } -/// FIXME: use `Layout::array` when it’s stable https://github.com/rust-lang/rust/issues/55724 -fn layout_array(n: usize) -> Result { - let size = n.checked_mul(mem::size_of::()) - .ok_or(CollectionAllocErr::CapacityOverflow)?; - let align = std::mem::align_of::(); - Layout::from_size_align(size, align) - .map_err(|_| CollectionAllocErr::CapacityOverflow) -} - // Double the capacity of |vec|, or fail to do so due to lack of memory. // Returns Ok(()) on success, Err(..) on failure. +#[cfg(feature = "known_system_malloc")] #[inline(never)] #[cold] -fn try_double_vec(vec: &mut Vec) -> Result<(), CollectionAllocErr> { +fn try_double_vec(vec: &mut Vec) -> Result<(), FailedAllocationError> { + use std::mem; let old_ptr = vec.as_mut_ptr(); let old_len = vec.len(); @@ -98,22 +53,25 @@ fn try_double_vec(vec: &mut Vec) -> Result<(), CollectionAllocErr> { } else { old_cap .checked_mul(2) - .ok_or(CollectionAllocErr::CapacityOverflow)? + .ok_or(FailedAllocationError::new("capacity overflow for Vec"))? 
}; - let old_layout = layout_array::(old_cap)?; - let new_layout = layout_array::(new_cap)?; + let new_size_bytes = new_cap + .checked_mul(mem::size_of::()) + .ok_or(FailedAllocationError::new("capacity overflow for Vec"))?; let new_ptr = unsafe { if old_cap == 0 { - alloc::alloc(new_layout) + alloc::alloc(new_size_bytes, 0) } else { - alloc::realloc(old_ptr as *mut u8, old_layout, new_layout.size()) + alloc::realloc(old_ptr as *mut u8, new_size_bytes) } }; if new_ptr.is_null() { - return Err(CollectionAllocErr::AllocErr { layout: new_layout }); + return Err(FailedAllocationError::new( + "out of memory when allocating Vec", + )); } let new_vec = unsafe { Vec::from_raw_parts(new_ptr as *mut T, old_len, new_cap) }; @@ -127,10 +85,13 @@ fn try_double_vec(vec: &mut Vec) -> Result<(), CollectionAllocErr> { impl FallibleVec for SmallVec { #[inline(always)] - fn try_push(&mut self, val: T::Item) -> Result<(), CollectionAllocErr> { - if self.capacity() == self.len() { - try_double_small_vec(self)?; - debug_assert!(self.capacity() > self.len()); + fn try_push(&mut self, val: T::Item) -> Result<(), FailedAllocationError> { + #[cfg(feature = "known_system_malloc")] + { + if self.capacity() == self.len() { + try_double_small_vec(self)?; + debug_assert!(self.capacity() > self.len()); + } } self.push(val); Ok(()) @@ -139,12 +100,16 @@ impl FallibleVec for SmallVec { // Double the capacity of |svec|, or fail to do so due to lack of memory. // Returns Ok(()) on success, Err(..) on failure. +#[cfg(feature = "known_system_malloc")] #[inline(never)] #[cold] -fn try_double_small_vec(svec: &mut SmallVec) -> Result<(), CollectionAllocErr> +fn try_double_small_vec(svec: &mut SmallVec) -> Result<(), FailedAllocationError> where T: Array, { + use std::mem; + use std::ptr::copy_nonoverlapping; + let old_ptr = svec.as_mut_ptr(); let old_len = svec.len(); @@ -154,33 +119,40 @@ where } else { old_cap .checked_mul(2) - .ok_or(CollectionAllocErr::CapacityOverflow)? 
+ .ok_or(FailedAllocationError::new("capacity overflow for SmallVec"))? }; // This surely shouldn't fail, if |old_cap| was previously accepted as a // valid value. But err on the side of caution. - let old_layout = layout_array::(old_cap)?; - let new_layout = layout_array::(new_cap)?; + let old_size_bytes = old_cap + .checked_mul(mem::size_of::()) + .ok_or(FailedAllocationError::new("capacity overflow for SmallVec"))?; + + let new_size_bytes = new_cap + .checked_mul(mem::size_of::()) + .ok_or(FailedAllocationError::new("capacity overflow for SmallVec"))?; let new_ptr; if svec.spilled() { // There's an old block to free, and, presumably, old contents to // copy. realloc takes care of both aspects. unsafe { - new_ptr = alloc::realloc(old_ptr as *mut u8, old_layout, new_layout.size()); + new_ptr = alloc::realloc(old_ptr as *mut u8, new_size_bytes); } } else { // There's no old block to free. There may be old contents to copy. unsafe { - new_ptr = alloc::alloc(new_layout); - if !new_ptr.is_null() && old_layout.size() > 0 { - copy_nonoverlapping(old_ptr as *const u8, new_ptr as *mut u8, old_layout.size()); + new_ptr = alloc::alloc(new_size_bytes, 0); + if !new_ptr.is_null() && old_size_bytes > 0 { + copy_nonoverlapping(old_ptr as *const u8, new_ptr as *mut u8, old_size_bytes); } } } if new_ptr.is_null() { - return Err(CollectionAllocErr::AllocErr { layout: new_layout }); + return Err(FailedAllocationError::new( + "out of memory when allocating SmallVec", + )); } let new_vec = unsafe { Vec::from_raw_parts(new_ptr as *mut T::Item, old_len, new_cap) }; diff --git a/servo/components/hashglobe/Cargo.toml b/servo/components/hashglobe/Cargo.toml new file mode 100644 index 000000000000..49bc0126cff6 --- /dev/null +++ b/servo/components/hashglobe/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "hashglobe" +version = "0.1.0" +authors = ["The Rust Project Developers", "Manish Goregaokar "] +license = "MIT/Apache-2.0" +description = "Fork of std::HashMap with stable fallible 
allocation." +documentation = "https://docs.rs/hashglobe" +repository = "https://github.com/Manishearth/hashglobe" + +readme = "README.md" + +[dependencies] +libc = "0.2" + +[dev-dependencies] +rand = "0.7" diff --git a/servo/components/hashglobe/LICENSE-APACHE b/servo/components/hashglobe/LICENSE-APACHE new file mode 100644 index 000000000000..16fe87b06e80 --- /dev/null +++ b/servo/components/hashglobe/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/servo/components/hashglobe/LICENSE-MIT b/servo/components/hashglobe/LICENSE-MIT new file mode 100644 index 000000000000..31aa79387f27 --- /dev/null +++ b/servo/components/hashglobe/LICENSE-MIT @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/servo/components/hashglobe/README.md b/servo/components/hashglobe/README.md new file mode 100644 index 000000000000..e2f1df4fa974 --- /dev/null +++ b/servo/components/hashglobe/README.md @@ -0,0 +1,17 @@ +hashglobe +======== + + +This is a fork of Rust's `std::HashMap`. It works on stable out of the stdlib and has fallible APIs. + +We intend to diverge as little as possible from the original hashmap. + + +Dual licensed Apache/MIT, the same as the stdlib. + + +## Should I use this? + +No. + +Wait for https://github.com/rust-lang/rfcs/pull/2116 instead. diff --git a/servo/components/hashglobe/src/alloc.rs b/servo/components/hashglobe/src/alloc.rs new file mode 100644 index 000000000000..b1c7a6eca5ee --- /dev/null +++ b/servo/components/hashglobe/src/alloc.rs @@ -0,0 +1,161 @@ +// FORK NOTE: Copied from liballoc_system, removed unnecessary APIs, +// APIs take size/align directly instead of Layout + +// The minimum alignment guaranteed by the architecture. This value is used to +// add fast paths for low alignment values. In practice, the alignment is a +// constant at the call site and the branch will be optimized out. 
+#[cfg(all(any( + target_arch = "x86", + target_arch = "arm", + target_arch = "mips", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "asmjs", + target_arch = "wasm32" +)))] +const MIN_ALIGN: usize = 8; +#[cfg(all(any( + target_arch = "x86_64", + target_arch = "aarch64", + target_arch = "mips64", + target_arch = "s390x", + target_arch = "sparc64" +)))] +const MIN_ALIGN: usize = 16; + +pub use self::platform::{alloc, dealloc, realloc}; + +#[cfg(any(unix, target_os = "redox"))] +mod platform { + extern crate libc; + + #[cfg(not(any(target_os = "android")))] + use std::ptr; + + use super::MIN_ALIGN; + + #[inline] + pub unsafe fn alloc(size: usize, align: usize) -> *mut u8 { + let ptr = if align <= MIN_ALIGN { + libc::malloc(size) as *mut u8 + } else { + aligned_malloc(size, align) + }; + ptr + } + + #[inline] + pub unsafe fn dealloc(ptr: *mut u8, _align: usize) { + libc::free(ptr as *mut libc::c_void) + } + + #[inline] + pub unsafe fn realloc(ptr: *mut u8, new_size: usize) -> *mut u8 { + libc::realloc(ptr as *mut libc::c_void, new_size) as *mut u8 + } + + #[cfg(any(target_os = "android", target_os = "redox"))] + #[inline] + unsafe fn aligned_malloc(size: usize, align: usize) -> *mut u8 { + // On android we currently target API level 9 which unfortunately + // doesn't have the `posix_memalign` API used below. Instead we use + // `memalign`, but this unfortunately has the property on some systems + // where the memory returned cannot be deallocated by `free`! + // + // Upon closer inspection, however, this appears to work just fine with + // Android, so for this platform we should be fine to call `memalign` + // (which is present in API level 9). Some helpful references could + // possibly be chromium using memalign [1], attempts at documenting that + // memalign + free is ok [2] [3], or the current source of chromium + // which still uses memalign on android [4]. 
+ // + // [1]: https://codereview.chromium.org/10796020/ + // [2]: https://code.google.com/p/android/issues/detail?id=35391 + // [3]: https://bugs.chromium.org/p/chromium/issues/detail?id=138579 + // [4]: https://chromium.googlesource.com/chromium/src/base/+/master/ + // /memory/aligned_memory.cc + libc::memalign(align, size) as *mut u8 + } + + #[cfg(not(any(target_os = "android", target_os = "redox")))] + #[inline] + unsafe fn aligned_malloc(size: usize, align: usize) -> *mut u8 { + let mut out = ptr::null_mut(); + let ret = libc::posix_memalign(&mut out, align, size); + if ret != 0 { + ptr::null_mut() + } else { + out as *mut u8 + } + } +} + +#[cfg(windows)] +#[allow(bad_style)] +mod platform { + + use super::MIN_ALIGN; + type LPVOID = *mut u8; + type HANDLE = LPVOID; + type SIZE_T = usize; + type DWORD = u32; + type BOOL = i32; + + extern "system" { + fn GetProcessHeap() -> HANDLE; + fn HeapAlloc(hHeap: HANDLE, dwFlags: DWORD, dwBytes: SIZE_T) -> LPVOID; + fn HeapReAlloc(hHeap: HANDLE, dwFlags: DWORD, lpMem: LPVOID, dwBytes: SIZE_T) -> LPVOID; + fn HeapFree(hHeap: HANDLE, dwFlags: DWORD, lpMem: LPVOID) -> BOOL; + fn GetLastError() -> DWORD; + } + + #[repr(C)] + struct Header(*mut u8); + + unsafe fn get_header<'a>(ptr: *mut u8) -> &'a mut Header { + &mut *(ptr as *mut Header).offset(-1) + } + + unsafe fn align_ptr(ptr: *mut u8, align: usize) -> *mut u8 { + let aligned = ptr.offset((align - (ptr as usize & (align - 1))) as isize); + *get_header(aligned) = Header(ptr); + aligned + } + + #[inline] + unsafe fn allocate_with_flags(size: usize, align: usize, flags: DWORD) -> *mut u8 { + if align <= MIN_ALIGN { + HeapAlloc(GetProcessHeap(), flags, size) + } else { + let size = size + align; + let ptr = HeapAlloc(GetProcessHeap(), flags, size); + if ptr.is_null() { + ptr + } else { + align_ptr(ptr, align) + } + } + } + + #[inline] + pub unsafe fn alloc(size: usize, align: usize) -> *mut u8 { + allocate_with_flags(size, align, 0) + } + + #[inline] + pub unsafe fn 
dealloc(ptr: *mut u8, align: usize) { + if align <= MIN_ALIGN { + let err = HeapFree(GetProcessHeap(), 0, ptr as LPVOID); + debug_assert!(err != 0, "Failed to free heap memory: {}", GetLastError()); + } else { + let header = get_header(ptr); + let err = HeapFree(GetProcessHeap(), 0, header.0 as LPVOID); + debug_assert!(err != 0, "Failed to free heap memory: {}", GetLastError()); + } + } + + #[inline] + pub unsafe fn realloc(ptr: *mut u8, new_size: usize) -> *mut u8 { + HeapReAlloc(GetProcessHeap(), 0, ptr as LPVOID, new_size) as *mut u8 + } +} diff --git a/servo/components/hashglobe/src/fake.rs b/servo/components/hashglobe/src/fake.rs new file mode 100644 index 000000000000..339c54a49915 --- /dev/null +++ b/servo/components/hashglobe/src/fake.rs @@ -0,0 +1,269 @@ +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! This module contains shims around the stdlib HashMap +//! that add fallible methods +//! +//! These methods are a lie. They are not actually fallible. This is just to make +//! it smooth to switch between hashmap impls in a codebase. 
+ +use std::collections::HashMap as StdMap; +use std::collections::HashSet as StdSet; +use std::fmt; +use std::hash::{BuildHasher, Hash}; +use std::ops::{Deref, DerefMut}; + +pub use std::collections::hash_map::{Entry, Iter as MapIter, IterMut as MapIterMut, RandomState}; +pub use std::collections::hash_set::{IntoIter as SetIntoIter, Iter as SetIter}; + +#[derive(Clone)] +pub struct HashMap(StdMap); + +use crate::FailedAllocationError; + +impl Deref for HashMap { + type Target = StdMap; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for HashMap { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + #[inline] + pub fn try_with_hasher(hash_builder: S) -> Result, FailedAllocationError> { + Ok(HashMap(StdMap::with_hasher(hash_builder))) + } + + #[inline] + pub fn try_with_capacity_and_hasher( + capacity: usize, + hash_builder: S, + ) -> Result, FailedAllocationError> { + Ok(HashMap(StdMap::with_capacity_and_hasher( + capacity, + hash_builder, + ))) + } + + pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> HashMap { + HashMap(StdMap::with_capacity_and_hasher(capacity, hash_builder)) + } + + #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), FailedAllocationError> { + Ok(self.reserve(additional)) + } + + pub fn try_shrink_to_fit(&mut self) -> Result<(), FailedAllocationError> { + Ok(self.shrink_to_fit()) + } + + pub fn try_entry(&mut self, key: K) -> Result, FailedAllocationError> { + Ok(self.entry(key)) + } + + #[inline] + pub fn try_insert(&mut self, k: K, v: V) -> Result, FailedAllocationError> { + Ok(self.insert(k, v)) + } +} + +#[derive(Clone)] +pub struct HashSet(StdSet); + +impl Deref for HashSet { + type Target = StdSet; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for HashSet { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl HashSet { + #[inline] + pub fn new() -> 
HashSet { + HashSet(StdSet::new()) + } + + #[inline] + pub fn with_capacity(capacity: usize) -> HashSet { + HashSet(StdSet::with_capacity(capacity)) + } +} + +impl HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + #[inline] + pub fn with_hasher(hasher: S) -> HashSet { + HashSet(StdSet::with_hasher(hasher)) + } + + #[inline] + pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> HashSet { + HashSet(StdSet::with_capacity_and_hasher(capacity, hasher)) + } + + #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), FailedAllocationError> { + Ok(self.reserve(additional)) + } + + #[inline] + pub fn try_shrink_to_fit(&mut self) -> Result<(), FailedAllocationError> { + Ok(self.shrink_to_fit()) + } + + #[inline] + pub fn try_insert(&mut self, value: T) -> Result { + Ok(self.insert(value)) + } +} + +// Pass through trait impls +// We can't derive these since the bounds are not obvious to the derive macro + +impl Default for HashMap { + fn default() -> Self { + HashMap(Default::default()) + } +} + +impl fmt::Debug for HashMap +where + K: Eq + Hash + fmt::Debug, + V: fmt::Debug, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +impl PartialEq for HashMap +where + K: Eq + Hash, + V: PartialEq, + S: BuildHasher, +{ + fn eq(&self, other: &HashMap) -> bool { + self.0.eq(&other.0) + } +} + +impl Eq for HashMap +where + K: Eq + Hash, + V: Eq, + S: BuildHasher, +{ +} + +impl<'a, K, V, S> IntoIterator for &'a HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + type Item = (&'a K, &'a V); + type IntoIter = MapIter<'a, K, V>; + + fn into_iter(self) -> MapIter<'a, K, V> { + self.0.iter() + } +} + +impl<'a, K, V, S> IntoIterator for &'a mut HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + type Item = (&'a K, &'a mut V); + type IntoIter = MapIterMut<'a, K, V>; + + fn into_iter(self) -> MapIterMut<'a, K, V> { + self.0.iter_mut() + } +} + +impl Default for HashSet { + fn default() -> Self { + 
HashSet(Default::default()) + } +} + +impl fmt::Debug for HashSet +where + T: Eq + Hash + fmt::Debug, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(f) + } +} + +impl PartialEq for HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + fn eq(&self, other: &HashSet) -> bool { + self.0.eq(&other.0) + } +} + +impl Eq for HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ +} + +impl<'a, T, S> IntoIterator for &'a HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + type IntoIter = SetIter<'a, T>; + + fn into_iter(self) -> SetIter<'a, T> { + self.0.iter() + } +} + +impl IntoIterator for HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = T; + type IntoIter = SetIntoIter; + + fn into_iter(self) -> SetIntoIter { + self.0.into_iter() + } +} diff --git a/servo/components/hashglobe/src/hash_map.rs b/servo/components/hashglobe/src/hash_map.rs new file mode 100644 index 000000000000..d2893627e1dd --- /dev/null +++ b/servo/components/hashglobe/src/hash_map.rs @@ -0,0 +1,3087 @@ +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use self::Entry::*; +use self::VacantEntryState::*; + +use std::borrow::Borrow; +use std::cmp::max; +use std::fmt::{self, Debug}; +#[allow(deprecated)] +use std::hash::{BuildHasher, Hash}; +use std::iter::FromIterator; +use std::mem::{self, replace}; +use std::ops::{Deref, Index}; + +use super::table::BucketState::{Empty, Full}; +use super::table::{self, Bucket, EmptyBucket, FullBucket, FullBucketMut, RawTable, SafeHash}; + +use crate::FailedAllocationError; + +const MIN_NONZERO_RAW_CAPACITY: usize = 32; // must be a power of two + +/// The default behavior of HashMap implements a maximum load factor of 90.9%. +#[derive(Clone)] +struct DefaultResizePolicy; + +impl DefaultResizePolicy { + fn new() -> DefaultResizePolicy { + DefaultResizePolicy + } + + /// A hash map's "capacity" is the number of elements it can hold without + /// being resized. Its "raw capacity" is the number of slots required to + /// provide that capacity, accounting for maximum loading. The raw capacity + /// is always zero or a power of two. + #[inline] + fn raw_capacity(&self, len: usize) -> usize { + if len == 0 { + 0 + } else { + // 1. Account for loading: `raw_capacity >= len * 1.1`. + // 2. Ensure it is a power of two. + // 3. Ensure it is at least the minimum size. + let mut raw_cap = len * 11 / 10; + assert!(raw_cap >= len, "raw_cap overflow"); + raw_cap = raw_cap + .checked_next_power_of_two() + .expect("raw_capacity overflow"); + raw_cap = max(MIN_NONZERO_RAW_CAPACITY, raw_cap); + raw_cap + } + } + + /// The capacity of the given raw capacity. + #[inline] + fn capacity(&self, raw_cap: usize) -> usize { + // This doesn't have to be checked for overflow since allocation size + // in bytes will overflow earlier than multiplication by 10. + // + // As per https://github.com/rust-lang/rust/pull/30991 this is updated + // to be: (raw_cap * den + den - 1) / num + (raw_cap * 10 + 10 - 1) / 11 + } +} + +// The main performance trick in this hashmap is called Robin Hood Hashing. 
+// It gains its excellent performance from one essential operation: +// +// If an insertion collides with an existing element, and that element's +// "probe distance" (how far away the element is from its ideal location) +// is higher than how far we've already probed, swap the elements. +// +// This massively lowers variance in probe distance, and allows us to get very +// high load factors with good performance. The 90% load factor I use is rather +// conservative. +// +// > Why a load factor of approximately 90%? +// +// In general, all the distances to initial buckets will converge on the mean. +// At a load factor of α, the odds of finding the target bucket after k +// probes is approximately 1-α^k. If we set this equal to 50% (since we converge +// on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round +// this down to make the math easier on the CPU and avoid its FPU. +// Since on average we start the probing in the middle of a cache line, this +// strategy pulls in two cache lines of hashes on every lookup. I think that's +// pretty good, but if you want to trade off some space, it could go down to one +// cache line on average with an α of 0.84. +// +// > Wait, what? Where did you get 1-α^k from? +// +// On the first probe, your odds of a collision with an existing element is α. +// The odds of doing this twice in a row is approximately α^2. For three times, +// α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT +// colliding after k tries is 1-α^k. +// +// The paper from 1986 cited below mentions an implementation which keeps track +// of the distance-to-initial-bucket histogram. This approach is not suitable +// for modern architectures because it requires maintaining an internal data +// structure. This allows very good first guesses, but we are most concerned +// with guessing entire cache lines, not individual indexes. 
Furthermore, array +// accesses are no longer linear and in one direction, as we have now. There +// is also memory and cache pressure that this would entail that would be very +// difficult to properly see in a microbenchmark. +// +// ## Future Improvements (FIXME!) +// +// Allow the load factor to be changed dynamically and/or at initialization. +// +// Also, would it be possible for us to reuse storage when growing the +// underlying table? This is exactly the use case for 'realloc', and may +// be worth exploring. +// +// ## Future Optimizations (FIXME!) +// +// Another possible design choice that I made without any real reason is +// parameterizing the raw table over keys and values. Technically, all we need +// is the size and alignment of keys and values, and the code should be just as +// efficient (well, we might need one for power-of-two size and one for not...). +// This has the potential to reduce code bloat in rust executables, without +// really losing anything except 4 words (key size, key alignment, val size, +// val alignment) which can be passed in to every call of a `RawTable` function. +// This would definitely be an avenue worth exploring if people start complaining +// about the size of rust executables. +// +// Annotate exceedingly likely branches in `table::make_hash` +// and `search_hashed` to reduce instruction cache pressure +// and mispredictions once it becomes possible (blocked on issue #11092). +// +// Shrinking the table could simply reallocate in place after moving buckets +// to the first half. +// +// The growth algorithm (fragment of the Proof of Correctness) +// -------------------- +// +// The growth algorithm is basically a fast path of the naive reinsertion- +// during-resize algorithm. Other paths should never be taken. +// +// Consider growing a robin hood hashtable of capacity n. 
Normally, we do this +// by allocating a new table of capacity `2n`, and then individually reinserting +// each element in the old table into the new one. This guarantees that the +// new table is a valid robin hood hashtable with all the desired statistical +// properties. Remark that the order we reinsert the elements in should not +// matter. For simplicity and efficiency, we will consider only linear +// reinsertions, which consist of reinserting all elements in the old table +// into the new one by increasing order of index. However we will not be +// starting our reinsertions from index 0 in general. If we start from index +// i, for the purpose of reinsertion we will consider all elements with real +// index j < i to have virtual index n + j. +// +// Our hash generation scheme consists of generating a 64-bit hash and +// truncating the most significant bits. When moving to the new table, we +// simply introduce a new bit to the front of the hash. Therefore, if an +// element has ideal index i in the old table, it can have one of two ideal +// locations in the new table. If the new bit is 0, then the new ideal index +// is i. If the new bit is 1, then the new ideal index is n + i. Intuitively, +// we are producing two independent tables of size n, and for each element we +// independently choose which table to insert it into with equal probability. +// However, rather than wrapping around themselves on overflowing their +// indexes, the first table overflows into the second, and the second into the +// first. Visually, our new table will look something like: +// +// [yy_xxx_xxxx_xxx|xx_yyy_yyyy_yyy] +// +// Where x's are elements inserted into the first table, y's are elements +// inserted into the second, and _'s are empty sections. We now define a few +// key concepts that we will use later. Note that this is a very abstract +// perspective of the table. A real resized table would be at least half +// empty.
+// +// Theorem: A linear robin hood reinsertion from the first ideal element +// produces identical results to a linear naive reinsertion from the same +// element. +// +// FIXME(Gankro, pczarn): review the proof and put it all in a separate README.md +// +// Adaptive early resizing +// ---------------------- +// To protect against degenerate performance scenarios (including DOS attacks), +// the implementation includes an adaptive behavior that can resize the map +// early (before its capacity is exceeded) when suspiciously long probe sequences +// are encountered. +// +// With this algorithm in place it would be possible to turn a CPU attack into +// a memory attack due to the aggressive resizing. To prevent that the +// adaptive behavior only triggers when the map is at least half full. +// This reduces the effectiveness of the algorithm but also makes it completely safe. +// +// The previous safety measure also prevents degenerate interactions with +// really bad quality hash algorithms that can make normal inputs look like a +// DOS attack. +// +const DISPLACEMENT_THRESHOLD: usize = 128; +// +// The threshold of 128 is chosen to minimize the chance of exceeding it. +// In particular, we want that chance to be less than 10^-8 with a load of 90%. +// For displacement, the smallest constant that fits our needs is 90, +// so we round that up to 128. +// +// At a load factor of α, the odds of finding the target bucket after exactly n +// unsuccessful probes[1] are +// +// Pr_α{displacement = n} = +// (1 - α) / α * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! * (1 - kα / (k + n + 1)) +// +// We use this formula to find the probability of triggering the adaptive behavior +// +// Pr_0.909{displacement > 128} = 1.601 * 10^-11 +// +// 1. Alfredo Viola (2005). Distributional analysis of Robin Hood linear probing +// hashing with buckets. + +/// A hash map implemented with linear probing and Robin Hood bucket stealing. 
+/// +/// By default, `HashMap` uses a hashing algorithm selected to provide +/// resistance against HashDoS attacks. The algorithm is randomly seeded, and a +/// reasonable best-effort is made to generate this seed from a high quality, +/// secure source of randomness provided by the host without blocking the +/// program. Because of this, the randomness of the seed depends on the output +/// quality of the system's random number generator when the seed is created. +/// In particular, seeds generated when the system's entropy pool is abnormally +/// low such as during system boot may be of a lower quality. +/// +/// The default hashing algorithm is currently SipHash 1-3, though this is +/// subject to change at any point in the future. While its performance is very +/// competitive for medium sized keys, other hashing algorithms will outperform +/// it for small keys such as integers as well as large keys such as long +/// strings, though those algorithms will typically *not* protect against +/// attacks such as HashDoS. +/// +/// The hashing algorithm can be replaced on a per-`HashMap` basis using the +/// [`default`], [`with_hasher`], and [`with_capacity_and_hasher`] methods. Many +/// alternative algorithms are available on crates.io, such as the [`fnv`] crate. +/// +/// It is required that the keys implement the [`Eq`] and [`Hash`] traits, although +/// this can frequently be achieved by using `#[derive(PartialEq, Eq, Hash)]`. +/// If you implement these yourself, it is important that the following +/// property holds: +/// +/// ```text +/// k1 == k2 -> hash(k1) == hash(k2) +/// ``` +/// +/// In other words, if two keys are equal, their hashes must be equal. +/// +/// It is a logic error for a key to be modified in such a way that the key's +/// hash, as determined by the [`Hash`] trait, or its equality, as determined by +/// the [`Eq`] trait, changes while it is in the map. 
This is normally only +/// possible through [`Cell`], [`RefCell`], global state, I/O, or unsafe code. +/// +/// Relevant papers/articles: +/// +/// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf) +/// 2. Emmanuel Goossaert. ["Robin Hood +/// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/) +/// 3. Emmanuel Goossaert. ["Robin Hood hashing: backward shift +/// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/) +/// +/// # Examples +/// +/// ``` +/// use std::collections::HashMap; +/// +/// // type inference lets us omit an explicit type signature (which +/// // would be `HashMap<&str, &str>` in this example). +/// let mut book_reviews = HashMap::new(); +/// +/// // review some books. +/// book_reviews.insert("Adventures of Huckleberry Finn", "My favorite book."); +/// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece."); +/// book_reviews.insert("Pride and Prejudice", "Very enjoyable."); +/// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot."); +/// +/// // check for a specific one. +/// if !book_reviews.contains_key("Les Misérables") { +/// println!("We've got {} reviews, but Les Misérables ain't one.", +/// book_reviews.len()); +/// } +/// +/// // oops, this review has a lot of spelling mistakes, let's delete it. +/// book_reviews.remove("The Adventures of Sherlock Holmes"); +/// +/// // look up the values associated with some keys. +/// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"]; +/// for book in &to_find { +/// match book_reviews.get(book) { +/// Some(review) => println!("{}: {}", book, review), +/// None => println!("{} is unreviewed.", book) +/// } +/// } +/// +/// // iterate over everything. 
+/// for (book, review) in &book_reviews { +/// println!("{}: \"{}\"", book, review); +/// } +/// ``` +/// +/// `HashMap` also implements an [`Entry API`](#method.entry), which allows +/// for more complex methods of getting, setting, updating and removing keys and +/// their values: +/// +/// ``` +/// use std::collections::HashMap; +/// +/// // type inference lets us omit an explicit type signature (which +/// // would be `HashMap<&str, u8>` in this example). +/// let mut player_stats = HashMap::new(); +/// +/// fn random_stat_buff() -> u8 { +/// // could actually return some random value here - let's just return +/// // some fixed value for now +/// 42 +/// } +/// +/// // insert a key only if it doesn't already exist +/// player_stats.entry("health").or_insert(100); +/// +/// // insert a key using a function that provides a new value only if it +/// // doesn't already exist +/// player_stats.entry("defence").or_insert_with(random_stat_buff); +/// +/// // update a key, guarding against the key possibly not being set +/// let stat = player_stats.entry("attack").or_insert(100); +/// *stat += random_stat_buff(); +/// ``` +/// +/// The easiest way to use `HashMap` with a custom type as key is to derive [`Eq`] and [`Hash`]. +/// We must also derive [`PartialEq`]. +/// +/// [`Eq`]: ../../std/cmp/trait.Eq.html +/// [`Hash`]: ../../std/hash/trait.Hash.html +/// [`PartialEq`]: ../../std/cmp/trait.PartialEq.html +/// [`RefCell`]: ../../std/cell/struct.RefCell.html +/// [`Cell`]: ../../std/cell/struct.Cell.html +/// [`default`]: #method.default +/// [`with_hasher`]: #method.with_hasher +/// [`with_capacity_and_hasher`]: #method.with_capacity_and_hasher +/// [`fnv`]: https://crates.io/crates/fnv +/// +/// ``` +/// use std::collections::HashMap; +/// +/// #[derive(Hash, Eq, PartialEq, Debug)] +/// struct Viking { +/// name: String, +/// country: String, +/// } +/// +/// impl Viking { +/// /// Create a new Viking. 
+/// fn new(name: &str, country: &str) -> Viking { +/// Viking { name: name.to_string(), country: country.to_string() } +/// } +/// } +/// +/// // Use a HashMap to store the vikings' health points. +/// let mut vikings = HashMap::new(); +/// +/// vikings.insert(Viking::new("Einar", "Norway"), 25); +/// vikings.insert(Viking::new("Olaf", "Denmark"), 24); +/// vikings.insert(Viking::new("Harald", "Iceland"), 12); +/// +/// // Use derived implementation to print the status of the vikings. +/// for (viking, health) in &vikings { +/// println!("{:?} has {} hp", viking, health); +/// } +/// ``` +/// +/// A `HashMap` with fixed list of elements can be initialized from an array: +/// +/// ``` +/// use std::collections::HashMap; +/// +/// fn main() { +/// let timber_resources: HashMap<&str, i32> = +/// [("Norway", 100), +/// ("Denmark", 50), +/// ("Iceland", 10)] +/// .iter().cloned().collect(); +/// // use the values stored in map +/// } +/// ``` + +#[derive(Clone)] +pub struct HashMap { + // All hashes are keyed on these values, to prevent hash collision attacks. + hash_builder: S, + + table: RawTable, + + resize_policy: DefaultResizePolicy, +} + +/// Search for a pre-hashed key. +#[inline] +fn search_hashed(table: M, hash: SafeHash, mut is_match: F) -> InternalEntry +where + M: Deref>, + F: FnMut(&K) -> bool, +{ + // This is the only function where capacity can be zero. To avoid + // undefined behavior when Bucket::new gets the raw bucket in this + // case, immediately return the appropriate search result. + if table.capacity() == 0 { + return InternalEntry::TableIsEmpty; + } + + let size = table.size(); + let mut probe = Bucket::new(table, hash); + let mut displacement = 0; + + loop { + let full = match probe.peek() { + Empty(bucket) => { + // Found a hole! 
+ return InternalEntry::Vacant { + hash, + elem: NoElem(bucket, displacement), + }; + }, + Full(bucket) => bucket, + }; + + let probe_displacement = full.displacement(); + + if probe_displacement < displacement { + // Found a luckier bucket than me. + // We can finish the search early if we hit any bucket + // with a lower distance to initial bucket than we've probed. + return InternalEntry::Vacant { + hash, + elem: NeqElem(full, probe_displacement), + }; + } + + // If the hash doesn't match, it can't be this one.. + if hash == full.hash() { + // If the key doesn't match, it can't be this one.. + if is_match(full.read().0) { + return InternalEntry::Occupied { elem: full }; + } + } + displacement += 1; + probe = full.next(); + debug_assert!(displacement <= size); + } +} + +fn pop_internal(starting_bucket: FullBucketMut) -> (K, V, &mut RawTable) { + let (empty, retkey, retval) = starting_bucket.take(); + let mut gap = match empty.gap_peek() { + Ok(b) => b, + Err(b) => return (retkey, retval, b.into_table()), + }; + + while gap.full().displacement() != 0 { + gap = match gap.shift() { + Ok(b) => b, + Err(b) => { + return (retkey, retval, b.into_table()); + }, + }; + } + + // Now we've done all our shifting. Return the value we grabbed earlier. + (retkey, retval, gap.into_table()) +} + +/// Perform robin hood bucket stealing at the given `bucket`. You must +/// also pass that bucket's displacement so we don't have to recalculate it. +/// +/// `hash`, `key`, and `val` are the elements to "robin hood" into the hashtable. +fn robin_hood<'a, K: 'a, V: 'a>( + bucket: FullBucketMut<'a, K, V>, + mut displacement: usize, + mut hash: SafeHash, + mut key: K, + mut val: V, +) -> FullBucketMut<'a, K, V> { + let size = bucket.table().size(); + let raw_capacity = bucket.table().capacity(); + // There can be at most `size - dib` buckets to displace, because + // in the worst case, there are `size` elements and we already are + // `displacement` buckets away from the initial one. 
+ let idx_end = (bucket.index() + size - bucket.displacement()) % raw_capacity; + // Save the *starting point*. + let mut bucket = bucket.stash(); + + loop { + let (old_hash, old_key, old_val) = bucket.replace(hash, key, val); + hash = old_hash; + key = old_key; + val = old_val; + + loop { + displacement += 1; + let probe = bucket.next(); + debug_assert_ne!(probe.index(), idx_end); + + let full_bucket = match probe.peek() { + Empty(bucket) => { + // Found a hole! + let bucket = bucket.put(hash, key, val); + // Now that it's stolen, just read the value's pointer + // right out of the table! Go back to the *starting point*. + // + // This use of `into_table` is misleading. It turns the + // bucket, which is a FullBucket on top of a + // FullBucketMut, into just one FullBucketMut. The "table" + // refers to the inner FullBucketMut in this context. + return bucket.into_table(); + }, + Full(bucket) => bucket, + }; + + let probe_displacement = full_bucket.displacement(); + + bucket = full_bucket; + + // Robin hood! Steal the spot. + if probe_displacement < displacement { + displacement = probe_displacement; + break; + } + } + } +} + +impl HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + fn make_hash(&self, x: &X) -> SafeHash + where + X: Hash, + { + table::make_hash(&self.hash_builder, x) + } + + /// Search for a key, yielding the index if it's found in the hashtable. + /// If you already have the hash for the key lying around, use + /// search_hashed. 
+ #[inline] + fn search<'a, Q: ?Sized>(&'a self, q: &Q) -> InternalEntry> + where + K: Borrow, + Q: Eq + Hash, + { + let hash = self.make_hash(q); + search_hashed(&self.table, hash, |k| q.eq(k.borrow())) + } + + #[inline] + fn search_mut<'a, Q: ?Sized>(&'a mut self, q: &Q) -> InternalEntry> + where + K: Borrow, + Q: Eq + Hash, + { + let hash = self.make_hash(q); + search_hashed(&mut self.table, hash, |k| q.eq(k.borrow())) + } + + // The caller should ensure that invariants by Robin Hood Hashing hold + // and that there's space in the underlying table. + fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { + let mut buckets = Bucket::new(&mut self.table, hash); + let start_index = buckets.index(); + + loop { + // We don't need to compare hashes for value swap. + // Not even DIBs for Robin Hood. + buckets = match buckets.peek() { + Empty(empty) => { + empty.put(hash, k, v); + return; + }, + Full(b) => b.into_bucket(), + }; + buckets.next(); + debug_assert_ne!(buckets.index(), start_index); + } + } +} + +impl HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + /// Creates an empty `HashMap` which will use the given hash builder to hash + /// keys. + /// + /// The created map has the default initial capacity. + /// + /// Warning: `hash_builder` is normally randomly generated, and + /// is designed to allow HashMaps to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// let mut map = HashMap::with_hasher(s); + /// map.insert(1, 2); + /// ``` + #[inline] + pub fn try_with_hasher(hash_builder: S) -> Result, FailedAllocationError> { + Ok(HashMap { + hash_builder, + resize_policy: DefaultResizePolicy::new(), + table: RawTable::new(0)?, + }) + } + + #[inline] + pub fn with_hasher(hash_builder: S) -> HashMap { + Self::try_with_hasher(hash_builder).unwrap() + } + + /// Creates an empty `HashMap` with the specified capacity, using `hash_builder` + /// to hash the keys. + /// + /// The hash map will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash map will not allocate. + /// + /// Warning: `hash_builder` is normally randomly generated, and + /// is designed to allow HashMaps to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// let mut map = HashMap::with_capacity_and_hasher(10, s); + /// map.insert(1, 2); + /// ``` + #[inline] + pub fn try_with_capacity_and_hasher( + capacity: usize, + hash_builder: S, + ) -> Result, FailedAllocationError> { + let resize_policy = DefaultResizePolicy::new(); + let raw_cap = resize_policy.raw_capacity(capacity); + Ok(HashMap { + hash_builder, + resize_policy, + table: RawTable::new(raw_cap)?, + }) + } + + pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> HashMap { + Self::try_with_capacity_and_hasher(capacity, hash_builder).unwrap() + } + + /// Returns a reference to the map's [`BuildHasher`]. 
+ /// + /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html + pub fn hasher(&self) -> &S { + &self.hash_builder + } + + /// Returns the number of elements the map can hold without reallocating. + /// + /// This number is a lower bound; the `HashMap` might be able to hold + /// more, but is guaranteed to be able to hold at least this many. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// let map: HashMap = HashMap::with_capacity(100); + /// assert!(map.capacity() >= 100); + /// ``` + #[inline] + pub fn capacity(&self) -> usize { + self.resize_policy.capacity(self.raw_capacity()) + } + + /// Returns the hash map's raw capacity. + #[inline] + fn raw_capacity(&self) -> usize { + self.table.capacity() + } + + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the `HashMap`. The collection may reserve more space to avoid + /// frequent reallocations. + /// + /// # Panics + /// + /// Panics if the new allocation size overflows [`usize`]. + /// + /// [`usize`]: ../../std/primitive.usize.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// let mut map: HashMap<&str, isize> = HashMap::new(); + /// map.reserve(10); + /// ``` + pub fn reserve(&mut self, additional: usize) { + self.try_reserve(additional).unwrap(); + } + + #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), FailedAllocationError> { + let remaining = self.capacity() - self.len(); // this can't overflow + if remaining < additional { + let min_cap = self + .len() + .checked_add(additional) + .expect("reserve overflow"); + let raw_cap = self.resize_policy.raw_capacity(min_cap); + self.try_resize(raw_cap)?; + } else if self.table.tag() && remaining <= self.len() { + // Probe sequence is too long and table is half full, + // resize early to reduce probing length. 
+ let new_capacity = self.table.capacity() * 2; + self.try_resize(new_capacity)?; + } + Ok(()) + } + + #[cold] + #[inline(never)] + fn try_resize(&mut self, new_raw_cap: usize) -> Result<(), FailedAllocationError> { + assert!(self.table.size() <= new_raw_cap); + assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0); + + let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap)?); + let old_size = old_table.size(); + + if old_table.size() == 0 { + return Ok(()); + } + + let mut bucket = Bucket::head_bucket(&mut old_table); + + // This is how the buckets might be laid out in memory: + // ($ marks an initialized bucket) + // ________________ + // |$$$_$$$$$$_$$$$$| + // + // But we've skipped the entire initial cluster of buckets + // and will continue iteration in this order: + // ________________ + // |$$$$$$_$$$$$ + // ^ wrap around once end is reached + // ________________ + // $$$_____________| + // ^ exit once table.size == 0 + loop { + bucket = match bucket.peek() { + Full(bucket) => { + let h = bucket.hash(); + let (b, k, v) = bucket.take(); + self.insert_hashed_ordered(h, k, v); + if b.table().size() == 0 { + break; + } + b.into_bucket() + }, + Empty(b) => b.into_bucket(), + }; + bucket.next(); + } + + assert_eq!(self.table.size(), old_size); + Ok(()) + } + + /// Shrinks the capacity of the map as much as possible. It will drop + /// down as much as possible while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap = HashMap::with_capacity(100); + /// map.insert(1, 2); + /// map.insert(3, 4); + /// assert!(map.capacity() >= 100); + /// map.shrink_to_fit(); + /// assert!(map.capacity() >= 2); + /// ``` + pub fn shrink_to_fit(&mut self) { + self.try_shrink_to_fit().unwrap(); + } + + pub fn try_shrink_to_fit(&mut self) -> Result<(), FailedAllocationError> { + let new_raw_cap = self.resize_policy.raw_capacity(self.len()); + if self.raw_capacity() != new_raw_cap { + let old_table = replace(&mut self.table, RawTable::new(new_raw_cap)?); + let old_size = old_table.size(); + + // Shrink the table. Naive algorithm for resizing: + for (h, k, v) in old_table.into_iter() { + self.insert_hashed_nocheck(h, k, v); + } + + debug_assert_eq!(self.table.size(), old_size); + } + Ok(()) + } + + /// Insert a pre-hashed key-value pair, without first checking + /// that there's enough room in the buckets. Returns a reference to the + /// newly insert value. + /// + /// If the key already exists, the hashtable will be returned untouched + /// and a reference to the existing element will be returned. + fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { + let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k); + match entry { + Some(Occupied(mut elem)) => Some(elem.insert(v)), + Some(Vacant(elem)) => { + elem.insert(v); + None + }, + None => unreachable!(), + } + } + + /// An iterator visiting all keys in arbitrary order. + /// The iterator element type is `&'a K`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for key in map.keys() { + /// println!("{}", key); + /// } + /// ``` + pub fn keys(&self) -> Keys { + Keys { inner: self.iter() } + } + + /// An iterator visiting all values in arbitrary order. + /// The iterator element type is `&'a V`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for val in map.values() { + /// println!("{}", val); + /// } + /// ``` + pub fn values(&self) -> Values { + Values { inner: self.iter() } + } + + /// An iterator visiting all values mutably in arbitrary order. + /// The iterator element type is `&'a mut V`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for val in map.values_mut() { + /// *val = *val + 10; + /// } + /// + /// for val in map.values() { + /// println!("{}", val); + /// } + /// ``` + pub fn values_mut(&mut self) -> ValuesMut { + ValuesMut { + inner: self.iter_mut(), + } + } + + /// An iterator visiting all key-value pairs in arbitrary order. + /// The iterator element type is `(&'a K, &'a V)`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// for (key, val) in map.iter() { + /// println!("key: {} val: {}", key, val); + /// } + /// ``` + pub fn iter(&self) -> Iter { + Iter { + inner: self.table.iter(), + } + } + + /// An iterator visiting all key-value pairs in arbitrary order, + /// with mutable references to the values. 
+ /// The iterator element type is `(&'a K, &'a mut V)`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// // Update all values + /// for (_, val) in map.iter_mut() { + /// *val *= 2; + /// } + /// + /// for (key, val) in &map { + /// println!("key: {} val: {}", key, val); + /// } + /// ``` + pub fn iter_mut(&mut self) -> IterMut { + IterMut { + inner: self.table.iter_mut(), + } + } + + /// Gets the given key's corresponding entry in the map for in-place manipulation. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut letters = HashMap::new(); + /// + /// for ch in "a short treatise on fungi".chars() { + /// let counter = letters.entry(ch).or_insert(0); + /// *counter += 1; + /// } + /// + /// assert_eq!(letters[&'s'], 2); + /// assert_eq!(letters[&'t'], 3); + /// assert_eq!(letters[&'u'], 1); + /// assert_eq!(letters.get(&'y'), None); + /// ``` + pub fn entry(&mut self, key: K) -> Entry { + self.try_entry(key).unwrap() + } + + #[inline(always)] + pub fn try_entry(&mut self, key: K) -> Result, FailedAllocationError> { + // Gotta resize now. + self.try_reserve(1)?; + let hash = self.make_hash(&key); + Ok(search_hashed(&mut self.table, hash, |q| q.eq(&key)) + .into_entry(key) + .expect("unreachable")) + } + + /// Returns the number of elements in the map. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// assert_eq!(a.len(), 0); + /// a.insert(1, "a"); + /// assert_eq!(a.len(), 1); + /// ``` + pub fn len(&self) -> usize { + self.table.size() + } + + /// Returns true if the map contains no elements. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// assert!(a.is_empty()); + /// a.insert(1, "a"); + /// assert!(!a.is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Clears the map, returning all key-value pairs as an iterator. Keeps the + /// allocated memory for reuse. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// a.insert(1, "a"); + /// a.insert(2, "b"); + /// + /// for (k, v) in a.drain().take(1) { + /// assert!(k == 1 || k == 2); + /// assert!(v == "a" || v == "b"); + /// } + /// + /// assert!(a.is_empty()); + /// ``` + #[inline] + pub fn drain(&mut self) -> Drain + where + K: 'static, + V: 'static, + { + Drain { + inner: self.table.drain(), + } + } + + /// Clears the map, removing all key-value pairs. Keeps the allocated memory + /// for reuse. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut a = HashMap::new(); + /// a.insert(1, "a"); + /// a.clear(); + /// assert!(a.is_empty()); + /// ``` + #[inline] + pub fn clear(&mut self) + where + K: 'static, + V: 'static, + { + self.drain(); + } + + /// Returns a reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. 
+ /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.get(&1), Some(&"a")); + /// assert_eq!(map.get(&2), None); + /// ``` + pub fn get(&self, k: &Q) -> Option<&V> + where + K: Borrow, + Q: Hash + Eq, + { + self.search(k) + .into_occupied_bucket() + .map(|bucket| bucket.into_refs().1) + } + + /// Returns true if the map contains a value for the specified key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.contains_key(&1), true); + /// assert_eq!(map.contains_key(&2), false); + /// ``` + pub fn contains_key(&self, k: &Q) -> bool + where + K: Borrow, + Q: Hash + Eq, + { + self.search(k).into_occupied_bucket().is_some() + } + + /// Returns a mutable reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. 
+ /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// if let Some(x) = map.get_mut(&1) { + /// *x = "b"; + /// } + /// assert_eq!(map[&1], "b"); + /// ``` + pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> + where + K: Borrow, + Q: Hash + Eq, + { + self.search_mut(k) + .into_occupied_bucket() + .map(|bucket| bucket.into_mut_refs().1) + } + + /// Inserts a key-value pair into the map. + /// + /// If the map did not have this key present, [`None`] is returned. + /// + /// If the map did have this key present, the value is updated, and the old + /// value is returned. The key is not updated, though; this matters for + /// types that can be `==` without being identical. See the [module-level + /// documentation] for more. + /// + /// [`None`]: ../../std/option/enum.Option.html#variant.None + /// [module-level documentation]: index.html#insert-and-complex-keys + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// assert_eq!(map.insert(37, "a"), None); + /// assert_eq!(map.is_empty(), false); + /// + /// map.insert(37, "b"); + /// assert_eq!(map.insert(37, "c"), Some("b")); + /// assert_eq!(map[&37], "c"); + /// ``` + pub fn insert(&mut self, k: K, v: V) -> Option { + self.try_insert(k, v).unwrap() + } + + #[inline] + pub fn try_insert(&mut self, k: K, v: V) -> Result, FailedAllocationError> { + let hash = self.make_hash(&k); + self.try_reserve(1)?; + Ok(self.insert_hashed_nocheck(hash, k, v)) + } + + /// Removes a key from the map, returning the value at the key if the key + /// was previously in the map. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. 
+ /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.remove(&1), Some("a")); + /// assert_eq!(map.remove(&1), None); + /// ``` + pub fn remove(&mut self, k: &Q) -> Option + where + K: Borrow, + Q: Hash + Eq, + { + if self.table.size() == 0 { + return None; + } + + self.search_mut(k) + .into_occupied_bucket() + .map(|bucket| pop_internal(bucket).1) + } + + /// Retains only the elements specified by the predicate. + /// + /// In other words, remove all pairs `(k, v)` such that `f(&k,&mut v)` returns `false`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap = (0..8).map(|x|(x, x*10)).collect(); + /// map.retain(|&k, _| k % 2 == 0); + /// assert_eq!(map.len(), 4); + /// ``` + pub fn retain(&mut self, mut f: F) + where + F: FnMut(&K, &mut V) -> bool, + { + if self.table.size() == 0 { + return; + } + let mut elems_left = self.table.size(); + let mut bucket = Bucket::head_bucket(&mut self.table); + bucket.prev(); + let start_index = bucket.index(); + while elems_left != 0 { + bucket = match bucket.peek() { + Full(mut full) => { + elems_left -= 1; + let should_remove = { + let (k, v) = full.read_mut(); + !f(k, v) + }; + if should_remove { + let prev_raw = full.raw(); + let (_, _, t) = pop_internal(full); + Bucket::new_from(prev_raw, t) + } else { + full.into_bucket() + } + }, + Empty(b) => b.into_bucket(), + }; + bucket.prev(); // reverse iteration + debug_assert!(elems_left == 0 || bucket.index() != start_index); + } + } +} + +impl PartialEq for HashMap +where + K: Eq + Hash, + V: PartialEq, + S: BuildHasher, +{ + fn eq(&self, other: &HashMap) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter() + .all(|(key, value)| other.get(key).map_or(false, |v| *value == *v)) + } +} + +impl Eq 
for HashMap +where + K: Eq + Hash, + V: Eq, + S: BuildHasher, +{ +} + +impl Debug for HashMap +where + K: Eq + Hash + Debug, + V: Debug, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_map().entries(self.iter()).finish() + } +} + +impl Default for HashMap +where + K: Eq + Hash, + S: BuildHasher + Default, +{ + /// Creates an empty `HashMap`, with the `Default` value for the hasher. + fn default() -> HashMap { + HashMap::with_hasher(Default::default()) + } +} + +impl<'a, K, Q: ?Sized, V, S> Index<&'a Q> for HashMap +where + K: Eq + Hash + Borrow, + Q: Eq + Hash, + S: BuildHasher, +{ + type Output = V; + + #[inline] + fn index(&self, index: &Q) -> &V { + self.get(index).expect("no entry found for key") + } +} + +/// An iterator over the entries of a `HashMap`. +/// +/// This `struct` is created by the [`iter`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`iter`]: struct.HashMap.html#method.iter +/// [`HashMap`]: struct.HashMap.html +pub struct Iter<'a, K: 'a, V: 'a> { + inner: table::Iter<'a, K, V>, +} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +impl<'a, K, V> Clone for Iter<'a, K, V> { + fn clone(&self) -> Iter<'a, K, V> { + Iter { + inner: self.inner.clone(), + } + } +} + +impl<'a, K: Debug, V: Debug> fmt::Debug for Iter<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// A mutable iterator over the entries of a `HashMap`. +/// +/// This `struct` is created by the [`iter_mut`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`iter_mut`]: struct.HashMap.html#method.iter_mut +/// [`HashMap`]: struct.HashMap.html +pub struct IterMut<'a, K: 'a, V: 'a> { + inner: table::IterMut<'a, K, V>, +} + +/// An owning iterator over the entries of a `HashMap`. +/// +/// This `struct` is created by the [`into_iter`] method on [`HashMap`][`HashMap`] +/// (provided by the `IntoIterator` trait). 
See its documentation for more. +/// +/// [`into_iter`]: struct.HashMap.html#method.into_iter +/// [`HashMap`]: struct.HashMap.html +pub struct IntoIter { + pub(super) inner: table::IntoIter, +} + +/// An iterator over the keys of a `HashMap`. +/// +/// This `struct` is created by the [`keys`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`keys`]: struct.HashMap.html#method.keys +/// [`HashMap`]: struct.HashMap.html +pub struct Keys<'a, K: 'a, V: 'a> { + inner: Iter<'a, K, V>, +} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +impl<'a, K, V> Clone for Keys<'a, K, V> { + fn clone(&self) -> Keys<'a, K, V> { + Keys { + inner: self.inner.clone(), + } + } +} + +impl<'a, K: Debug, V> fmt::Debug for Keys<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// An iterator over the values of a `HashMap`. +/// +/// This `struct` is created by the [`values`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`values`]: struct.HashMap.html#method.values +/// [`HashMap`]: struct.HashMap.html +pub struct Values<'a, K: 'a, V: 'a> { + inner: Iter<'a, K, V>, +} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +impl<'a, K, V> Clone for Values<'a, K, V> { + fn clone(&self) -> Values<'a, K, V> { + Values { + inner: self.inner.clone(), + } + } +} + +impl<'a, K, V: Debug> fmt::Debug for Values<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +/// A draining iterator over the entries of a `HashMap`. +/// +/// This `struct` is created by the [`drain`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`drain`]: struct.HashMap.html#method.drain +/// [`HashMap`]: struct.HashMap.html +pub struct Drain<'a, K: 'static, V: 'static> { + pub(super) inner: table::Drain<'a, K, V>, +} + +/// A mutable iterator over the values of a `HashMap`. 
+/// +/// This `struct` is created by the [`values_mut`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`values_mut`]: struct.HashMap.html#method.values_mut +/// [`HashMap`]: struct.HashMap.html +pub struct ValuesMut<'a, K: 'a, V: 'a> { + inner: IterMut<'a, K, V>, +} + +enum InternalEntry { + Occupied { + elem: FullBucket, + }, + Vacant { + hash: SafeHash, + elem: VacantEntryState, + }, + TableIsEmpty, +} + +impl InternalEntry { + #[inline] + fn into_occupied_bucket(self) -> Option> { + match self { + InternalEntry::Occupied { elem } => Some(elem), + _ => None, + } + } +} + +impl<'a, K, V> InternalEntry> { + #[inline] + fn into_entry(self, key: K) -> Option> { + match self { + InternalEntry::Occupied { elem } => Some(Occupied(OccupiedEntry { + key: Some(key), + elem, + })), + InternalEntry::Vacant { hash, elem } => Some(Vacant(VacantEntry { hash, key, elem })), + InternalEntry::TableIsEmpty => None, + } + } +} + +/// A view into a single entry in a map, which may either be vacant or occupied. +/// +/// This `enum` is constructed from the [`entry`] method on [`HashMap`]. +/// +/// [`HashMap`]: struct.HashMap.html +/// [`entry`]: struct.HashMap.html#method.entry +pub enum Entry<'a, K: 'a, V: 'a> { + /// An occupied entry. + Occupied(OccupiedEntry<'a, K, V>), + + /// A vacant entry. + Vacant(VacantEntry<'a, K, V>), +} + +impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for Entry<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Vacant(ref v) => f.debug_tuple("Entry").field(v).finish(), + Occupied(ref o) => f.debug_tuple("Entry").field(o).finish(), + } + } +} + +/// A view into an occupied entry in a `HashMap`. +/// It is part of the [`Entry`] enum. 
+/// +/// [`Entry`]: enum.Entry.html +pub struct OccupiedEntry<'a, K: 'a, V: 'a> { + key: Option, + elem: FullBucket>, +} + +impl<'a, K: 'a + Debug, V: 'a + Debug> Debug for OccupiedEntry<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_struct("OccupiedEntry") + .field("key", self.key()) + .field("value", self.get()) + .finish() + } +} + +/// A view into a vacant entry in a `HashMap`. +/// It is part of the [`Entry`] enum. +/// +/// [`Entry`]: enum.Entry.html +pub struct VacantEntry<'a, K: 'a, V: 'a> { + hash: SafeHash, + key: K, + elem: VacantEntryState>, +} + +impl<'a, K: 'a + Debug, V: 'a> Debug for VacantEntry<'a, K, V> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_tuple("VacantEntry").field(self.key()).finish() + } +} + +/// Possible states of a VacantEntry. +enum VacantEntryState { + /// The index is occupied, but the key to insert has precedence, + /// and will kick the current one out on insertion. + NeqElem(FullBucket, usize), + /// The index is genuinely vacant. + NoElem(EmptyBucket, usize), +} + +impl<'a, K, V, S> IntoIterator for &'a HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Iter<'a, K, V> { + self.iter() + } +} + +impl<'a, K, V, S> IntoIterator for &'a mut HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> IterMut<'a, K, V> { + self.iter_mut() + } +} + +impl IntoIterator for HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + type Item = (K, V); + type IntoIter = IntoIter; + + /// Creates a consuming iterator, that is, one that moves each key-value + /// pair out of the map in arbitrary order. The map cannot be used after + /// calling this. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// + /// // Not possible with .iter() + /// let vec: Vec<(&str, isize)> = map.into_iter().collect(); + /// ``` + fn into_iter(self) -> IntoIter { + IntoIter { + inner: self.table.into_iter(), + } + } +} + +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + #[inline] + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + #[inline] + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.inner.next() + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> fmt::Debug for IterMut<'a, K, V> +where + K: fmt::Debug, + V: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.inner.iter()).finish() + } +} + +impl Iterator for IntoIter { + type Item = (K, V); + + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.inner.next().map(|(_, k, v)| (k, v)) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +impl ExactSizeIterator for IntoIter { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} + +impl fmt::Debug for IntoIter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.inner.iter()).finish() + } +} + +impl<'a, K, V> Iterator for Keys<'a, K, V> { + type Item = &'a K; + + #[inline] + fn next(&mut self) -> Option<&'a K> { + 
self.inner.next().map(|(k, _)| k) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +impl<'a, K, V> ExactSizeIterator for Keys<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> Iterator for Values<'a, K, V> { + type Item = &'a V; + + #[inline] + fn next(&mut self) -> Option<&'a V> { + self.inner.next().map(|(_, v)| v) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +impl<'a, K, V> ExactSizeIterator for Values<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} +impl<'a, K, V> Iterator for ValuesMut<'a, K, V> { + type Item = &'a mut V; + + #[inline] + fn next(&mut self) -> Option<&'a mut V> { + self.inner.next().map(|(_, v)| v) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +impl<'a, K, V> ExactSizeIterator for ValuesMut<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> fmt::Debug for ValuesMut<'a, K, V> +where + K: fmt::Debug, + V: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.inner.inner.iter()).finish() + } +} + +impl<'a, K, V> Iterator for Drain<'a, K, V> { + type Item = (K, V); + + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.inner.next().map(|(_, k, v)| (k, v)) + } + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} +impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { + #[inline] + fn len(&self) -> usize { + self.inner.len() + } +} + +impl<'a, K, V> fmt::Debug for Drain<'a, K, V> +where + K: fmt::Debug, + V: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.inner.iter()).finish() + } +} + +// FORK NOTE: Removed Placer impl + +impl<'a, K, V> Entry<'a, K, V> { + /// Ensures a value is in the entry by inserting the default if empty, and returns + /// a mutable 
reference to the value in the entry. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// assert_eq!(map["poneyland"], 12); + /// + /// *map.entry("poneyland").or_insert(12) += 10; + /// assert_eq!(map["poneyland"], 22); + /// ``` + pub fn or_insert(self, default: V) -> &'a mut V { + match self { + Occupied(entry) => entry.into_mut(), + Vacant(entry) => entry.insert(default), + } + } + + /// Ensures a value is in the entry by inserting the result of the default function if empty, + /// and returns a mutable reference to the value in the entry. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, String> = HashMap::new(); + /// let s = "hoho".to_string(); + /// + /// map.entry("poneyland").or_insert_with(|| s); + /// + /// assert_eq!(map["poneyland"], "hoho".to_string()); + /// ``` + pub fn or_insert_with V>(self, default: F) -> &'a mut V { + match self { + Occupied(entry) => entry.into_mut(), + Vacant(entry) => entry.insert(default()), + } + } + + /// Returns a reference to this entry's key. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); + /// ``` + pub fn key(&self) -> &K { + match *self { + Occupied(ref entry) => entry.key(), + Vacant(ref entry) => entry.key(), + } + } +} + +impl<'a, K, V> OccupiedEntry<'a, K, V> { + /// Gets a reference to the key in the entry. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); + /// ``` + pub fn key(&self) -> &K { + self.elem.read().0 + } + + /// Take the ownership of the key and value from the map. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// if let Entry::Occupied(o) = map.entry("poneyland") { + /// // We delete the entry from the map. + /// o.remove_entry(); + /// } + /// + /// assert_eq!(map.contains_key("poneyland"), false); + /// ``` + pub fn remove_entry(self) -> (K, V) { + let (k, v, _) = pop_internal(self.elem); + (k, v) + } + + /// Gets a reference to the value in the entry. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// if let Entry::Occupied(o) = map.entry("poneyland") { + /// assert_eq!(o.get(), &12); + /// } + /// ``` + pub fn get(&self) -> &V { + self.elem.read().1 + } + + /// Gets a mutable reference to the value in the entry. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// assert_eq!(map["poneyland"], 12); + /// if let Entry::Occupied(mut o) = map.entry("poneyland") { + /// *o.get_mut() += 10; + /// } + /// + /// assert_eq!(map["poneyland"], 22); + /// ``` + pub fn get_mut(&mut self) -> &mut V { + self.elem.read_mut().1 + } + + /// Converts the OccupiedEntry into a mutable reference to the value in the entry + /// with a lifetime bound to the map itself. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// assert_eq!(map["poneyland"], 12); + /// if let Entry::Occupied(o) = map.entry("poneyland") { + /// *o.into_mut() += 10; + /// } + /// + /// assert_eq!(map["poneyland"], 22); + /// ``` + pub fn into_mut(self) -> &'a mut V { + self.elem.into_mut_refs().1 + } + + /// Sets the value of the entry, and returns the entry's old value. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// if let Entry::Occupied(mut o) = map.entry("poneyland") { + /// assert_eq!(o.insert(15), 12); + /// } + /// + /// assert_eq!(map["poneyland"], 15); + /// ``` + pub fn insert(&mut self, mut value: V) -> V { + let old_value = self.get_mut(); + mem::swap(&mut value, old_value); + value + } + + /// Takes the value out of the entry, and returns it. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// map.entry("poneyland").or_insert(12); + /// + /// if let Entry::Occupied(o) = map.entry("poneyland") { + /// assert_eq!(o.remove(), 12); + /// } + /// + /// assert_eq!(map.contains_key("poneyland"), false); + /// ``` + pub fn remove(self) -> V { + pop_internal(self.elem).1 + } + + /// Returns a key that was used for search. + /// + /// The key was retained for further use. + fn take_key(&mut self) -> Option { + self.key.take() + } +} + +impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { + /// Gets a reference to the key that would be used when inserting a value + /// through the `VacantEntry`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// assert_eq!(map.entry("poneyland").key(), &"poneyland"); + /// ``` + pub fn key(&self) -> &K { + &self.key + } + + /// Take ownership of the key. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// + /// if let Entry::Vacant(v) = map.entry("poneyland") { + /// v.into_key(); + /// } + /// ``` + pub fn into_key(self) -> K { + self.key + } + + /// Sets the value of the entry with the VacantEntry's key, + /// and returns a mutable reference to it. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::collections::hash_map::Entry; + /// + /// let mut map: HashMap<&str, u32> = HashMap::new(); + /// + /// if let Entry::Vacant(o) = map.entry("poneyland") { + /// o.insert(37); + /// } + /// assert_eq!(map["poneyland"], 37); + /// ``` + pub fn insert(self, value: V) -> &'a mut V { + let b = match self.elem { + NeqElem(mut bucket, disp) => { + if disp >= DISPLACEMENT_THRESHOLD { + bucket.table_mut().set_tag(true); + } + robin_hood(bucket, disp, self.hash, self.key, value) + }, + NoElem(mut bucket, disp) => { + if disp >= DISPLACEMENT_THRESHOLD { + bucket.table_mut().set_tag(true); + } + bucket.put(self.hash, self.key, value) + }, + }; + b.into_mut_refs().1 + } +} + +impl FromIterator<(K, V)> for HashMap +where + K: Eq + Hash, + S: BuildHasher + Default, +{ + fn from_iter>(iter: T) -> HashMap { + let mut map = HashMap::with_hasher(Default::default()); + map.extend(iter); + map + } +} + +impl Extend<(K, V)> for HashMap +where + K: Eq + Hash, + S: BuildHasher, +{ + fn extend>(&mut self, iter: T) { + // Keys may be already present or show multiple times in the iterator. + // Reserve the entire hint lower bound if the map is empty. + // Otherwise reserve half the hint (rounded up), so the map + // will only resize twice in the worst case. 
+ let iter = iter.into_iter(); + let reserve = if self.is_empty() { + iter.size_hint().0 + } else { + (iter.size_hint().0 + 1) / 2 + }; + self.reserve(reserve); + for (k, v) in iter { + self.insert(k, v); + } + } +} + +impl<'a, K, V, S> Extend<(&'a K, &'a V)> for HashMap +where + K: Eq + Hash + Copy, + V: Copy, + S: BuildHasher, +{ + fn extend>(&mut self, iter: T) { + self.extend(iter.into_iter().map(|(&key, &value)| (key, value))); + } +} + +// FORK NOTE: These can be reused +pub use std::collections::hash_map::{DefaultHasher, RandomState}; + +impl super::Recover for HashMap +where + K: Eq + Hash + Borrow, + S: BuildHasher, + Q: Eq + Hash, +{ + type Key = K; + + fn get(&self, key: &Q) -> Option<&K> { + self.search(key) + .into_occupied_bucket() + .map(|bucket| bucket.into_refs().0) + } + + fn take(&mut self, key: &Q) -> Option { + if self.table.size() == 0 { + return None; + } + + self.search_mut(key) + .into_occupied_bucket() + .map(|bucket| pop_internal(bucket).0) + } + + fn replace(&mut self, key: K) -> Option { + self.reserve(1); + + match self.entry(key) { + Occupied(mut occupied) => { + let key = occupied.take_key().unwrap(); + Some(mem::replace(occupied.elem.read_mut().0, key)) + }, + Vacant(vacant) => { + vacant.insert(()); + None + }, + } + } +} + +#[allow(dead_code)] +fn assert_covariance() { + fn map_key<'new>(v: HashMap<&'static str, u8>) -> HashMap<&'new str, u8> { + v + } + fn map_val<'new>(v: HashMap) -> HashMap { + v + } + fn iter_key<'a, 'new>(v: Iter<'a, &'static str, u8>) -> Iter<'a, &'new str, u8> { + v + } + fn iter_val<'a, 'new>(v: Iter<'a, u8, &'static str>) -> Iter<'a, u8, &'new str> { + v + } + fn into_iter_key<'new>(v: IntoIter<&'static str, u8>) -> IntoIter<&'new str, u8> { + v + } + fn into_iter_val<'new>(v: IntoIter) -> IntoIter { + v + } + fn keys_key<'a, 'new>(v: Keys<'a, &'static str, u8>) -> Keys<'a, &'new str, u8> { + v + } + fn keys_val<'a, 'new>(v: Keys<'a, u8, &'static str>) -> Keys<'a, u8, &'new str> { + v + } + fn 
values_key<'a, 'new>(v: Values<'a, &'static str, u8>) -> Values<'a, &'new str, u8> { + v + } + fn values_val<'a, 'new>(v: Values<'a, u8, &'static str>) -> Values<'a, u8, &'new str> { + v + } + fn drain<'new>( + d: Drain<'static, &'static str, &'static str>, + ) -> Drain<'new, &'new str, &'new str> { + d + } +} + +#[cfg(test)] +mod test_map { + extern crate rand; + use self::rand::{thread_rng, Rng}; + use super::Entry::{Occupied, Vacant}; + use super::HashMap; + use super::RandomState; + use cell::RefCell; + + #[test] + fn test_zero_capacities() { + type HM = HashMap; + + let m = HM::new(); + assert_eq!(m.capacity(), 0); + + let m = HM::default(); + assert_eq!(m.capacity(), 0); + + let m = HM::with_hasher(RandomState::new()); + assert_eq!(m.capacity(), 0); + + let m = HM::with_capacity(0); + assert_eq!(m.capacity(), 0); + + let m = HM::with_capacity_and_hasher(0, RandomState::new()); + assert_eq!(m.capacity(), 0); + + let mut m = HM::new(); + m.insert(1, 1); + m.insert(2, 2); + m.remove(&1); + m.remove(&2); + m.shrink_to_fit(); + assert_eq!(m.capacity(), 0); + + let mut m = HM::new(); + m.reserve(0); + assert_eq!(m.capacity(), 0); + } + + #[test] + fn test_create_capacity_zero() { + let mut m = HashMap::with_capacity(0); + + assert!(m.insert(1, 1).is_none()); + + assert!(m.contains_key(&1)); + assert!(!m.contains_key(&0)); + } + + #[test] + fn test_insert() { + let mut m = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1, 2).is_none()); + assert_eq!(m.len(), 1); + assert!(m.insert(2, 4).is_none()); + assert_eq!(m.len(), 2); + assert_eq!(*m.get(&1).unwrap(), 2); + assert_eq!(*m.get(&2).unwrap(), 4); + } + + #[test] + fn test_clone() { + let mut m = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1, 2).is_none()); + assert_eq!(m.len(), 1); + assert!(m.insert(2, 4).is_none()); + assert_eq!(m.len(), 2); + let m2 = m.clone(); + assert_eq!(*m2.get(&1).unwrap(), 2); + assert_eq!(*m2.get(&2).unwrap(), 4); + assert_eq!(m2.len(), 2); + } + + 
thread_local! { static DROP_VECTOR: RefCell> = RefCell::new(Vec::new()) } + + #[derive(Hash, PartialEq, Eq)] + struct Dropable { + k: usize, + } + + impl Dropable { + fn new(k: usize) -> Dropable { + DROP_VECTOR.with(|slot| { + slot.borrow_mut()[k] += 1; + }); + + Dropable { k: k } + } + } + + impl Drop for Dropable { + fn drop(&mut self) { + DROP_VECTOR.with(|slot| { + slot.borrow_mut()[self.k] -= 1; + }); + } + } + + impl Clone for Dropable { + fn clone(&self) -> Dropable { + Dropable::new(self.k) + } + } + + #[test] + fn test_drops() { + DROP_VECTOR.with(|slot| { + *slot.borrow_mut() = vec![0; 200]; + }); + + { + let mut m = HashMap::new(); + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 0); + } + }); + + for i in 0..100 { + let d1 = Dropable::new(i); + let d2 = Dropable::new(i + 100); + m.insert(d1, d2); + } + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 1); + } + }); + + for i in 0..50 { + let k = Dropable::new(i); + let v = m.remove(&k); + + assert!(v.is_some()); + + DROP_VECTOR.with(|v| { + assert_eq!(v.borrow()[i], 1); + assert_eq!(v.borrow()[i + 100], 1); + }); + } + + DROP_VECTOR.with(|v| { + for i in 0..50 { + assert_eq!(v.borrow()[i], 0); + assert_eq!(v.borrow()[i + 100], 0); + } + + for i in 50..100 { + assert_eq!(v.borrow()[i], 1); + assert_eq!(v.borrow()[i + 100], 1); + } + }); + } + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 0); + } + }); + } + + #[test] + fn test_into_iter_drops() { + DROP_VECTOR.with(|v| { + *v.borrow_mut() = vec![0; 200]; + }); + + let hm = { + let mut hm = HashMap::new(); + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 0); + } + }); + + for i in 0..100 { + let d1 = Dropable::new(i); + let d2 = Dropable::new(i + 100); + hm.insert(d1, d2); + } + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 1); + } + }); + + hm + }; + + // By the way, ensure that cloning doesn't screw up the dropping. 
+ drop(hm.clone()); + + { + let mut half = hm.into_iter().take(50); + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 1); + } + }); + + for _ in half.by_ref() {} + + DROP_VECTOR.with(|v| { + let nk = (0..100).filter(|&i| v.borrow()[i] == 1).count(); + + let nv = (0..100).filter(|&i| v.borrow()[i + 100] == 1).count(); + + assert_eq!(nk, 50); + assert_eq!(nv, 50); + }); + }; + + DROP_VECTOR.with(|v| { + for i in 0..200 { + assert_eq!(v.borrow()[i], 0); + } + }); + } + + #[test] + fn test_empty_remove() { + let mut m: HashMap = HashMap::new(); + assert_eq!(m.remove(&0), None); + } + + #[test] + fn test_empty_entry() { + let mut m: HashMap = HashMap::new(); + match m.entry(0) { + Occupied(_) => panic!(), + Vacant(_) => {}, + } + assert!(*m.entry(0).or_insert(true)); + assert_eq!(m.len(), 1); + } + + #[test] + fn test_empty_iter() { + let mut m: HashMap = HashMap::new(); + assert_eq!(m.drain().next(), None); + assert_eq!(m.keys().next(), None); + assert_eq!(m.values().next(), None); + assert_eq!(m.values_mut().next(), None); + assert_eq!(m.iter().next(), None); + assert_eq!(m.iter_mut().next(), None); + assert_eq!(m.len(), 0); + assert!(m.is_empty()); + assert_eq!(m.into_iter().next(), None); + } + + #[test] + fn test_lots_of_insertions() { + let mut m = HashMap::new(); + + // Try this a few times to make sure we never screw up the hashmap's + // internal state. 
+ for _ in 0..10 { + assert!(m.is_empty()); + + for i in 1..1001 { + assert!(m.insert(i, i).is_none()); + + for j in 1..i + 1 { + let r = m.get(&j); + assert_eq!(r, Some(&j)); + } + + for j in i + 1..1001 { + let r = m.get(&j); + assert_eq!(r, None); + } + } + + for i in 1001..2001 { + assert!(!m.contains_key(&i)); + } + + // remove forwards + for i in 1..1001 { + assert!(m.remove(&i).is_some()); + + for j in 1..i + 1 { + assert!(!m.contains_key(&j)); + } + + for j in i + 1..1001 { + assert!(m.contains_key(&j)); + } + } + + for i in 1..1001 { + assert!(!m.contains_key(&i)); + } + + for i in 1..1001 { + assert!(m.insert(i, i).is_none()); + } + + // remove backwards + for i in (1..1001).rev() { + assert!(m.remove(&i).is_some()); + + for j in i..1001 { + assert!(!m.contains_key(&j)); + } + + for j in 1..i { + assert!(m.contains_key(&j)); + } + } + } + } + + #[test] + fn test_find_mut() { + let mut m = HashMap::new(); + assert!(m.insert(1, 12).is_none()); + assert!(m.insert(2, 8).is_none()); + assert!(m.insert(5, 14).is_none()); + let new = 100; + match m.get_mut(&5) { + None => panic!(), + Some(x) => *x = new, + } + assert_eq!(m.get(&5), Some(&new)); + } + + #[test] + fn test_insert_overwrite() { + let mut m = HashMap::new(); + assert!(m.insert(1, 2).is_none()); + assert_eq!(*m.get(&1).unwrap(), 2); + assert!(!m.insert(1, 3).is_none()); + assert_eq!(*m.get(&1).unwrap(), 3); + } + + #[test] + fn test_insert_conflicts() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1, 2).is_none()); + assert!(m.insert(5, 3).is_none()); + assert!(m.insert(9, 4).is_none()); + assert_eq!(*m.get(&9).unwrap(), 4); + assert_eq!(*m.get(&5).unwrap(), 3); + assert_eq!(*m.get(&1).unwrap(), 2); + } + + #[test] + fn test_conflict_remove() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1, 2).is_none()); + assert_eq!(*m.get(&1).unwrap(), 2); + assert!(m.insert(5, 3).is_none()); + assert_eq!(*m.get(&1).unwrap(), 2); + assert_eq!(*m.get(&5).unwrap(), 3); + 
assert!(m.insert(9, 4).is_none()); + assert_eq!(*m.get(&1).unwrap(), 2); + assert_eq!(*m.get(&5).unwrap(), 3); + assert_eq!(*m.get(&9).unwrap(), 4); + assert!(m.remove(&1).is_some()); + assert_eq!(*m.get(&9).unwrap(), 4); + assert_eq!(*m.get(&5).unwrap(), 3); + } + + #[test] + fn test_is_empty() { + let mut m = HashMap::with_capacity(4); + assert!(m.insert(1, 2).is_none()); + assert!(!m.is_empty()); + assert!(m.remove(&1).is_some()); + assert!(m.is_empty()); + } + + #[test] + fn test_pop() { + let mut m = HashMap::new(); + m.insert(1, 2); + assert_eq!(m.remove(&1), Some(2)); + assert_eq!(m.remove(&1), None); + } + + #[test] + fn test_iterate() { + let mut m = HashMap::with_capacity(4); + for i in 0..32 { + assert!(m.insert(i, i * 2).is_none()); + } + assert_eq!(m.len(), 32); + + let mut observed: u32 = 0; + + for (k, v) in &m { + assert_eq!(*v, *k * 2); + observed |= 1 << *k; + } + assert_eq!(observed, 0xFFFF_FFFF); + } + + #[test] + fn test_keys() { + let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; + let map: HashMap<_, _> = vec.into_iter().collect(); + let keys: Vec<_> = map.keys().cloned().collect(); + assert_eq!(keys.len(), 3); + assert!(keys.contains(&1)); + assert!(keys.contains(&2)); + assert!(keys.contains(&3)); + } + + #[test] + fn test_values() { + let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')]; + let map: HashMap<_, _> = vec.into_iter().collect(); + let values: Vec<_> = map.values().cloned().collect(); + assert_eq!(values.len(), 3); + assert!(values.contains(&'a')); + assert!(values.contains(&'b')); + assert!(values.contains(&'c')); + } + + #[test] + fn test_values_mut() { + let vec = vec![(1, 1), (2, 2), (3, 3)]; + let mut map: HashMap<_, _> = vec.into_iter().collect(); + for value in map.values_mut() { + *value = (*value) * 2 + } + let values: Vec<_> = map.values().cloned().collect(); + assert_eq!(values.len(), 3); + assert!(values.contains(&2)); + assert!(values.contains(&4)); + assert!(values.contains(&6)); + } + + #[test] + fn test_find() { + let mut m 
= HashMap::new(); + assert!(m.get(&1).is_none()); + m.insert(1, 2); + match m.get(&1) { + None => panic!(), + Some(v) => assert_eq!(*v, 2), + } + } + + #[test] + fn test_eq() { + let mut m1 = HashMap::new(); + m1.insert(1, 2); + m1.insert(2, 3); + m1.insert(3, 4); + + let mut m2 = HashMap::new(); + m2.insert(1, 2); + m2.insert(2, 3); + + assert_ne!(m1, m2); + + m2.insert(3, 4); + + assert_eq!(m1, m2); + } + + #[test] + fn test_show() { + let mut map = HashMap::new(); + let empty: HashMap = HashMap::new(); + + map.insert(1, 2); + map.insert(3, 4); + + let map_str = format!("{:?}", map); + + assert!(map_str == "{1: 2, 3: 4}" || map_str == "{3: 4, 1: 2}"); + assert_eq!(format!("{:?}", empty), "{}"); + } + + #[test] + fn test_expand() { + let mut m = HashMap::new(); + + assert_eq!(m.len(), 0); + assert!(m.is_empty()); + + let mut i = 0; + let old_raw_cap = m.raw_capacity(); + while old_raw_cap == m.raw_capacity() { + m.insert(i, i); + i += 1; + } + + assert_eq!(m.len(), i); + assert!(!m.is_empty()); + } + + #[test] + fn test_behavior_resize_policy() { + let mut m = HashMap::new(); + + assert_eq!(m.len(), 0); + assert_eq!(m.raw_capacity(), 0); + assert!(m.is_empty()); + + m.insert(0, 0); + m.remove(&0); + assert!(m.is_empty()); + let initial_raw_cap = m.raw_capacity(); + m.reserve(initial_raw_cap); + let raw_cap = m.raw_capacity(); + + assert_eq!(raw_cap, initial_raw_cap * 2); + + let mut i = 0; + for _ in 0..raw_cap * 3 / 4 { + m.insert(i, i); + i += 1; + } + // three quarters full + + assert_eq!(m.len(), i); + assert_eq!(m.raw_capacity(), raw_cap); + + for _ in 0..raw_cap / 4 { + m.insert(i, i); + i += 1; + } + // half full + + let new_raw_cap = m.raw_capacity(); + assert_eq!(new_raw_cap, raw_cap * 2); + + for _ in 0..raw_cap / 2 - 1 { + i -= 1; + m.remove(&i); + assert_eq!(m.raw_capacity(), new_raw_cap); + } + // A little more than one quarter full. 
+ m.shrink_to_fit(); + assert_eq!(m.raw_capacity(), raw_cap); + // again, a little more than half full + for _ in 0..raw_cap / 2 - 1 { + i -= 1; + m.remove(&i); + } + m.shrink_to_fit(); + + assert_eq!(m.len(), i); + assert!(!m.is_empty()); + assert_eq!(m.raw_capacity(), initial_raw_cap); + } + + #[test] + fn test_reserve_shrink_to_fit() { + let mut m = HashMap::new(); + m.insert(0, 0); + m.remove(&0); + assert!(m.capacity() >= m.len()); + for i in 0..128 { + m.insert(i, i); + } + m.reserve(256); + + let usable_cap = m.capacity(); + for i in 128..(128 + 256) { + m.insert(i, i); + assert_eq!(m.capacity(), usable_cap); + } + + for i in 100..(128 + 256) { + assert_eq!(m.remove(&i), Some(i)); + } + m.shrink_to_fit(); + + assert_eq!(m.len(), 100); + assert!(!m.is_empty()); + assert!(m.capacity() >= m.len()); + + for i in 0..100 { + assert_eq!(m.remove(&i), Some(i)); + } + m.shrink_to_fit(); + m.insert(0, 0); + + assert_eq!(m.len(), 1); + assert!(m.capacity() >= m.len()); + assert_eq!(m.remove(&0), Some(0)); + } + + #[test] + fn test_from_iter() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let map: HashMap<_, _> = xs.iter().cloned().collect(); + + for &(k, v) in &xs { + assert_eq!(map.get(&k), Some(&v)); + } + } + + #[test] + fn test_size_hint() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let map: HashMap<_, _> = xs.iter().cloned().collect(); + + let mut iter = map.iter(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.size_hint(), (3, Some(3))); + } + + #[test] + fn test_iter_len() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let map: HashMap<_, _> = xs.iter().cloned().collect(); + + let mut iter = map.iter(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.len(), 3); + } + + #[test] + fn test_mut_size_hint() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let mut map: HashMap<_, _> = xs.iter().cloned().collect(); + + let mut iter = map.iter_mut(); + + for _ in 
iter.by_ref().take(3) {} + + assert_eq!(iter.size_hint(), (3, Some(3))); + } + + #[test] + fn test_iter_mut_len() { + let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; + + let mut map: HashMap<_, _> = xs.iter().cloned().collect(); + + let mut iter = map.iter_mut(); + + for _ in iter.by_ref().take(3) {} + + assert_eq!(iter.len(), 3); + } + + #[test] + fn test_index() { + let mut map = HashMap::new(); + + map.insert(1, 2); + map.insert(2, 1); + map.insert(3, 4); + + assert_eq!(map[&2], 1); + } + + #[test] + #[should_panic] + fn test_index_nonexistent() { + let mut map = HashMap::new(); + + map.insert(1, 2); + map.insert(2, 1); + map.insert(3, 4); + + map[&4]; + } + + #[test] + fn test_entry() { + let xs = [(1, 10), (2, 20), (3, 30), (4, 40), (5, 50), (6, 60)]; + + let mut map: HashMap<_, _> = xs.iter().cloned().collect(); + + // Existing key (insert) + match map.entry(1) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + assert_eq!(view.get(), &10); + assert_eq!(view.insert(100), 10); + }, + } + assert_eq!(map.get(&1).unwrap(), &100); + assert_eq!(map.len(), 6); + + // Existing key (update) + match map.entry(2) { + Vacant(_) => unreachable!(), + Occupied(mut view) => { + let v = view.get_mut(); + let new_v = (*v) * 10; + *v = new_v; + }, + } + assert_eq!(map.get(&2).unwrap(), &200); + assert_eq!(map.len(), 6); + + // Existing key (take) + match map.entry(3) { + Vacant(_) => unreachable!(), + Occupied(view) => { + assert_eq!(view.remove(), 30); + }, + } + assert_eq!(map.get(&3), None); + assert_eq!(map.len(), 5); + + // Inexistent key (insert) + match map.entry(10) { + Occupied(_) => unreachable!(), + Vacant(view) => { + assert_eq!(*view.insert(1000), 1000); + }, + } + assert_eq!(map.get(&10).unwrap(), &1000); + assert_eq!(map.len(), 6); + } + + #[test] + fn test_entry_take_doesnt_corrupt() { + #![allow(deprecated)] //rand + // Test for #19292 + fn check(m: &HashMap) { + for k in m.keys() { + assert!(m.contains_key(k), "{} is in keys() but not in 
the map?", k); + } + } + + let mut m = HashMap::new(); + let mut rng = thread_rng(); + + // Populate the map with some items. + for _ in 0..50 { + let x = rng.gen_range(-10, 10); + m.insert(x, ()); + } + + for i in 0..1000 { + let x = rng.gen_range(-10, 10); + match m.entry(x) { + Vacant(_) => {}, + Occupied(e) => { + println!("{}: remove {}", i, x); + e.remove(); + }, + } + + check(&m); + } + } + + #[test] + fn test_extend_ref() { + let mut a = HashMap::new(); + a.insert(1, "one"); + let mut b = HashMap::new(); + b.insert(2, "two"); + b.insert(3, "three"); + + a.extend(&b); + + assert_eq!(a.len(), 3); + assert_eq!(a[&1], "one"); + assert_eq!(a[&2], "two"); + assert_eq!(a[&3], "three"); + } + + #[test] + fn test_capacity_not_less_than_len() { + let mut a = HashMap::new(); + let mut item = 0; + + for _ in 0..116 { + a.insert(item, 0); + item += 1; + } + + assert!(a.capacity() > a.len()); + + let free = a.capacity() - a.len(); + for _ in 0..free { + a.insert(item, 0); + item += 1; + } + + assert_eq!(a.len(), a.capacity()); + + // Insert at capacity should cause allocation. 
+ a.insert(item, 0); + assert!(a.capacity() > a.len()); + } + + #[test] + fn test_occupied_entry_key() { + let mut a = HashMap::new(); + let key = "hello there"; + let value = "value goes here"; + assert!(a.is_empty()); + a.insert(key.clone(), value.clone()); + assert_eq!(a.len(), 1); + assert_eq!(a[key], value); + + match a.entry(key.clone()) { + Vacant(_) => panic!(), + Occupied(e) => assert_eq!(key, *e.key()), + } + assert_eq!(a.len(), 1); + assert_eq!(a[key], value); + } + + #[test] + fn test_vacant_entry_key() { + let mut a = HashMap::new(); + let key = "hello there"; + let value = "value goes here"; + + assert!(a.is_empty()); + match a.entry(key.clone()) { + Occupied(_) => panic!(), + Vacant(e) => { + assert_eq!(key, *e.key()); + e.insert(value.clone()); + }, + } + assert_eq!(a.len(), 1); + assert_eq!(a[key], value); + } + + #[test] + fn test_retain() { + let mut map: HashMap = (0..100).map(|x| (x, x * 10)).collect(); + + map.retain(|&k, _| k % 2 == 0); + assert_eq!(map.len(), 50); + assert_eq!(map[&2], 20); + assert_eq!(map[&4], 40); + assert_eq!(map[&6], 60); + } + + #[test] + fn test_adaptive() { + const TEST_LEN: usize = 5000; + // by cloning we get maps with the same hasher seed + let mut first = HashMap::new(); + let mut second = first.clone(); + first.extend((0..TEST_LEN).map(|i| (i, i))); + second.extend((TEST_LEN..TEST_LEN * 2).map(|i| (i, i))); + + for (&k, &v) in &second { + let prev_cap = first.capacity(); + let expect_grow = first.len() == prev_cap; + first.insert(k, v); + if !expect_grow && first.capacity() != prev_cap { + return; + } + } + panic!("Adaptive early resize failed"); + } +} diff --git a/servo/components/hashglobe/src/hash_set.rs b/servo/components/hashglobe/src/hash_set.rs new file mode 100644 index 000000000000..ef373ae37106 --- /dev/null +++ b/servo/components/hashglobe/src/hash_set.rs @@ -0,0 +1,1648 @@ +// Copyright 2014 The Rust Project Developers. 
See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::borrow::Borrow; +use std::fmt; +use std::hash::{BuildHasher, Hash}; +use std::iter::{Chain, FromIterator}; +use std::ops::{BitAnd, BitOr, BitXor, Sub}; + +use super::hash_map::{self, HashMap, Keys, RandomState}; +use super::Recover; + +use crate::FailedAllocationError; + +// Future Optimization (FIXME!) +// ============================= +// +// Iteration over zero sized values is a noop. There is no need +// for `bucket.val` in the case of HashSet. I suppose we would need HKT +// to get rid of it properly. + +/// A hash set implemented as a `HashMap` where the value is `()`. +/// +/// As with the [`HashMap`] type, a `HashSet` requires that the elements +/// implement the [`Eq`] and [`Hash`] traits. This can frequently be achieved by +/// using `#[derive(PartialEq, Eq, Hash)]`. If you implement these yourself, +/// it is important that the following property holds: +/// +/// ```text +/// k1 == k2 -> hash(k1) == hash(k2) +/// ``` +/// +/// In other words, if two keys are equal, their hashes must be equal. +/// +/// +/// It is a logic error for an item to be modified in such a way that the +/// item's hash, as determined by the [`Hash`] trait, or its equality, as +/// determined by the [`Eq`] trait, changes while it is in the set. This is +/// normally only possible through [`Cell`], [`RefCell`], global state, I/O, or +/// unsafe code. +/// +/// # Examples +/// +/// ``` +/// use std::collections::HashSet; +/// // Type inference lets us omit an explicit type signature (which +/// // would be `HashSet<&str>` in this example). +/// let mut books = HashSet::new(); +/// +/// // Add some books. 
+/// books.insert("A Dance With Dragons"); +/// books.insert("To Kill a Mockingbird"); +/// books.insert("The Odyssey"); +/// books.insert("The Great Gatsby"); +/// +/// // Check for a specific one. +/// if !books.contains("The Winds of Winter") { +/// println!("We have {} books, but The Winds of Winter ain't one.", +/// books.len()); +/// } +/// +/// // Remove a book. +/// books.remove("The Odyssey"); +/// +/// // Iterate over everything. +/// for book in &books { +/// println!("{}", book); +/// } +/// ``` +/// +/// The easiest way to use `HashSet` with a custom type is to derive +/// [`Eq`] and [`Hash`]. We must also derive [`PartialEq`], this will in the +/// future be implied by [`Eq`]. +/// +/// ``` +/// use std::collections::HashSet; +/// #[derive(Hash, Eq, PartialEq, Debug)] +/// struct Viking<'a> { +/// name: &'a str, +/// power: usize, +/// } +/// +/// let mut vikings = HashSet::new(); +/// +/// vikings.insert(Viking { name: "Einar", power: 9 }); +/// vikings.insert(Viking { name: "Einar", power: 9 }); +/// vikings.insert(Viking { name: "Olaf", power: 4 }); +/// vikings.insert(Viking { name: "Harald", power: 8 }); +/// +/// // Use derived implementation to print the vikings. 
+/// for x in &vikings { +/// println!("{:?}", x); +/// } +/// ``` +/// +/// A `HashSet` with fixed list of elements can be initialized from an array: +/// +/// ``` +/// use std::collections::HashSet; +/// +/// fn main() { +/// let viking_names: HashSet<&str> = +/// [ "Einar", "Olaf", "Harald" ].iter().cloned().collect(); +/// // use the values stored in the set +/// } +/// ``` +/// +/// [`Cell`]: ../../std/cell/struct.Cell.html +/// [`Eq`]: ../../std/cmp/trait.Eq.html +/// [`Hash`]: ../../std/hash/trait.Hash.html +/// [`HashMap`]: struct.HashMap.html +/// [`PartialEq`]: ../../std/cmp/trait.PartialEq.html +/// [`RefCell`]: ../../std/cell/struct.RefCell.html +#[derive(Clone)] +pub struct HashSet { + map: HashMap, +} + +impl HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + /// Creates a new empty hash set which will use the given hasher to hash + /// keys. + /// + /// The hash set is also created with the default initial capacity. + /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// let mut set = HashSet::with_hasher(s); + /// set.insert(2); + /// ``` + #[inline] + pub fn with_hasher(hasher: S) -> HashSet { + HashSet { + map: HashMap::with_hasher(hasher), + } + } + + /// Creates an empty `HashSet` with with the specified capacity, using + /// `hasher` to hash the keys. + /// + /// The hash set will be able to hold at least `capacity` elements without + /// reallocating. If `capacity` is 0, the hash set will not allocate. 
+ /// + /// Warning: `hasher` is normally randomly generated, and + /// is designed to allow `HashSet`s to be resistant to attacks that + /// cause many collisions and very poor performance. Setting it + /// manually using this function can expose a DoS attack vector. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// use std::collections::hash_map::RandomState; + /// + /// let s = RandomState::new(); + /// let mut set = HashSet::with_capacity_and_hasher(10, s); + /// set.insert(1); + /// ``` + #[inline] + pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> HashSet { + HashSet { + map: HashMap::with_capacity_and_hasher(capacity, hasher), + } + } + + /// Returns a reference to the set's [`BuildHasher`]. + /// + /// [`BuildHasher`]: ../../std/hash/trait.BuildHasher.html + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// use std::collections::hash_map::RandomState; + /// + /// let hasher = RandomState::new(); + /// let set: HashSet = HashSet::with_hasher(hasher); + /// let hasher: &RandomState = set.hasher(); + /// ``` + pub fn hasher(&self) -> &S { + self.map.hasher() + } + + /// Returns the number of elements the set can hold without reallocating. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let set: HashSet = HashSet::with_capacity(100); + /// assert!(set.capacity() >= 100); + /// ``` + #[inline] + pub fn capacity(&self) -> usize { + self.map.capacity() + } + + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the `HashSet`. The collection may reserve more space to avoid + /// frequent reallocations. + /// + /// # Panics + /// + /// Panics if the new allocation size overflows `usize`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set: HashSet = HashSet::new(); + /// set.reserve(10); + /// assert!(set.capacity() >= 10); + /// ``` + pub fn reserve(&mut self, additional: usize) { + self.map.reserve(additional) + } + + /// Shrinks the capacity of the set as much as possible. It will drop + /// down as much as possible while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set = HashSet::with_capacity(100); + /// set.insert(1); + /// set.insert(2); + /// assert!(set.capacity() >= 100); + /// set.shrink_to_fit(); + /// assert!(set.capacity() >= 2); + /// ``` + pub fn shrink_to_fit(&mut self) { + self.map.shrink_to_fit() + } + + /// An iterator visiting all elements in arbitrary order. + /// The iterator element type is `&'a T`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set = HashSet::new(); + /// set.insert("a"); + /// set.insert("b"); + /// + /// // Will print in an arbitrary order. + /// for x in set.iter() { + /// println!("{}", x); + /// } + /// ``` + pub fn iter(&self) -> Iter { + Iter { + iter: self.map.keys(), + } + } + + /// Visits the values representing the difference, + /// i.e. the values that are in `self` but not in `other`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let b: HashSet<_> = [4, 2, 3, 4].iter().cloned().collect(); + /// + /// // Can be seen as `a - b`. 
+ /// for x in a.difference(&b) { + /// println!("{}", x); // Print 1 + /// } + /// + /// let diff: HashSet<_> = a.difference(&b).collect(); + /// assert_eq!(diff, [1].iter().collect()); + /// + /// // Note that difference is not symmetric, + /// // and `b - a` means something else: + /// let diff: HashSet<_> = b.difference(&a).collect(); + /// assert_eq!(diff, [4].iter().collect()); + /// ``` + pub fn difference<'a>(&'a self, other: &'a HashSet) -> Difference<'a, T, S> { + Difference { + iter: self.iter(), + other, + } + } + + /// Visits the values representing the symmetric difference, + /// i.e. the values that are in `self` or in `other` but not in both. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let b: HashSet<_> = [4, 2, 3, 4].iter().cloned().collect(); + /// + /// // Print 1, 4 in arbitrary order. + /// for x in a.symmetric_difference(&b) { + /// println!("{}", x); + /// } + /// + /// let diff1: HashSet<_> = a.symmetric_difference(&b).collect(); + /// let diff2: HashSet<_> = b.symmetric_difference(&a).collect(); + /// + /// assert_eq!(diff1, diff2); + /// assert_eq!(diff1, [1, 4].iter().collect()); + /// ``` + pub fn symmetric_difference<'a>( + &'a self, + other: &'a HashSet, + ) -> SymmetricDifference<'a, T, S> { + SymmetricDifference { + iter: self.difference(other).chain(other.difference(self)), + } + } + + /// Visits the values representing the intersection, + /// i.e. the values that are both in `self` and `other`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let b: HashSet<_> = [4, 2, 3, 4].iter().cloned().collect(); + /// + /// // Print 2, 3 in arbitrary order. 
+ /// for x in a.intersection(&b) { + /// println!("{}", x); + /// } + /// + /// let intersection: HashSet<_> = a.intersection(&b).collect(); + /// assert_eq!(intersection, [2, 3].iter().collect()); + /// ``` + pub fn intersection<'a>(&'a self, other: &'a HashSet) -> Intersection<'a, T, S> { + Intersection { + iter: self.iter(), + other, + } + } + + /// Visits the values representing the union, + /// i.e. all the values in `self` or `other`, without duplicates. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let b: HashSet<_> = [4, 2, 3, 4].iter().cloned().collect(); + /// + /// // Print 1, 2, 3, 4 in arbitrary order. + /// for x in a.union(&b) { + /// println!("{}", x); + /// } + /// + /// let union: HashSet<_> = a.union(&b).collect(); + /// assert_eq!(union, [1, 2, 3, 4].iter().collect()); + /// ``` + pub fn union<'a>(&'a self, other: &'a HashSet) -> Union<'a, T, S> { + Union { + iter: self.iter().chain(other.difference(self)), + } + } + + /// Returns the number of elements in the set. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// assert_eq!(v.len(), 0); + /// v.insert(1); + /// assert_eq!(v.len(), 1); + /// ``` + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns true if the set contains no elements. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// assert!(v.is_empty()); + /// v.insert(1); + /// assert!(!v.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// Clears the set, returning all elements in an iterator. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// assert!(!set.is_empty()); + /// + /// // print 1, 2, 3 in an arbitrary order + /// for i in set.drain() { + /// println!("{}", i); + /// } + /// + /// assert!(set.is_empty()); + /// ``` + #[inline] + pub fn drain(&mut self) -> Drain { + Drain { + iter: self.map.drain(), + } + } + + /// Clears the set, removing all values. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut v = HashSet::new(); + /// v.insert(1); + /// v.clear(); + /// assert!(v.is_empty()); + /// ``` + pub fn clear(&mut self) + where + T: 'static, + { + self.map.clear() + } + + /// Returns `true` if the set contains a value. + /// + /// The value may be any borrowed form of the set's value type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the value type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let set: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// assert_eq!(set.contains(&1), true); + /// assert_eq!(set.contains(&4), false); + /// ``` + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + pub fn contains(&self, value: &Q) -> bool + where + T: Borrow, + Q: Hash + Eq, + { + self.map.contains_key(value) + } + + /// Returns a reference to the value in the set, if any, that is equal to the given value. + /// + /// The value may be any borrowed form of the set's value type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the value type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + pub fn get(&self, value: &Q) -> Option<&T> + where + T: Borrow, + Q: Hash + Eq, + { + Recover::get(&self.map, value) + } + + /// Returns `true` if `self` has no elements in common with `other`. 
+ /// This is equivalent to checking for an empty intersection. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let mut b = HashSet::new(); + /// + /// assert_eq!(a.is_disjoint(&b), true); + /// b.insert(4); + /// assert_eq!(a.is_disjoint(&b), true); + /// b.insert(1); + /// assert_eq!(a.is_disjoint(&b), false); + /// ``` + pub fn is_disjoint(&self, other: &HashSet) -> bool { + self.iter().all(|v| !other.contains(v)) + } + + /// Returns `true` if the set is a subset of another, + /// i.e. `other` contains at least all the values in `self`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let sup: HashSet<_> = [1, 2, 3].iter().cloned().collect(); + /// let mut set = HashSet::new(); + /// + /// assert_eq!(set.is_subset(&sup), true); + /// set.insert(2); + /// assert_eq!(set.is_subset(&sup), true); + /// set.insert(4); + /// assert_eq!(set.is_subset(&sup), false); + /// ``` + pub fn is_subset(&self, other: &HashSet) -> bool { + self.iter().all(|v| other.contains(v)) + } + + /// Returns `true` if the set is a superset of another, + /// i.e. `self` contains at least all the values in `other`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let sub: HashSet<_> = [1, 2].iter().cloned().collect(); + /// let mut set = HashSet::new(); + /// + /// assert_eq!(set.is_superset(&sub), false); + /// + /// set.insert(0); + /// set.insert(1); + /// assert_eq!(set.is_superset(&sub), false); + /// + /// set.insert(2); + /// assert_eq!(set.is_superset(&sub), true); + /// ``` + #[inline] + pub fn is_superset(&self, other: &HashSet) -> bool { + other.is_subset(self) + } + + /// Adds a value to the set. + /// + /// If the set did not have this value present, `true` is returned. + /// + /// If the set did have this value present, `false` is returned. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set = HashSet::new(); + /// + /// assert_eq!(set.insert(2), true); + /// assert_eq!(set.insert(2), false); + /// assert_eq!(set.len(), 1); + /// ``` + pub fn insert(&mut self, value: T) -> bool { + self.map.insert(value, ()).is_none() + } + + /// Fallible version of `insert`. + #[inline] + pub fn try_insert(&mut self, value: T) -> Result { + Ok(self.map.try_insert(value, ())?.is_none()) + } + + /// Adds a value to the set, replacing the existing value, if any, that is equal to the given + /// one. Returns the replaced value. + pub fn replace(&mut self, value: T) -> Option { + Recover::replace(&mut self.map, value) + } + + /// Removes a value from the set. Returns `true` if the value was + /// present in the set. + /// + /// The value may be any borrowed form of the set's value type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the value type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let mut set = HashSet::new(); + /// + /// set.insert(2); + /// assert_eq!(set.remove(&2), true); + /// assert_eq!(set.remove(&2), false); + /// ``` + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + pub fn remove(&mut self, value: &Q) -> bool + where + T: Borrow, + Q: Hash + Eq, + { + self.map.remove(value).is_some() + } + + /// Removes and returns the value in the set, if any, that is equal to the given one. + /// + /// The value may be any borrowed form of the set's value type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the value type. + /// + /// [`Eq`]: ../../std/cmp/trait.Eq.html + /// [`Hash`]: ../../std/hash/trait.Hash.html + pub fn take(&mut self, value: &Q) -> Option + where + T: Borrow, + Q: Hash + Eq, + { + Recover::take(&mut self.map, value) + } + + /// Retains only the elements specified by the predicate. 
+ /// + /// In other words, remove all elements `e` such that `f(&e)` returns `false`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let xs = [1,2,3,4,5,6]; + /// let mut set: HashSet = xs.iter().cloned().collect(); + /// set.retain(|&k| k % 2 == 0); + /// assert_eq!(set.len(), 3); + /// ``` + pub fn retain(&mut self, mut f: F) + where + F: FnMut(&T) -> bool, + { + self.map.retain(|k, _| f(k)); + } +} + +impl PartialEq for HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + fn eq(&self, other: &HashSet) -> bool { + if self.len() != other.len() { + return false; + } + + self.iter().all(|key| other.contains(key)) + } +} + +impl Eq for HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ +} + +impl fmt::Debug for HashSet +where + T: Eq + Hash + fmt::Debug, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() + } +} + +impl FromIterator for HashSet +where + T: Eq + Hash, + S: BuildHasher + Default, +{ + fn from_iter>(iter: I) -> HashSet { + let mut set = HashSet::with_hasher(Default::default()); + set.extend(iter); + set + } +} + +impl Extend for HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + fn extend>(&mut self, iter: I) { + self.map.extend(iter.into_iter().map(|k| (k, ()))); + } +} + +impl<'a, T, S> Extend<&'a T> for HashSet +where + T: 'a + Eq + Hash + Copy, + S: BuildHasher, +{ + fn extend>(&mut self, iter: I) { + self.extend(iter.into_iter().cloned()); + } +} + +impl Default for HashSet +where + T: Eq + Hash, + S: BuildHasher + Default, +{ + /// Creates an empty `HashSet` with the `Default` value for the hasher. + fn default() -> HashSet { + HashSet { + map: HashMap::default(), + } + } +} + +impl<'a, 'b, T, S> BitOr<&'b HashSet> for &'a HashSet +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = HashSet; + + /// Returns the union of `self` and `rhs` as a new `HashSet`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a | &b; + /// + /// let mut i = 0; + /// let expected = [1, 2, 3, 4, 5]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitor(self, rhs: &HashSet) -> HashSet { + self.union(rhs).cloned().collect() + } +} + +impl<'a, 'b, T, S> BitAnd<&'b HashSet> for &'a HashSet +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = HashSet; + + /// Returns the intersection of `self` and `rhs` as a new `HashSet`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: HashSet<_> = vec![2, 3, 4].into_iter().collect(); + /// + /// let set = &a & &b; + /// + /// let mut i = 0; + /// let expected = [2, 3]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitand(self, rhs: &HashSet) -> HashSet { + self.intersection(rhs).cloned().collect() + } +} + +impl<'a, 'b, T, S> BitXor<&'b HashSet> for &'a HashSet +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = HashSet; + + /// Returns the symmetric difference of `self` and `rhs` as a new `HashSet`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a ^ &b; + /// + /// let mut i = 0; + /// let expected = [1, 2, 4, 5]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitxor(self, rhs: &HashSet) -> HashSet { + self.symmetric_difference(rhs).cloned().collect() + } +} + +impl<'a, 'b, T, S> Sub<&'b HashSet> for &'a HashSet +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = HashSet; + + /// Returns the difference of `self` and `rhs` as a new `HashSet`. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// + /// let a: HashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a - &b; + /// + /// let mut i = 0; + /// let expected = [1, 2]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn sub(self, rhs: &HashSet) -> HashSet { + self.difference(rhs).cloned().collect() + } +} + +/// An iterator over the items of a `HashSet`. +/// +/// This `struct` is created by the [`iter`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`iter`]: struct.HashSet.html#method.iter +pub struct Iter<'a, K: 'a> { + iter: Keys<'a, K, ()>, +} + +/// An owning iterator over the items of a `HashSet`. +/// +/// This `struct` is created by the [`into_iter`] method on [`HashSet`][`HashSet`] +/// (provided by the `IntoIterator` trait). See its documentation for more. 
+/// +/// [`HashSet`]: struct.HashSet.html +/// [`into_iter`]: struct.HashSet.html#method.into_iter +pub struct IntoIter { + iter: hash_map::IntoIter, +} + +/// A draining iterator over the items of a `HashSet`. +/// +/// This `struct` is created by the [`drain`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`drain`]: struct.HashSet.html#method.drain +pub struct Drain<'a, K: 'static> { + iter: hash_map::Drain<'a, K, ()>, +} + +/// A lazy iterator producing elements in the intersection of `HashSet`s. +/// +/// This `struct` is created by the [`intersection`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`intersection`]: struct.HashSet.html#method.intersection +pub struct Intersection<'a, T: 'a, S: 'a> { + // iterator of the first set + iter: Iter<'a, T>, + // the second set + other: &'a HashSet, +} + +/// A lazy iterator producing elements in the difference of `HashSet`s. +/// +/// This `struct` is created by the [`difference`] method on [`HashSet`]. +/// See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`difference`]: struct.HashSet.html#method.difference +pub struct Difference<'a, T: 'a, S: 'a> { + // iterator of the first set + iter: Iter<'a, T>, + // the second set + other: &'a HashSet, +} + +/// A lazy iterator producing elements in the symmetric difference of `HashSet`s. +/// +/// This `struct` is created by the [`symmetric_difference`] method on +/// [`HashSet`]. See its documentation for more. +/// +/// [`HashSet`]: struct.HashSet.html +/// [`symmetric_difference`]: struct.HashSet.html#method.symmetric_difference +pub struct SymmetricDifference<'a, T: 'a, S: 'a> { + iter: Chain, Difference<'a, T, S>>, +} + +/// A lazy iterator producing elements in the union of `HashSet`s. +/// +/// This `struct` is created by the [`union`] method on [`HashSet`]. +/// See its documentation for more. 
+/// +/// [`HashSet`]: struct.HashSet.html +/// [`union`]: struct.HashSet.html#method.union +pub struct Union<'a, T: 'a, S: 'a> { + iter: Chain, Difference<'a, T, S>>, +} + +impl<'a, T, S> IntoIterator for &'a HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Iter<'a, T> { + self.iter() + } +} + +impl IntoIterator for HashSet +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = T; + type IntoIter = IntoIter; + + /// Creates a consuming iterator, that is, one that moves each value out + /// of the set in arbitrary order. The set cannot be used after calling + /// this. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashSet; + /// let mut set = HashSet::new(); + /// set.insert("a".to_string()); + /// set.insert("b".to_string()); + /// + /// // Not possible to collect to a Vec with a regular `.iter()`. + /// let v: Vec = set.into_iter().collect(); + /// + /// // Will print in an arbitrary order. 
+ /// for x in &v { + /// println!("{}", x); + /// } + /// ``` + fn into_iter(self) -> IntoIter { + IntoIter { + iter: self.map.into_iter(), + } + } +} + +impl<'a, K> Clone for Iter<'a, K> { + fn clone(&self) -> Iter<'a, K> { + Iter { + iter: self.iter.clone(), + } + } +} +impl<'a, K> Iterator for Iter<'a, K> { + type Item = &'a K; + + fn next(&mut self) -> Option<&'a K> { + self.iter.next() + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} +impl<'a, K> ExactSizeIterator for Iter<'a, K> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl<'a, K: fmt::Debug> fmt::Debug for Iter<'a, K> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl Iterator for IntoIter { + type Item = K; + + fn next(&mut self) -> Option { + self.iter.next().map(|(k, _)| k) + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl fmt::Debug for IntoIter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let entries_iter = self.iter.inner.iter().map(|(k, _)| k); + f.debug_list().entries(entries_iter).finish() + } +} + +impl<'a, K> Iterator for Drain<'a, K> { + type Item = K; + + fn next(&mut self) -> Option { + self.iter.next().map(|(k, _)| k) + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} +impl<'a, K> ExactSizeIterator for Drain<'a, K> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl<'a, K: fmt::Debug> fmt::Debug for Drain<'a, K> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let entries_iter = self.iter.inner.iter().map(|(k, _)| k); + f.debug_list().entries(entries_iter).finish() + } +} + +impl<'a, T, S> Clone for Intersection<'a, T, S> { + fn clone(&self) -> Intersection<'a, T, S> { + Intersection { + iter: self.iter.clone(), + ..*self + } + } +} + +impl<'a, T, S> Iterator for Intersection<'a, T, S> 
+where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + + fn next(&mut self) -> Option<&'a T> { + loop { + let elt = self.iter.next()?; + if self.other.contains(elt) { + return Some(elt); + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (_, upper) = self.iter.size_hint(); + (0, upper) + } +} + +impl<'a, T, S> fmt::Debug for Intersection<'a, T, S> +where + T: fmt::Debug + Eq + Hash, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl<'a, T, S> Clone for Difference<'a, T, S> { + fn clone(&self) -> Difference<'a, T, S> { + Difference { + iter: self.iter.clone(), + ..*self + } + } +} + +impl<'a, T, S> Iterator for Difference<'a, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + + fn next(&mut self) -> Option<&'a T> { + loop { + let elt = self.iter.next()?; + if !self.other.contains(elt) { + return Some(elt); + } + } + } + + fn size_hint(&self) -> (usize, Option) { + let (_, upper) = self.iter.size_hint(); + (0, upper) + } +} + +impl<'a, T, S> fmt::Debug for Difference<'a, T, S> +where + T: fmt::Debug + Eq + Hash, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl<'a, T, S> Clone for SymmetricDifference<'a, T, S> { + fn clone(&self) -> SymmetricDifference<'a, T, S> { + SymmetricDifference { + iter: self.iter.clone(), + } + } +} + +impl<'a, T, S> Iterator for SymmetricDifference<'a, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + + fn next(&mut self) -> Option<&'a T> { + self.iter.next() + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a, T, S> fmt::Debug for SymmetricDifference<'a, T, S> +where + T: fmt::Debug + Eq + Hash, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl<'a, T, S> Clone for 
Union<'a, T, S> { + fn clone(&self) -> Union<'a, T, S> { + Union { + iter: self.iter.clone(), + } + } +} + +impl<'a, T, S> fmt::Debug for Union<'a, T, S> +where + T: fmt::Debug + Eq + Hash, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_list().entries(self.clone()).finish() + } +} + +impl<'a, T, S> Iterator for Union<'a, T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + type Item = &'a T; + + fn next(&mut self) -> Option<&'a T> { + self.iter.next() + } + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +#[allow(dead_code)] +fn assert_covariance() { + fn set<'new>(v: HashSet<&'static str>) -> HashSet<&'new str> { + v + } + fn iter<'a, 'new>(v: Iter<'a, &'static str>) -> Iter<'a, &'new str> { + v + } + fn into_iter<'new>(v: IntoIter<&'static str>) -> IntoIter<&'new str> { + v + } + fn difference<'a, 'new>( + v: Difference<'a, &'static str, RandomState>, + ) -> Difference<'a, &'new str, RandomState> { + v + } + fn symmetric_difference<'a, 'new>( + v: SymmetricDifference<'a, &'static str, RandomState>, + ) -> SymmetricDifference<'a, &'new str, RandomState> { + v + } + fn intersection<'a, 'new>( + v: Intersection<'a, &'static str, RandomState>, + ) -> Intersection<'a, &'new str, RandomState> { + v + } + fn union<'a, 'new>( + v: Union<'a, &'static str, RandomState>, + ) -> Union<'a, &'new str, RandomState> { + v + } + fn drain<'new>(d: Drain<'static, &'static str>) -> Drain<'new, &'new str> { + d + } +} + +#[cfg(test)] +mod test_set { + use super::hash_map::RandomState; + use super::HashSet; + + #[test] + fn test_zero_capacities() { + type HS = HashSet; + + let s = HS::new(); + assert_eq!(s.capacity(), 0); + + let s = HS::default(); + assert_eq!(s.capacity(), 0); + + let s = HS::with_hasher(RandomState::new()); + assert_eq!(s.capacity(), 0); + + let s = HS::with_capacity(0); + assert_eq!(s.capacity(), 0); + + let s = HS::with_capacity_and_hasher(0, RandomState::new()); + assert_eq!(s.capacity(), 0); + + 
let mut s = HS::new(); + s.insert(1); + s.insert(2); + s.remove(&1); + s.remove(&2); + s.shrink_to_fit(); + assert_eq!(s.capacity(), 0); + + let mut s = HS::new(); + s.reserve(0); + assert_eq!(s.capacity(), 0); + } + + #[test] + fn test_disjoint() { + let mut xs = HashSet::new(); + let mut ys = HashSet::new(); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(xs.insert(5)); + assert!(ys.insert(11)); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(xs.insert(7)); + assert!(xs.insert(19)); + assert!(xs.insert(4)); + assert!(ys.insert(2)); + assert!(ys.insert(-11)); + assert!(xs.is_disjoint(&ys)); + assert!(ys.is_disjoint(&xs)); + assert!(ys.insert(7)); + assert!(!xs.is_disjoint(&ys)); + assert!(!ys.is_disjoint(&xs)); + } + + #[test] + fn test_subset_and_superset() { + let mut a = HashSet::new(); + assert!(a.insert(0)); + assert!(a.insert(5)); + assert!(a.insert(11)); + assert!(a.insert(7)); + + let mut b = HashSet::new(); + assert!(b.insert(0)); + assert!(b.insert(7)); + assert!(b.insert(19)); + assert!(b.insert(250)); + assert!(b.insert(11)); + assert!(b.insert(200)); + + assert!(!a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(!b.is_superset(&a)); + + assert!(b.insert(5)); + + assert!(a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(b.is_superset(&a)); + } + + #[test] + fn test_iterate() { + let mut a = HashSet::new(); + for i in 0..32 { + assert!(a.insert(i)); + } + let mut observed: u32 = 0; + for k in &a { + observed |= 1 << *k; + } + assert_eq!(observed, 0xFFFF_FFFF); + } + + #[test] + fn test_intersection() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(11)); + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(77)); + assert!(a.insert(103)); + assert!(a.insert(5)); + assert!(a.insert(-5)); + + assert!(b.insert(2)); + assert!(b.insert(11)); + assert!(b.insert(77)); + 
assert!(b.insert(-9)); + assert!(b.insert(-42)); + assert!(b.insert(5)); + assert!(b.insert(3)); + + let mut i = 0; + let expected = [3, 5, 11, 77]; + for x in a.intersection(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_difference() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(3)); + assert!(b.insert(9)); + + let mut i = 0; + let expected = [1, 5, 11]; + for x in a.difference(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_symmetric_difference() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + + assert!(b.insert(-2)); + assert!(b.insert(3)); + assert!(b.insert(9)); + assert!(b.insert(14)); + assert!(b.insert(22)); + + let mut i = 0; + let expected = [-2, 1, 5, 11, 14, 22]; + for x in a.symmetric_difference(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_union() { + let mut a = HashSet::new(); + let mut b = HashSet::new(); + + assert!(a.insert(1)); + assert!(a.insert(3)); + assert!(a.insert(5)); + assert!(a.insert(9)); + assert!(a.insert(11)); + assert!(a.insert(16)); + assert!(a.insert(19)); + assert!(a.insert(24)); + + assert!(b.insert(-2)); + assert!(b.insert(1)); + assert!(b.insert(5)); + assert!(b.insert(9)); + assert!(b.insert(13)); + assert!(b.insert(19)); + + let mut i = 0; + let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24]; + for x in a.union(&b) { + assert!(expected.contains(x)); + i += 1 + } + assert_eq!(i, expected.len()); + } + + #[test] + fn test_from_iter() { + let xs = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + + let set: HashSet<_> = xs.iter().cloned().collect(); + + for x in 
&xs { + assert!(set.contains(x)); + } + } + + #[test] + fn test_move_iter() { + let hs = { + let mut hs = HashSet::new(); + + hs.insert('a'); + hs.insert('b'); + + hs + }; + + let v = hs.into_iter().collect::>(); + assert!(v == ['a', 'b'] || v == ['b', 'a']); + } + + #[test] + fn test_eq() { + // These constants once happened to expose a bug in insert(). + // I'm keeping them around to prevent a regression. + let mut s1 = HashSet::new(); + + s1.insert(1); + s1.insert(2); + s1.insert(3); + + let mut s2 = HashSet::new(); + + s2.insert(1); + s2.insert(2); + + assert_ne!(s1, s2); + + s2.insert(3); + + assert_eq!(s1, s2); + } + + #[test] + fn test_show() { + let mut set = HashSet::new(); + let empty = HashSet::::new(); + + set.insert(1); + set.insert(2); + + let set_str = format!("{:?}", set); + + assert!(set_str == "{1, 2}" || set_str == "{2, 1}"); + assert_eq!(format!("{:?}", empty), "{}"); + } + + #[test] + fn test_trivial_drain() { + let mut s = HashSet::::new(); + for _ in s.drain() {} + assert!(s.is_empty()); + drop(s); + + let mut s = HashSet::::new(); + drop(s.drain()); + assert!(s.is_empty()); + } + + #[test] + fn test_drain() { + let mut s: HashSet<_> = (1..100).collect(); + + // try this a bunch of times to make sure we don't screw up internal state. + for _ in 0..20 { + assert_eq!(s.len(), 99); + + { + let mut last_i = 0; + let mut d = s.drain(); + for (i, x) in d.by_ref().take(50).enumerate() { + last_i = i; + assert_ne!(x, 0); + } + assert_eq!(last_i, 49); + } + + for _ in &s { + panic!("s should be empty!"); + } + + // reset to try again. 
+ s.extend(1..100); + } + } + + #[test] + fn test_replace() { + use hash; + + #[derive(Debug)] + struct Foo(&'static str, i32); + + impl PartialEq for Foo { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + } + + impl Eq for Foo {} + + impl hash::Hash for Foo { + fn hash(&self, h: &mut H) { + self.0.hash(h); + } + } + + let mut s = HashSet::new(); + assert_eq!(s.replace(Foo("a", 1)), None); + assert_eq!(s.len(), 1); + assert_eq!(s.replace(Foo("a", 2)), Some(Foo("a", 1))); + assert_eq!(s.len(), 1); + + let mut it = s.iter(); + assert_eq!(it.next(), Some(&Foo("a", 2))); + assert_eq!(it.next(), None); + } + + #[test] + fn test_extend_ref() { + let mut a = HashSet::new(); + a.insert(1); + + a.extend(&[2, 3, 4]); + + assert_eq!(a.len(), 4); + assert!(a.contains(&1)); + assert!(a.contains(&2)); + assert!(a.contains(&3)); + assert!(a.contains(&4)); + + let mut b = HashSet::new(); + b.insert(5); + b.insert(6); + + a.extend(&b); + + assert_eq!(a.len(), 6); + assert!(a.contains(&1)); + assert!(a.contains(&2)); + assert!(a.contains(&3)); + assert!(a.contains(&4)); + assert!(a.contains(&5)); + assert!(a.contains(&6)); + } + + #[test] + fn test_retain() { + let xs = [1, 2, 3, 4, 5, 6]; + let mut set: HashSet = xs.iter().cloned().collect(); + set.retain(|&k| k % 2 == 0); + assert_eq!(set.len(), 3); + assert!(set.contains(&2)); + assert!(set.contains(&4)); + assert!(set.contains(&6)); + } +} diff --git a/servo/components/hashglobe/src/lib.rs b/servo/components/hashglobe/src/lib.rs new file mode 100644 index 000000000000..cf6e9710f5fe --- /dev/null +++ b/servo/components/hashglobe/src/lib.rs @@ -0,0 +1,71 @@ +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +pub mod alloc; +pub mod hash_map; +pub mod hash_set; +mod shim; +mod table; + +pub mod fake; + +use std::{error, fmt}; + +trait Recover { + type Key; + + fn get(&self, key: &Q) -> Option<&Self::Key>; + fn take(&mut self, key: &Q) -> Option; + fn replace(&mut self, key: Self::Key) -> Option; +} + +#[derive(Debug)] +pub struct AllocationInfo { + /// The size we are requesting. + size: usize, + /// The alignment we are requesting. + alignment: usize, +} + +#[derive(Debug)] +pub struct FailedAllocationError { + reason: &'static str, + /// The allocation info we are requesting, if needed. + allocation_info: Option, +} + +impl FailedAllocationError { + #[inline] + pub fn new(reason: &'static str) -> Self { + Self { + reason, + allocation_info: None, + } + } +} + +impl error::Error for FailedAllocationError { + fn description(&self) -> &str { + self.reason + } +} + +impl fmt::Display for FailedAllocationError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.allocation_info { + Some(ref info) => write!( + f, + "{}, allocation: (size: {}, alignment: {})", + self.reason, info.size, info.alignment + ), + None => self.reason.fmt(f), + } + } +} diff --git a/servo/components/hashglobe/src/shim.rs b/servo/components/hashglobe/src/shim.rs new file mode 100644 index 000000000000..855dbdcfa155 --- /dev/null +++ b/servo/components/hashglobe/src/shim.rs @@ -0,0 +1,61 @@ +use std::marker::PhantomData; + +// FIXME: remove this and use std::ptr::NonNull when Firefox requires Rust 1.25+ +pub struct NonZeroPtr(&'static T); + +impl NonZeroPtr { + pub unsafe fn new_unchecked(ptr: *mut T) -> Self { + NonZeroPtr(&*ptr) + } + pub fn as_ptr(&self) -> *mut T { + self.0 as *const T as *mut T + } +} + +pub struct Unique { + ptr: NonZeroPtr, + _marker: PhantomData, +} + +impl Unique { + pub unsafe fn new_unchecked(ptr: *mut T) -> Self { + Unique { + ptr: NonZeroPtr::new_unchecked(ptr), + _marker: PhantomData, + } + } + pub fn as_ptr(&self) -> *mut T { + self.ptr.as_ptr() + 
} +} + +unsafe impl Send for Unique {} + +unsafe impl Sync for Unique {} + +pub struct Shared { + ptr: NonZeroPtr, + _marker: PhantomData, + // force it to be !Send/!Sync + _marker2: PhantomData<*const u8>, +} + +impl Shared { + pub unsafe fn new_unchecked(ptr: *mut T) -> Self { + Shared { + ptr: NonZeroPtr::new_unchecked(ptr), + _marker: PhantomData, + _marker2: PhantomData, + } + } + + pub unsafe fn as_mut(&self) -> &mut T { + &mut *self.ptr.as_ptr() + } +} + +impl<'a, T> From<&'a mut T> for Shared { + fn from(reference: &'a mut T) -> Self { + unsafe { Shared::new_unchecked(reference) } + } +} diff --git a/servo/components/hashglobe/src/table.rs b/servo/components/hashglobe/src/table.rs new file mode 100644 index 000000000000..0fe08f2b0521 --- /dev/null +++ b/servo/components/hashglobe/src/table.rs @@ -0,0 +1,1230 @@ +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::alloc::{alloc, dealloc}; +use crate::shim::{Shared, Unique}; +use std::cmp; +use std::hash::{BuildHasher, Hash, Hasher}; +use std::marker; +use std::mem::{self, align_of, size_of}; +use std::ops::{Deref, DerefMut}; +use std::ptr; + +use self::BucketState::*; +use crate::FailedAllocationError; + +/// Integer type used for stored hash values. +/// +/// No more than bit_width(usize) bits are needed to select a bucket. +/// +/// The most significant bit is ours to use for tagging `SafeHash`. +/// +/// (Even if we could have usize::MAX bytes allocated for buckets, +/// each bucket stores at least a `HashUint`, so there can be no more than +/// usize::MAX / size_of(usize) buckets.) 
+type HashUint = usize; + +const EMPTY_BUCKET: HashUint = 0; +const EMPTY: usize = 1; + +/// Special `Unique` that uses the lower bit of the pointer +/// to expose a boolean tag. +/// Note: when the pointer is initialized to EMPTY `.ptr()` will return +/// null and the tag functions shouldn't be used. +struct TaggedHashUintPtr(Unique); + +impl TaggedHashUintPtr { + #[inline] + unsafe fn new(ptr: *mut HashUint) -> Self { + debug_assert!(ptr as usize & 1 == 0 || ptr as usize == EMPTY as usize); + TaggedHashUintPtr(Unique::new_unchecked(ptr)) + } + + #[inline] + fn set_tag(&mut self, value: bool) { + let mut usize_ptr = self.0.as_ptr() as usize; + unsafe { + if value { + usize_ptr |= 1; + } else { + usize_ptr &= !1; + } + self.0 = Unique::new_unchecked(usize_ptr as *mut HashUint) + } + } + + #[inline] + fn tag(&self) -> bool { + (self.0.as_ptr() as usize) & 1 == 1 + } + + #[inline] + fn ptr(&self) -> *mut HashUint { + (self.0.as_ptr() as usize & !1) as *mut HashUint + } +} + +/// The raw hashtable, providing safe-ish access to the unzipped and highly +/// optimized arrays of hashes, and key-value pairs. +/// +/// This design is a lot faster than the naive +/// `Vec>`, because we don't pay for the overhead of an +/// option on every element, and we get a generally more cache-aware design. +/// +/// Essential invariants of this structure: +/// +/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` +/// points to 'undefined' contents. Don't read from it. This invariant is +/// enforced outside this module with the `EmptyBucket`, `FullBucket`, +/// and `SafeHash` types. +/// +/// - An `EmptyBucket` is only constructed at an index with +/// a hash of EMPTY_BUCKET. +/// +/// - A `FullBucket` is only constructed at an index with a +/// non-EMPTY_BUCKET hash. +/// +/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. 
We get +/// around hashes of zero by changing them to 0x8000_0000_0000_0000, +/// which will likely map to the same bucket, while not being confused +/// with "empty". +/// +/// - Both "arrays represented by pointers" are the same length: +/// `capacity`. This is set at creation and never changes. The arrays +/// are unzipped and are more cache aware (scanning through 8 hashes +/// brings in at most 2 cache lines, since they're all right beside each +/// other). This layout may waste space in padding such as in a map from +/// u64 to u8, but is a more cache conscious layout as the key-value pairs +/// are only very shortly probed and the desired value will be in the same +/// or next cache line. +/// +/// You can kind of think of this module/data structure as a safe wrapper +/// around just the "table" part of the hashtable. It enforces some +/// invariants at the type level and employs some performance trickery, +/// but in general is just a tricked out `Vec>`. +/// +/// The hashtable also exposes a special boolean tag. The tag defaults to false +/// when the RawTable is created and is accessible with the `tag` and `set_tag` +/// functions. +pub struct RawTable { + capacity_mask: usize, + size: usize, + hashes: TaggedHashUintPtr, + + // Because K/V do not appear directly in any of the types in the struct, + // inform rustc that in fact instances of K and V are reachable from here. 
+ marker: marker::PhantomData<(K, V)>, +} + +unsafe impl Send for RawTable {} +unsafe impl Sync for RawTable {} + +// An unsafe view of a RawTable bucket +// Valid indexes are within [0..table_capacity) +pub struct RawBucket { + hash_start: *mut HashUint, + // We use *const to ensure covariance with respect to K and V + pair_start: *const (K, V), + idx: usize, + _marker: marker::PhantomData<(K, V)>, +} + +impl Copy for RawBucket {} +impl Clone for RawBucket { + fn clone(&self) -> RawBucket { + *self + } +} + +pub struct Bucket { + raw: RawBucket, + table: M, +} + +impl Copy for Bucket {} +impl Clone for Bucket { + fn clone(&self) -> Bucket { + *self + } +} + +pub struct EmptyBucket { + raw: RawBucket, + table: M, +} + +pub struct FullBucket { + raw: RawBucket, + table: M, +} + +pub type FullBucketMut<'table, K, V> = FullBucket>; + +pub enum BucketState { + Empty(EmptyBucket), + Full(FullBucket), +} + +// A GapThenFull encapsulates the state of two consecutive buckets at once. +// The first bucket, called the gap, is known to be empty. +// The second bucket is full. +pub struct GapThenFull { + gap: EmptyBucket, + full: FullBucket, +} + +/// A hash that is not zero, since we use a hash of zero to represent empty +/// buckets. +#[derive(PartialEq, Copy, Clone)] +pub struct SafeHash { + hash: HashUint, +} + +impl SafeHash { + /// Peek at the hash value, which is guaranteed to be non-zero. + #[inline(always)] + pub fn inspect(&self) -> HashUint { + self.hash + } + + #[inline(always)] + pub fn new(hash: u64) -> Self { + // We need to avoid 0 in order to prevent collisions with + // EMPTY_HASH. We can maintain our precious uniform distribution + // of initial indexes by unconditionally setting the MSB, + // effectively reducing the hashes by one bit. + // + // Truncate hash to fit in `HashUint`. + let hash_bits = size_of::() * 8; + SafeHash { + hash: (1 << (hash_bits - 1)) | (hash as HashUint), + } + } +} + +/// We need to remove hashes of 0. 
That's reserved for empty buckets. +/// This function wraps up `hash_keyed` to be the only way outside this +/// module to generate a SafeHash. +pub fn make_hash(hash_state: &S, t: &T) -> SafeHash +where + T: Hash, + S: BuildHasher, +{ + let mut state = hash_state.build_hasher(); + t.hash(&mut state); + SafeHash::new(state.finish()) +} + +// `replace` casts a `*HashUint` to a `*SafeHash`. Since we statically +// ensure that a `FullBucket` points to an index with a non-zero hash, +// and a `SafeHash` is just a `HashUint` with a different name, this is +// safe. +// +// This test ensures that a `SafeHash` really IS the same size as a +// `HashUint`. If you need to change the size of `SafeHash` (and +// consequently made this test fail), `replace` needs to be +// modified to no longer assume this. +#[test] +fn can_alias_safehash_as_hash() { + assert_eq!(size_of::(), size_of::()) +} + +// RawBucket methods are unsafe as it's possible to +// make a RawBucket point to invalid memory using safe code. +impl RawBucket { + unsafe fn hash(&self) -> *mut HashUint { + self.hash_start.offset(self.idx as isize) + } + unsafe fn pair(&self) -> *mut (K, V) { + self.pair_start.offset(self.idx as isize) as *mut (K, V) + } + unsafe fn hash_pair(&self) -> (*mut HashUint, *mut (K, V)) { + (self.hash(), self.pair()) + } +} + +// Buckets hold references to the table. +impl FullBucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.table + } + /// Borrow a mutable reference to the table. + pub fn table_mut(&mut self) -> &mut M { + &mut self.table + } + /// Move out the reference to the table. + pub fn into_table(self) -> M { + self.table + } + /// Get the raw index. + pub fn index(&self) -> usize { + self.raw.idx + } + /// Get the raw bucket. + pub fn raw(&self) -> RawBucket { + self.raw + } +} + +impl EmptyBucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.table + } + /// Borrow a mutable reference to the table. 
+ pub fn table_mut(&mut self) -> &mut M { + &mut self.table + } +} + +impl Bucket { + /// Get the raw index. + pub fn index(&self) -> usize { + self.raw.idx + } + /// get the table. + pub fn into_table(self) -> M { + self.table + } +} + +impl Deref for FullBucket +where + M: Deref>, +{ + type Target = RawTable; + fn deref(&self) -> &RawTable { + &self.table + } +} + +/// `Put` is implemented for types which provide access to a table and cannot be invalidated +/// by filling a bucket. A similar implementation for `Take` is possible. +pub trait Put { + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable; +} + +impl<'t, K, V> Put for &'t mut RawTable { + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + *self + } +} + +impl Put for Bucket +where + M: Put, +{ + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + self.table.borrow_table_mut() + } +} + +impl Put for FullBucket +where + M: Put, +{ + unsafe fn borrow_table_mut(&mut self) -> &mut RawTable { + self.table.borrow_table_mut() + } +} + +impl>> Bucket { + pub fn new(table: M, hash: SafeHash) -> Bucket { + Bucket::at_index(table, hash.inspect() as usize) + } + + pub fn new_from(r: RawBucket, t: M) -> Bucket { + Bucket { raw: r, table: t } + } + + pub fn at_index(table: M, ib_index: usize) -> Bucket { + // if capacity is 0, then the RawBucket will be populated with bogus pointers. + // This is an uncommon case though, so avoid it in release builds. + debug_assert!( + table.capacity() > 0, + "Table should have capacity at this point" + ); + let ib_index = ib_index & table.capacity_mask; + Bucket { + raw: table.raw_bucket_at(ib_index), + table, + } + } + + pub fn first(table: M) -> Bucket { + Bucket { + raw: table.raw_bucket_at(0), + table, + } + } + + // "So a few of the first shall be last: for many be called, + // but few chosen." + // + // We'll most likely encounter a few buckets at the beginning that + // have their initial buckets near the end of the table. 
They were + // placed at the beginning as the probe wrapped around the table + // during insertion. We must skip forward to a bucket that won't + // get reinserted too early and won't unfairly steal others spot. + // This eliminates the need for robin hood. + pub fn head_bucket(table: M) -> Bucket { + let mut bucket = Bucket::first(table); + + loop { + bucket = match bucket.peek() { + Full(full) => { + if full.displacement() == 0 { + // This bucket occupies its ideal spot. + // It indicates the start of another "cluster". + bucket = full.into_bucket(); + break; + } + // Leaving this bucket in the last cluster for later. + full.into_bucket() + }, + Empty(b) => { + // Encountered a hole between clusters. + b.into_bucket() + }, + }; + bucket.next(); + } + bucket + } + + /// Reads a bucket at a given index, returning an enum indicating whether + /// it's initialized or not. You need to match on this enum to get + /// the appropriate types to call most of the other functions in + /// this module. + pub fn peek(self) -> BucketState { + match unsafe { *self.raw.hash() } { + EMPTY_BUCKET => Empty(EmptyBucket { + raw: self.raw, + table: self.table, + }), + _ => Full(FullBucket { + raw: self.raw, + table: self.table, + }), + } + } + + /// Modifies the bucket in place to make it point to the next slot. + pub fn next(&mut self) { + self.raw.idx = self.raw.idx.wrapping_add(1) & self.table.capacity_mask; + } + + /// Modifies the bucket in place to make it point to the previous slot. 
+ pub fn prev(&mut self) { + self.raw.idx = self.raw.idx.wrapping_sub(1) & self.table.capacity_mask; + } +} + +impl>> EmptyBucket { + #[inline] + pub fn next(self) -> Bucket { + let mut bucket = self.into_bucket(); + bucket.next(); + bucket + } + + #[inline] + pub fn into_bucket(self) -> Bucket { + Bucket { + raw: self.raw, + table: self.table, + } + } + + pub fn gap_peek(self) -> Result, Bucket> { + let gap = EmptyBucket { + raw: self.raw, + table: (), + }; + + match self.next().peek() { + Full(bucket) => Ok(GapThenFull { gap, full: bucket }), + Empty(e) => Err(e.into_bucket()), + } + } +} + +impl EmptyBucket +where + M: Put, +{ + /// Puts given key and value pair, along with the key's hash, + /// into this bucket in the hashtable. Note how `self` is 'moved' into + /// this function, because this slot will no longer be empty when + /// we return! A `FullBucket` is returned for later use, pointing to + /// the newly-filled slot in the hashtable. + /// + /// Use `make_hash` to construct a `SafeHash` to pass to this function. + pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket { + unsafe { + *self.raw.hash() = hash.inspect(); + ptr::write(self.raw.pair(), (key, value)); + + self.table.borrow_table_mut().size += 1; + } + + FullBucket { + raw: self.raw, + table: self.table, + } + } +} + +impl>> FullBucket { + #[inline] + pub fn next(self) -> Bucket { + let mut bucket = self.into_bucket(); + bucket.next(); + bucket + } + + #[inline] + pub fn into_bucket(self) -> Bucket { + Bucket { + raw: self.raw, + table: self.table, + } + } + + /// Duplicates the current position. This can be useful for operations + /// on two or more buckets. + pub fn stash(self) -> FullBucket { + FullBucket { + raw: self.raw, + table: self, + } + } + + /// Get the distance between this bucket and the 'ideal' location + /// as determined by the key's hash stored in it. + /// + /// In the cited blog posts above, this is called the "distance to + /// initial bucket", or DIB. 
Also known as "probe count". + pub fn displacement(&self) -> usize { + // Calculates the distance one has to travel when going from + // `hash mod capacity` onwards to `idx mod capacity`, wrapping around + // if the destination is not reached before the end of the table. + (self.raw.idx.wrapping_sub(self.hash().inspect() as usize)) & self.table.capacity_mask + } + + #[inline] + pub fn hash(&self) -> SafeHash { + unsafe { + SafeHash { + hash: *self.raw.hash(), + } + } + } + + /// Gets references to the key and value at a given index. + pub fn read(&self) -> (&K, &V) { + unsafe { + let pair_ptr = self.raw.pair(); + (&(*pair_ptr).0, &(*pair_ptr).1) + } + } +} + +// We take a mutable reference to the table instead of accepting anything that +// implements `DerefMut` to prevent fn `take` from being called on `stash`ed +// buckets. +impl<'t, K, V> FullBucket> { + /// Removes this bucket's key and value from the hashtable. + /// + /// This works similarly to `put`, building an `EmptyBucket` out of the + /// taken bucket. + pub fn take(self) -> (EmptyBucket>, K, V) { + self.table.size -= 1; + + unsafe { + *self.raw.hash() = EMPTY_BUCKET; + let (k, v) = ptr::read(self.raw.pair()); + ( + EmptyBucket { + raw: self.raw, + table: self.table, + }, + k, + v, + ) + } + } +} + +// This use of `Put` is misleading and restrictive, but safe and sufficient for our use cases +// where `M` is a full bucket or table reference type with mutable access to the table. +impl FullBucket +where + M: Put, +{ + pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { + unsafe { + let old_hash = ptr::replace(self.raw.hash() as *mut SafeHash, h); + let (old_key, old_val) = ptr::replace(self.raw.pair(), (k, v)); + + (old_hash, old_key, old_val) + } + } +} + +impl FullBucket +where + M: Deref> + DerefMut, +{ + /// Gets mutable references to the key and value at a given index. 
+ pub fn read_mut(&mut self) -> (&mut K, &mut V) { + unsafe { + let pair_ptr = self.raw.pair(); + (&mut (*pair_ptr).0, &mut (*pair_ptr).1) + } + } +} + +impl<'t, K, V, M> FullBucket +where + M: Deref> + 't, +{ + /// Exchange a bucket state for immutable references into the table. + /// Because the underlying reference to the table is also consumed, + /// no further changes to the structure of the table are possible; + /// in exchange for this, the returned references have a longer lifetime + /// than the references returned by `read()`. + pub fn into_refs(self) -> (&'t K, &'t V) { + unsafe { + let pair_ptr = self.raw.pair(); + (&(*pair_ptr).0, &(*pair_ptr).1) + } + } +} + +impl<'t, K, V, M> FullBucket +where + M: Deref> + DerefMut + 't, +{ + /// This works similarly to `into_refs`, exchanging a bucket state + /// for mutable references into the table. + pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { + unsafe { + let pair_ptr = self.raw.pair(); + (&mut (*pair_ptr).0, &mut (*pair_ptr).1) + } + } +} + +impl GapThenFull +where + M: Deref>, +{ + #[inline] + pub fn full(&self) -> &FullBucket { + &self.full + } + + pub fn into_table(self) -> M { + self.full.into_table() + } + + pub fn shift(mut self) -> Result, Bucket> { + unsafe { + let (gap_hash, gap_pair) = self.gap.raw.hash_pair(); + let (full_hash, full_pair) = self.full.raw.hash_pair(); + *gap_hash = mem::replace(&mut *full_hash, EMPTY_BUCKET); + ptr::copy_nonoverlapping(full_pair, gap_pair, 1); + } + + let FullBucket { raw: prev_raw, .. } = self.full; + + match self.full.next().peek() { + Full(bucket) => { + self.gap.raw = prev_raw; + + self.full = bucket; + + Ok(self) + }, + Empty(b) => Err(b.into_bucket()), + } + } +} + +/// Rounds up to a multiple of a power of two. Returns the closest multiple +/// of `target_alignment` that is higher or equal to `unrounded`. +/// +/// # Panics +/// +/// Panics if `target_alignment` is not a power of two. 
+#[inline] +fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize { + assert!(target_alignment.is_power_of_two()); + (unrounded + target_alignment - 1) & !(target_alignment - 1) +} + +#[test] +fn test_rounding() { + assert_eq!(round_up_to_next(0, 4), 0); + assert_eq!(round_up_to_next(1, 4), 4); + assert_eq!(round_up_to_next(2, 4), 4); + assert_eq!(round_up_to_next(3, 4), 4); + assert_eq!(round_up_to_next(4, 4), 4); + assert_eq!(round_up_to_next(5, 4), 8); +} + +// Returns a tuple of (pairs_offset, end_of_pairs_offset, overflowed), +// with both offsets measured from the start of a mallocated array. +// `overflowed` is true if adding `pairs_size` to `pairs_offset` wrapped. +#[inline] +fn calculate_offsets( + hashes_size: usize, + pairs_size: usize, + pairs_align: usize, +) -> (usize, usize, bool) { + let pairs_offset = round_up_to_next(hashes_size, pairs_align); + let (end_of_pairs, oflo) = pairs_offset.overflowing_add(pairs_size); + + (pairs_offset, end_of_pairs, oflo) +} + +// Returns a tuple of (minimum required malloc alignment, hash_offset, +// array_size, overflowed), from the start of a mallocated array. 
+fn calculate_allocation( + hash_size: usize, + hash_align: usize, + pairs_size: usize, + pairs_align: usize, +) -> (usize, usize, usize, bool) { + let hash_offset = 0; + let (_, end_of_pairs, oflo) = calculate_offsets(hash_size, pairs_size, pairs_align); + + let align = cmp::max(hash_align, pairs_align); + + (align, hash_offset, end_of_pairs, oflo) +} + +#[test] +fn test_offset_calculation() { + assert_eq!(calculate_allocation(128, 8, 16, 8), (8, 0, 144, false)); + assert_eq!(calculate_allocation(3, 1, 2, 1), (1, 0, 5, false)); + assert_eq!(calculate_allocation(6, 2, 12, 4), (4, 0, 20, false)); + assert_eq!(calculate_offsets(128, 15, 4), (128, 143, false)); + assert_eq!(calculate_offsets(3, 2, 4), (4, 6, false)); + assert_eq!(calculate_offsets(6, 12, 4), (8, 20, false)); +} + +impl RawTable { + unsafe fn new_uninitialized(capacity: usize) -> RawTable { + extern crate libc; + if let Ok(table) = Self::try_new_uninitialized(capacity) { + table + } else { + libc::abort(); + } + } + + /// Does not initialize the buckets. The caller should ensure they, + /// at the very least, set every hash to EMPTY_BUCKET. + unsafe fn try_new_uninitialized( + capacity: usize, + ) -> Result, FailedAllocationError> { + if capacity == 0 { + return Ok(RawTable { + size: 0, + capacity_mask: capacity.wrapping_sub(1), + hashes: TaggedHashUintPtr::new(EMPTY as *mut HashUint), + marker: marker::PhantomData, + }); + } + + // No need for `checked_mul` before a more restrictive check performed + // later in this method. + let hashes_size = capacity.wrapping_mul(size_of::()); + let pairs_size = capacity.wrapping_mul(size_of::<(K, V)>()); + + // Allocating hashmaps is a little tricky. We need to allocate two + // arrays, but since we know their sizes and alignments up front, + // we just allocate a single array, and then have the subarrays + // point into it. + // + // This is great in theory, but in practice getting the alignment + // right is a little subtle. 
Therefore, calculating offsets has been + // factored out into a different function. + let (alignment, hash_offset, size, oflo) = calculate_allocation( + hashes_size, + align_of::(), + pairs_size, + align_of::<(K, V)>(), + ); + + if oflo { + return Err(FailedAllocationError::new( + "capacity overflow when allocating RawTable", + )); + } + + // One check for overflow that covers calculation and rounding of size. + let size_of_bucket = size_of::() + .checked_add(size_of::<(K, V)>()) + .unwrap(); + + let cap_bytes = capacity.checked_mul(size_of_bucket); + + if let Some(cap_bytes) = cap_bytes { + if size < cap_bytes { + return Err(FailedAllocationError::new( + "capacity overflow when allocating RawTable", + )); + } + } else { + return Err(FailedAllocationError::new( + "capacity overflow when allocating RawTable", + )); + } + + // FORK NOTE: Uses alloc shim instead of Heap.alloc + let buffer = alloc(size, alignment); + + if buffer.is_null() { + use crate::AllocationInfo; + return Err(FailedAllocationError { + reason: "out of memory when allocating RawTable", + allocation_info: Some(AllocationInfo { size, alignment }), + }); + } + + let hashes = buffer.offset(hash_offset as isize) as *mut HashUint; + + Ok(RawTable { + capacity_mask: capacity.wrapping_sub(1), + size: 0, + hashes: TaggedHashUintPtr::new(hashes), + marker: marker::PhantomData, + }) + } + + fn raw_bucket_at(&self, index: usize) -> RawBucket { + let hashes_size = self.capacity() * size_of::(); + let pairs_size = self.capacity() * size_of::<(K, V)>(); + + let (pairs_offset, _, oflo) = + calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>()); + debug_assert!(!oflo, "capacity overflow"); + + let buffer = self.hashes.ptr() as *mut u8; + unsafe { + RawBucket { + hash_start: buffer as *mut HashUint, + pair_start: buffer.offset(pairs_offset as isize) as *const (K, V), + idx: index, + _marker: marker::PhantomData, + } + } + } + + /// Creates a new raw table from a given capacity. 
All buckets are + /// initially empty. + pub fn new(capacity: usize) -> Result, FailedAllocationError> { + unsafe { + let ret = RawTable::try_new_uninitialized(capacity)?; + ptr::write_bytes(ret.hashes.ptr(), 0, capacity); + Ok(ret) + } + } + + /// The hashtable's capacity, similar to a vector's. + pub fn capacity(&self) -> usize { + self.capacity_mask.wrapping_add(1) + } + + /// The number of elements ever `put` in the hashtable, minus the number + /// of elements ever `take`n. + pub fn size(&self) -> usize { + self.size + } + + fn raw_buckets(&self) -> RawBuckets { + RawBuckets { + raw: self.raw_bucket_at(0), + elems_left: self.size, + marker: marker::PhantomData, + } + } + + pub fn iter(&self) -> Iter { + Iter { + iter: self.raw_buckets(), + } + } + + pub fn iter_mut(&mut self) -> IterMut { + IterMut { + iter: self.raw_buckets(), + _marker: marker::PhantomData, + } + } + + pub fn into_iter(self) -> IntoIter { + let RawBuckets { + raw, elems_left, .. + } = self.raw_buckets(); + // Replace the marker regardless of lifetime bounds on parameters. + IntoIter { + iter: RawBuckets { + raw, + elems_left, + marker: marker::PhantomData, + }, + table: self, + } + } + + pub fn drain(&mut self) -> Drain { + let RawBuckets { + raw, elems_left, .. + } = self.raw_buckets(); + // Replace the marker regardless of lifetime bounds on parameters. + Drain { + iter: RawBuckets { + raw, + elems_left, + marker: marker::PhantomData, + }, + table: Shared::from(self), + marker: marker::PhantomData, + } + } + + /// Drops buckets in reverse order. It leaves the table in an inconsistent + /// state and should only be used for dropping the table's remaining + /// entries. It's used in the implementation of Drop. 
+ unsafe fn rev_drop_buckets(&mut self) { + // initialize the raw bucket past the end of the table + let mut raw = self.raw_bucket_at(self.capacity()); + let mut elems_left = self.size; + + while elems_left != 0 { + raw.idx -= 1; + + if *raw.hash() != EMPTY_BUCKET { + elems_left -= 1; + ptr::drop_in_place(raw.pair()); + } + } + } + + /// Set the table tag + pub fn set_tag(&mut self, value: bool) { + self.hashes.set_tag(value) + } + + /// Get the table tag + pub fn tag(&self) -> bool { + self.hashes.tag() + } +} + +/// A raw iterator. The basis for some other iterators in this module. Although +/// this interface is safe, it's not used outside this module. +struct RawBuckets<'a, K, V> { + raw: RawBucket, + elems_left: usize, + + // Strictly speaking, this should be &'a (K,V), but that would + // require that K:'a, and we often use RawBuckets<'static...> for + // move iterations, so that messes up a lot of other things. So + // just use `&'a ()` as this is not a publicly exposed type + // anyway. + marker: marker::PhantomData<&'a ()>, +} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +impl<'a, K, V> Clone for RawBuckets<'a, K, V> { + fn clone(&self) -> RawBuckets<'a, K, V> { + RawBuckets { + raw: self.raw, + elems_left: self.elems_left, + marker: marker::PhantomData, + } + } +} + +impl<'a, K, V> Iterator for RawBuckets<'a, K, V> { + type Item = RawBucket; + + fn next(&mut self) -> Option> { + if self.elems_left == 0 { + return None; + } + + loop { + unsafe { + let item = self.raw; + self.raw.idx += 1; + if *item.hash() != EMPTY_BUCKET { + self.elems_left -= 1; + return Some(item); + } + } + } + } + + fn size_hint(&self) -> (usize, Option) { + (self.elems_left, Some(self.elems_left)) + } +} + +impl<'a, K, V> ExactSizeIterator for RawBuckets<'a, K, V> { + fn len(&self) -> usize { + self.elems_left + } +} + +/// Iterator over shared references to entries in a table. 
+pub struct Iter<'a, K: 'a, V: 'a> { + iter: RawBuckets<'a, K, V>, +} + +unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {} +unsafe impl<'a, K: Sync, V: Sync> Send for Iter<'a, K, V> {} + +// FIXME(#19839) Remove in favor of `#[derive(Clone)]` +impl<'a, K, V> Clone for Iter<'a, K, V> { + fn clone(&self) -> Iter<'a, K, V> { + Iter { + iter: self.iter.clone(), + } + } +} + +/// Iterator over mutable references to entries in a table. +pub struct IterMut<'a, K: 'a, V: 'a> { + iter: RawBuckets<'a, K, V>, + // To ensure invariance with respect to V + _marker: marker::PhantomData<&'a mut V>, +} + +unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {} +// Both K: Sync and K: Send are correct for IterMut's Send impl, +// but Send is the more useful bound +unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {} + +impl<'a, K: 'a, V: 'a> IterMut<'a, K, V> { + pub fn iter(&self) -> Iter { + Iter { + iter: self.iter.clone(), + } + } +} + +/// Iterator over the entries in a table, consuming the table. +pub struct IntoIter { + table: RawTable, + iter: RawBuckets<'static, K, V>, +} + +unsafe impl Sync for IntoIter {} +unsafe impl Send for IntoIter {} + +impl IntoIter { + pub fn iter(&self) -> Iter { + Iter { + iter: self.iter.clone(), + } + } +} + +/// Iterator over the entries in a table, clearing the table. 
+pub struct Drain<'a, K: 'static, V: 'static> { + table: Shared>, + iter: RawBuckets<'static, K, V>, + marker: marker::PhantomData<&'a RawTable>, +} + +unsafe impl<'a, K: Sync, V: Sync> Sync for Drain<'a, K, V> {} +unsafe impl<'a, K: Send, V: Send> Send for Drain<'a, K, V> {} + +impl<'a, K, V> Drain<'a, K, V> { + pub fn iter(&self) -> Iter { + Iter { + iter: self.iter.clone(), + } + } +} + +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.iter.next().map(|raw| unsafe { + let pair_ptr = raw.pair(); + (&(*pair_ptr).0, &(*pair_ptr).1) + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.iter.next().map(|raw| unsafe { + let pair_ptr = raw.pair(); + (&(*pair_ptr).0, &mut (*pair_ptr).1) + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl Iterator for IntoIter { + type Item = (SafeHash, K, V); + + fn next(&mut self) -> Option<(SafeHash, K, V)> { + self.iter.next().map(|raw| { + self.table.size -= 1; + unsafe { + let (k, v) = ptr::read(raw.pair()); + (SafeHash { hash: *raw.hash() }, k, v) + } + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl ExactSizeIterator for IntoIter { + fn len(&self) -> usize { + self.iter().len() + } +} + +impl<'a, K, V> Iterator for Drain<'a, K, V> { + type Item = (SafeHash, K, V); + + #[inline] + fn next(&mut self) -> Option<(SafeHash, K, V)> { + self.iter.next().map(|raw| unsafe { + self.table.as_mut().size -= 1; + let (k, v) = ptr::read(raw.pair()); + ( + SafeHash { + hash: 
ptr::replace(&mut *raw.hash(), EMPTY_BUCKET), + }, + k, + v, + ) + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { + fn len(&self) -> usize { + self.iter.len() + } +} + +impl<'a, K: 'static, V: 'static> Drop for Drain<'a, K, V> { + fn drop(&mut self) { + for _ in self {} + } +} + +impl Clone for RawTable { + fn clone(&self) -> RawTable { + unsafe { + let cap = self.capacity(); + let mut new_ht = RawTable::new_uninitialized(cap); + + let mut new_buckets = new_ht.raw_bucket_at(0); + let mut buckets = self.raw_bucket_at(0); + while buckets.idx < cap { + *new_buckets.hash() = *buckets.hash(); + if *new_buckets.hash() != EMPTY_BUCKET { + let pair_ptr = buckets.pair(); + let kv = ((*pair_ptr).0.clone(), (*pair_ptr).1.clone()); + ptr::write(new_buckets.pair(), kv); + } + buckets.idx += 1; + new_buckets.idx += 1; + } + + new_ht.size = self.size(); + + new_ht + } + } +} + +// FORK NOTE: There may be lifetime errors that do not occur on std::HashMap +// since we removed the may_dangle (which allows more things to compile but has stricter guarantees). +// Generally we should be fine as long as no borrowed data is stuck into the map. +impl Drop for RawTable { + fn drop(&mut self) { + if self.capacity() == 0 { + return; + } + + // This is done in reverse because we've likely partially taken + // some elements out with `.into_iter()` from the front. + // Check if the size is 0, so we don't do a useless scan when + // dropping empty tables such as on resize. + // Also avoid double drop of elements that have been already moved out. 
+ unsafe { + // FORK NOTE: Can't needs_drop on stable + // if needs_drop::<(K, V)>() { + // avoid linear runtime for types that don't need drop + self.rev_drop_buckets(); + // } + } + + let hashes_size = self.capacity() * size_of::(); + let pairs_size = self.capacity() * size_of::<(K, V)>(); + let (align, _, _, oflo) = calculate_allocation( + hashes_size, + align_of::(), + pairs_size, + align_of::<(K, V)>(), + ); + + debug_assert!(!oflo, "should be impossible"); + + unsafe { + dealloc(self.hashes.ptr() as *mut u8, align); + // Remember how everything was allocated out of one buffer + // during initialization? We only need one call to free here. + } + } +} diff --git a/servo/components/malloc_size_of/Cargo.toml b/servo/components/malloc_size_of/Cargo.toml index 1b2a66e97053..77915f1fa67c 100644 --- a/servo/components/malloc_size_of/Cargo.toml +++ b/servo/components/malloc_size_of/Cargo.toml @@ -33,7 +33,7 @@ content-security-policy = { version = "0.4.0", features = ["serde"], optional = crossbeam-channel = { version = "0.4", optional = true } cssparser = "0.27" euclid = "0.20" -hashbrown = "0.7" +hashglobe = { path = "../hashglobe" } hyper = { version = "0.12", optional = true } hyper_serde = { version = "0.11", optional = true } keyboard-types = { version = "0.4.3", optional = true } diff --git a/servo/components/malloc_size_of/lib.rs b/servo/components/malloc_size_of/lib.rs index c5c53c7bc317..9caf427f475b 100644 --- a/servo/components/malloc_size_of/lib.rs +++ b/servo/components/malloc_size_of/lib.rs @@ -55,7 +55,7 @@ extern crate content_security_policy; extern crate crossbeam_channel; extern crate cssparser; extern crate euclid; -extern crate hashbrown; +extern crate hashglobe; #[cfg(feature = "servo")] extern crate hyper; #[cfg(feature = "servo")] @@ -487,7 +487,8 @@ macro_rules! 
malloc_size_of_hash_set { } malloc_size_of_hash_set!(std::collections::HashSet); -malloc_size_of_hash_set!(hashbrown::HashSet); +malloc_size_of_hash_set!(hashglobe::hash_set::HashSet); +malloc_size_of_hash_set!(hashglobe::fake::HashSet); macro_rules! malloc_size_of_hash_map { ($ty:ty) => { @@ -527,7 +528,8 @@ macro_rules! malloc_size_of_hash_map { } malloc_size_of_hash_map!(std::collections::HashMap); -malloc_size_of_hash_map!(hashbrown::HashMap); +malloc_size_of_hash_map!(hashglobe::hash_map::HashMap); +malloc_size_of_hash_map!(hashglobe::fake::HashMap); impl MallocShallowSizeOf for std::collections::BTreeMap where diff --git a/servo/components/style/Cargo.toml b/servo/components/style/Cargo.toml index 54ddd52589cc..b7899be7f30f 100644 --- a/servo/components/style/Cargo.toml +++ b/servo/components/style/Cargo.toml @@ -17,7 +17,7 @@ path = "lib.rs" doctest = false [features] -gecko = ["nsstring", "serde", "style_traits/gecko", "bindgen", "regex", "toml"] +gecko = ["nsstring", "serde", "style_traits/gecko", "fallible/known_system_malloc", "bindgen", "regex", "toml"] servo = ["serde", "style_traits/servo", "servo_atoms", "servo_config", "html5ever", "cssparser/serde", "encoding_rs", "malloc_size_of/servo", "arrayvec/use_union", "servo_url", "string_cache", "to_shmem/servo", "servo_arc/servo"] @@ -41,6 +41,7 @@ euclid = "0.20" fallible = { path = "../fallible" } fxhash = "0.2" hashbrown = "0.7" +hashglobe = { path = "../hashglobe" } html5ever = {version = "0.24", optional = true} indexmap = "1.0" itertools = "0.8" diff --git a/servo/components/style/hash.rs b/servo/components/style/hash.rs index 9c57299e97dc..197c5c128320 100644 --- a/servo/components/style/hash.rs +++ b/servo/components/style/hash.rs @@ -2,11 +2,28 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -//! Reexports of hashbrown, without and with FxHash +//! 
Reexports of hashglobe types in Gecko mode, and stdlib hashmap shims in Servo mode +//! +//! Can go away when the stdlib gets fallible collections +//! https://github.com/rust-lang/rfcs/pull/2116 use fxhash; -pub use hashbrown::{hash_map as map, HashMap, HashSet}; +#[cfg(feature = "gecko")] +pub use hashglobe::hash_map::HashMap; +#[cfg(feature = "gecko")] +pub use hashglobe::hash_set::HashSet; + +#[cfg(feature = "servo")] +pub use hashglobe::fake::{HashMap, HashSet}; + +/// Appropriate reexports of hash_map types +pub mod map { + #[cfg(feature = "gecko")] + pub use hashglobe::hash_map::{Entry, Iter}; + #[cfg(feature = "servo")] + pub use std::collections::hash_map::{Entry, Iter}; +} /// Hash map that uses the Fx hasher pub type FxHashMap = HashMap; diff --git a/servo/components/style/invalidation/element/invalidation_map.rs b/servo/components/style/invalidation/element/invalidation_map.rs index ca53a2212ba7..1f3a6e1db258 100644 --- a/servo/components/style/invalidation/element/invalidation_map.rs +++ b/servo/components/style/invalidation/element/invalidation_map.rs @@ -11,8 +11,8 @@ use crate::selector_map::{ }; use crate::selector_parser::SelectorImpl; use crate::{Atom, LocalName, Namespace}; -use fallible::{FallibleHashMap, FallibleVec}; -use hashbrown::CollectionAllocErr; +use fallible::FallibleVec; +use hashglobe::FailedAllocationError; use selectors::attr::NamespaceConstraint; use selectors::parser::{Combinator, Component}; use selectors::parser::{Selector, SelectorIter}; @@ -35,7 +35,7 @@ use smallvec::SmallVec; /// We generate a Dependency for both |a _ b:X _| and |a _ b:X _ c _ d:Y _|, /// even though those selectors may not appear on their own in any stylesheet. /// This allows us to quickly scan through the dependency sites of all style -/// rules and determine the maximum effect that a given state or attribute +/// rules and determine the maximum effect that a given state or attribute /// change may have on the style of elements in the document. 
#[derive(Clone, Debug, MallocSizeOf)] pub struct Dependency { @@ -244,7 +244,7 @@ impl InvalidationMap { &mut self, selector: &Selector, quirks_mode: QuirksMode, - ) -> Result<(), CollectionAllocErr> { + ) -> Result<(), FailedAllocationError> { debug!("InvalidationMap::note_selector({:?})", selector); let mut document_state = DocumentState::empty(); @@ -325,7 +325,7 @@ struct SelectorDependencyCollector<'a> { compound_state: PerCompoundState, /// The allocation error, if we OOM. - alloc_error: &'a mut Option, + alloc_error: &'a mut Option, } impl<'a> SelectorDependencyCollector<'a> { diff --git a/servo/components/style/invalidation/stylesheets.rs b/servo/components/style/invalidation/stylesheets.rs index 19b6d2613840..244366af21db 100644 --- a/servo/components/style/invalidation/stylesheets.rs +++ b/servo/components/style/invalidation/stylesheets.rs @@ -18,7 +18,6 @@ use crate::stylesheets::{CssRule, StylesheetInDocument}; use crate::Atom; use crate::CaseSensitivityExt; use crate::LocalName as SelectorLocalName; -use fallible::FallibleHashSet; use fxhash::FxHasher; use selectors::attr::CaseSensitivity; use selectors::parser::{Component, LocalName, Selector}; diff --git a/servo/components/style/lib.rs b/servo/components/style/lib.rs index 6c4355ff329d..6116c12665a7 100644 --- a/servo/components/style/lib.rs +++ b/servo/components/style/lib.rs @@ -44,7 +44,7 @@ extern crate fxhash; #[cfg(feature = "gecko")] #[macro_use] pub mod gecko_string_cache; -extern crate hashbrown; +extern crate hashglobe; #[cfg(feature = "servo")] #[macro_use] extern crate html5ever; diff --git a/servo/components/style/selector_map.rs b/servo/components/style/selector_map.rs index 92628c69307c..6350caef0cfd 100644 --- a/servo/components/style/selector_map.rs +++ b/servo/components/style/selector_map.rs @@ -14,8 +14,8 @@ use crate::rule_tree::CascadeLevel; use crate::selector_parser::SelectorImpl; use crate::stylist::Rule; use crate::{Atom, LocalName, Namespace, WeakAtom}; -use 
fallible::{FallibleHashMap, FallibleVec}; -use hashbrown::CollectionAllocErr; +use fallible::FallibleVec; +use hashglobe::FailedAllocationError; use precomputed_hash::PrecomputedHash; use selectors::matching::{matches_selector, ElementSelectorFlags, MatchingContext}; use selectors::parser::{Combinator, Component, SelectorIter}; @@ -95,7 +95,7 @@ pub trait SelectorMapEntry: Sized + Clone { /// /// TODO: Tune the initial capacity of the HashMap #[derive(Debug, MallocSizeOf)] -pub struct SelectorMap { +pub struct SelectorMap { /// Rules that have `:root` selectors. pub root: SmallVec<[T; 1]>, /// A hash from an ID to rules which contain that ID selector. @@ -112,14 +112,17 @@ pub struct SelectorMap { pub count: usize, } -impl Default for SelectorMap { +impl Default for SelectorMap { #[inline] fn default() -> Self { Self::new() } } -impl SelectorMap { +// FIXME(Manishearth) the 'static bound can be removed when +// our HashMap fork (hashglobe) is able to use NonZero, +// or when stdlib gets fallible collections +impl SelectorMap { /// Trivially constructs an empty `SelectorMap`. pub fn new() -> Self { SelectorMap { @@ -276,7 +279,11 @@ impl SelectorMap { impl SelectorMap { /// Inserts an entry into the correct bucket(s). - pub fn insert(&mut self, entry: T, quirks_mode: QuirksMode) -> Result<(), CollectionAllocErr> { + pub fn insert( + &mut self, + entry: T, + quirks_mode: QuirksMode, + ) -> Result<(), FailedAllocationError> { self.count += 1; // NOTE(emilio): It'd be nice for this to be a separate function, but @@ -609,9 +616,14 @@ fn find_bucket<'a>( /// Wrapper for PrecomputedHashMap that does ASCII-case-insensitive lookup in quirks mode. 
#[derive(Debug, MallocSizeOf)] -pub struct MaybeCaseInsensitiveHashMap(PrecomputedHashMap); +pub struct MaybeCaseInsensitiveHashMap( + PrecomputedHashMap, +); -impl MaybeCaseInsensitiveHashMap { +// FIXME(Manishearth) the 'static bound can be removed when +// our HashMap fork (hashglobe) is able to use NonZero, +// or when stdlib gets fallible collections +impl MaybeCaseInsensitiveHashMap { /// Empty map pub fn new() -> Self { MaybeCaseInsensitiveHashMap(PrecomputedHashMap::default()) @@ -622,8 +634,7 @@ impl MaybeCaseInsensitiveHashMap { &mut self, mut key: Atom, quirks_mode: QuirksMode, - ) -> Result>, CollectionAllocErr> - { + ) -> Result, FailedAllocationError> { if quirks_mode == QuirksMode::Quirks { key = key.to_ascii_lowercase() } diff --git a/servo/components/style/stylist.rs b/servo/components/style/stylist.rs index fe8d5cb6437b..bb6aec6c52de 100644 --- a/servo/components/style/stylist.rs +++ b/servo/components/style/stylist.rs @@ -33,8 +33,8 @@ use crate::stylesheets::{CounterStyleRule, FontFaceRule, FontFeatureValuesRule, use crate::stylesheets::{CssRule, Origin, OriginSet, PerOrigin, PerOriginIter}; use crate::thread_state::{self, ThreadState}; use crate::{Atom, LocalName, Namespace, WeakAtom}; -use fallible::{FallibleHashMap, FallibleVec}; -use hashbrown::CollectionAllocErr; +use fallible::FallibleVec; +use hashglobe::FailedAllocationError; use malloc_size_of::MallocSizeOf; #[cfg(feature = "gecko")] use malloc_size_of::{MallocShallowSizeOf, MallocSizeOfOps, MallocUnconditionalShallowSizeOf}; @@ -88,7 +88,7 @@ impl UserAgentCascadeDataCache { device: &Device, quirks_mode: QuirksMode, guard: &SharedRwLockReadGuard, - ) -> Result, CollectionAllocErr> + ) -> Result, FailedAllocationError> where I: Iterator + Clone, S: StylesheetInDocument + ToMediaListKey + PartialEq + 'static, @@ -260,7 +260,7 @@ impl DocumentCascadeData { quirks_mode: QuirksMode, mut flusher: DocumentStylesheetFlusher<'a, S>, guards: &StylesheetGuards, - ) -> Result<(), 
CollectionAllocErr> + ) -> Result<(), FailedAllocationError> where S: StylesheetInDocument + ToMediaListKey + PartialEq + 'static, { @@ -1835,7 +1835,7 @@ impl CascadeData { quirks_mode: QuirksMode, collection: SheetCollectionFlusher, guard: &SharedRwLockReadGuard, - ) -> Result<(), CollectionAllocErr> + ) -> Result<(), FailedAllocationError> where S: StylesheetInDocument + ToMediaListKey + PartialEq + 'static, { @@ -1972,7 +1972,7 @@ impl CascadeData { guard: &SharedRwLockReadGuard, rebuild_kind: SheetRebuildKind, mut precomputed_pseudo_element_decls: Option<&mut PrecomputedPseudoElementDeclarations>, - ) -> Result<(), CollectionAllocErr> + ) -> Result<(), FailedAllocationError> where S: StylesheetInDocument + ToMediaListKey + 'static, { diff --git a/tools/lint/clippy.yml b/tools/lint/clippy.yml index cdd23376ee87..735c9bcfcf64 100644 --- a/tools/lint/clippy.yml +++ b/tools/lint/clippy.yml @@ -94,6 +94,7 @@ clippy: - media/audioipc/audioipc/ - media/audioipc/server/ - tools/lint/test/files/clippy/ + - servo/components/hashglobe/ - servo/ports/geckolib/ - servo/ports/geckolib/tests/ - servo/tests/unit/malloc_size_of/ diff --git a/tools/lint/license.yml b/tools/lint/license.yml index bd5646b1bb02..408d06578558 100644 --- a/tools/lint/license.yml +++ b/tools/lint/license.yml @@ -46,6 +46,8 @@ license: - security/mac/hardenedruntime/plugin-container.developer.entitlements.xml - security/mac/hardenedruntime/plugin-container.production.entitlements.xml - security/mac/hardenedruntime/production.entitlements.xml + - servo/components/hashglobe/src/alloc.rs + - servo/components/hashglobe/src/shim.rs - toolkit/components/reputationservice/chromium/chrome/common/safe_browsing/csd.pb.cc - toolkit/components/reputationservice/chromium/chrome/common/safe_browsing/csd.pb.h - toolkit/mozapps/update/updater/crctable.h