зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1910796 - Integrate libz-rs-sys as a replacement for zlib. r=supply-chain-reviewers,firefox-build-system-reviewers,nika,sergesanspaille
Disabled by default for now. Differential Revision: https://phabricator.services.mozilla.com/D218165
This commit is contained in:
Родитель
9f78e9ec82
Коммит
9f3d6d14c7
|
@ -50,6 +50,11 @@ git = "https://github.com/jfkthame/mapped_hyph.git"
|
|||
rev = "eff105f6ad7ec9b79816cfc1985a28e5340ad14b"
|
||||
replace-with = "vendored-sources"
|
||||
|
||||
[source."git+https://github.com/memorysafety/zlib-rs?rev=4aa430ccb77537d0d60dab8db993ca51bb1194c5"]
|
||||
git = "https://github.com/memorysafety/zlib-rs"
|
||||
rev = "4aa430ccb77537d0d60dab8db993ca51bb1194c5"
|
||||
replace-with = "vendored-sources"
|
||||
|
||||
[source."git+https://github.com/mozilla-spidermonkey/jsparagus?rev=61f399c53a641ebd3077c1f39f054f6d396a633c"]
|
||||
git = "https://github.com/mozilla-spidermonkey/jsparagus"
|
||||
rev = "61f399c53a641ebd3077c1f39f054f6d396a633c"
|
||||
|
|
|
@ -2323,6 +2323,7 @@ dependencies = [
|
|||
"kvstore",
|
||||
"l10nregistry",
|
||||
"l10nregistry-ffi",
|
||||
"libz-rs-sys",
|
||||
"lmdb-rkv-sys",
|
||||
"localization-ffi",
|
||||
"log",
|
||||
|
@ -3407,6 +3408,14 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-rs-sys"
|
||||
version = "0.2.1"
|
||||
source = "git+https://github.com/memorysafety/zlib-rs?rev=4aa430ccb77537d0d60dab8db993ca51bb1194c5#4aa430ccb77537d0d60dab8db993ca51bb1194c5"
|
||||
dependencies = [
|
||||
"zlib-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "line-wrap"
|
||||
version = "0.1.1"
|
||||
|
@ -7240,3 +7249,8 @@ dependencies = [
|
|||
"memchr",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zlib-rs"
|
||||
version = "0.2.1"
|
||||
source = "git+https://github.com/memorysafety/zlib-rs?rev=4aa430ccb77537d0d60dab8db993ca51bb1194c5#4aa430ccb77537d0d60dab8db993ca51bb1194c5"
|
||||
|
|
|
@ -261,6 +261,9 @@ export IPHONEOS_SDK_DIR
|
|||
PATH := $(topsrcdir)/build/macosx:$(PATH)
|
||||
endif
|
||||
endif
|
||||
# Use the same prefix as set through modules/zlib/src/mozzconf.h
|
||||
# for libz-rs-sys, since we still use the headers from there.
|
||||
export LIBZ_RS_SYS_PREFIX=MOZ_Z_
|
||||
|
||||
ifndef RUSTC_BOOTSTRAP
|
||||
RUSTC_BOOTSTRAP := mozglue_static,qcms
|
||||
|
|
|
@ -53,9 +53,13 @@ if CONFIG["JS_HAS_INTL_API"]:
|
|||
USE_LIBS += [
|
||||
"fdlibm",
|
||||
"nspr",
|
||||
"zlib",
|
||||
]
|
||||
|
||||
if not CONFIG["USE_LIBZ_RS"]:
|
||||
USE_LIBS += [
|
||||
"zlib",
|
||||
]
|
||||
|
||||
if CONFIG["OS_ARCH"] != "WINNT":
|
||||
OS_LIBS += [
|
||||
"m",
|
||||
|
|
|
@ -84,7 +84,8 @@ SOURCES += [
|
|||
# zlib library
|
||||
DEFINES['FT_CONFIG_OPTION_SYSTEM_ZLIB'] = True
|
||||
CFLAGS += CONFIG['MOZ_ZLIB_CFLAGS']
|
||||
USE_LIBS += ['zlib']
|
||||
if not CONFIG["USE_LIBZ_RS"]:
|
||||
USE_LIBS += ['zlib']
|
||||
|
||||
# png library
|
||||
DEFINES['FT_CONFIG_OPTION_USE_PNG'] = True
|
||||
|
|
|
@ -889,6 +889,13 @@ pkg_check_modules("MOZ_ZLIB", "zlib >= 1.2.3", when="--with-system-zlib")
|
|||
|
||||
set_config("MOZ_SYSTEM_ZLIB", True, when="--with-system-zlib")
|
||||
|
||||
option(
|
||||
env="USE_LIBZ_RS",
|
||||
help="Use libz-rs-sys instead of zlib",
|
||||
when=toolkit & ~with_system_zlib_option,
|
||||
)
|
||||
|
||||
set_config("USE_LIBZ_RS", True, when="USE_LIBZ_RS")
|
||||
|
||||
with only_when(cross_compiling):
|
||||
option(
|
||||
|
|
|
@ -2769,6 +2769,12 @@ who = "Mark Hammond <mhammond@mozilla.com>"
|
|||
criteria = "safe-to-deploy"
|
||||
delta = "0.27.0 -> 0.28.0"
|
||||
|
||||
[[audits.libz-rs-sys]]
|
||||
who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "0.2.1 -> 0.2.1@git:4aa430ccb77537d0d60dab8db993ca51bb1194c5"
|
||||
importable = false
|
||||
|
||||
[[audits.linked-hash-map]]
|
||||
who = "Aria Beingessner <a.beingessner@gmail.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
|
@ -5446,6 +5452,12 @@ non-1-byte-aligned type, however right now that is not the case
|
|||
(submitted https://github.com/zip-rs/zip2/issues/198).
|
||||
"""
|
||||
|
||||
[[audits.zlib-rs]]
|
||||
who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "0.2.1 -> 0.2.1@git:4aa430ccb77537d0d60dab8db993ca51bb1194c5"
|
||||
importable = false
|
||||
|
||||
[[trusted.aho-corasick]]
|
||||
criteria = "safe-to-deploy"
|
||||
user-id = 189 # Andrew Gallant (BurntSushi)
|
||||
|
@ -5602,6 +5614,12 @@ user-id = 51017 # Yuki Okushi (JohnTitor)
|
|||
start = "2020-03-17"
|
||||
end = "2024-10-25"
|
||||
|
||||
[[trusted.libz-rs-sys]]
|
||||
criteria = "safe-to-deploy"
|
||||
user-id = 1303 # Ruben Nijveld (rnijveld)
|
||||
start = "2024-02-23"
|
||||
end = "2024-09-01"
|
||||
|
||||
[[trusted.linux-raw-sys]]
|
||||
criteria = "safe-to-deploy"
|
||||
user-id = 6825 # Dan Gohman (sunfishcode)
|
||||
|
@ -5907,3 +5925,9 @@ criteria = "safe-to-deploy"
|
|||
user-id = 64539 # Kenny Kerr (kennykerr)
|
||||
start = "2021-11-15"
|
||||
end = "2024-09-12"
|
||||
|
||||
[[trusted.zlib-rs]]
|
||||
criteria = "safe-to-deploy"
|
||||
user-id = 1303 # Ruben Nijveld (rnijveld)
|
||||
start = "2024-02-23"
|
||||
end = "2024-09-01"
|
||||
|
|
|
@ -94,6 +94,9 @@ notes = "This crate has two testing-only dependencies which are specified as reg
|
|||
audit-as-crates-io = false
|
||||
notes = "This override is an api-compatible fork with an orthogonal implementation."
|
||||
|
||||
[policy."libz-rs-sys:0.2.1@git:4aa430ccb77537d0d60dab8db993ca51bb1194c5"]
|
||||
audit-as-crates-io = true
|
||||
|
||||
[policy.malloc_size_of_derive]
|
||||
audit-as-crates-io = false
|
||||
notes = "This was originally servo code which Bobby Holley put on crates.io some years ago and that was moved in-tree as first-party code later on."
|
||||
|
@ -227,6 +230,9 @@ notes = "Local override of the crates.io crate that uses a non-vendored local co
|
|||
[policy.wr_malloc_size_of]
|
||||
audit-as-crates-io = false
|
||||
|
||||
[policy."zlib-rs:0.2.1@git:4aa430ccb77537d0d60dab8db993ca51bb1194c5"]
|
||||
audit-as-crates-io = true
|
||||
|
||||
[[exemptions.ahash]]
|
||||
version = "0.7.6"
|
||||
criteria = "safe-to-deploy"
|
||||
|
|
|
@ -316,6 +316,13 @@ user-id = 51017
|
|||
user-login = "JohnTitor"
|
||||
user-name = "Yuki Okushi"
|
||||
|
||||
[[publisher.libz-rs-sys]]
|
||||
version = "0.2.1"
|
||||
when = "2024-07-08"
|
||||
user-id = 1303
|
||||
user-login = "rnijveld"
|
||||
user-name = "Ruben Nijveld"
|
||||
|
||||
[[publisher.linux-raw-sys]]
|
||||
version = "0.4.12"
|
||||
when = "2023-11-30"
|
||||
|
@ -812,6 +819,13 @@ user-id = 48
|
|||
user-login = "badboy"
|
||||
user-name = "Jan-Erik Rediger"
|
||||
|
||||
[[publisher.zlib-rs]]
|
||||
version = "0.2.1"
|
||||
when = "2024-07-08"
|
||||
user-id = 1303
|
||||
user-login = "rnijveld"
|
||||
user-name = "Ruben Nijveld"
|
||||
|
||||
[[audits.bytecode-alliance.wildcard-audits.arbitrary]]
|
||||
who = "Nick Fitzgerald <fitzgen@gmail.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
{"files":{"Cargo.toml":"662b4b3805de22d6f8024b5f127a97d4915211edec7ed14d6c774086728cd5ea","LICENSE":"7d60612df8fcd9d3714871a95b4d3012563246fdea8f6710b7567f83cfa3c8ef","README.md":"46f48b56018d0efef5738be7d930019631899dede51ee5e92f44bd53f6e26749","src/lib.rs":"44e21c7ccacbf35f483ab9d777802624a0d111d161cfd2778a37aa5f33279c47"},"package":null}
|
|
@ -0,0 +1,38 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2021"
|
||||
rust-version = "1.75"
|
||||
name = "libz-rs-sys"
|
||||
version = "0.2.1"
|
||||
publish = true
|
||||
description = "A memory-safe zlib implementation written in rust"
|
||||
homepage = "https://github.com/memorysafety/zlib-rs"
|
||||
readme = "README.md"
|
||||
license = "Zlib"
|
||||
repository = "https://github.com/memorysafety/zlib-rs"
|
||||
|
||||
[dependencies.zlib-rs]
|
||||
version = "0.2.1"
|
||||
path = "../zlib-rs"
|
||||
default-features = false
|
||||
|
||||
[features]
|
||||
c-allocator = ["zlib-rs/c-allocator"]
|
||||
custom-prefix = []
|
||||
default = [
|
||||
"std",
|
||||
"rust-allocator",
|
||||
]
|
||||
rust-allocator = ["zlib-rs/rust-allocator"]
|
||||
std = ["zlib-rs/std"]
|
||||
testing-prefix = []
|
|
@ -0,0 +1,19 @@
|
|||
(C) 2024 Internet Security Research Group
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
|
@ -0,0 +1,90 @@
|
|||
This crate is a C API for [zlib-rs](https://docs.rs/zlib-rs/latest/zlib_rs/). The API is broadly equivalent to [`zlib-sys`](https://docs.rs/libz-sys/latest/libz_sys/) and [`zlib-ng-sys`](https://docs.rs/libz-ng-sys/latest/libz_ng_sys/), but does not currently provide the `gz*` family of functions.
|
||||
|
||||
From a rust perspective, this API is not very ergonomic. Use the [`flate2`](https://crates.io/crates/flate2) crate for a more
|
||||
ergonomic rust interface to zlib.
|
||||
|
||||
# Features
|
||||
|
||||
**`custom-prefix`**
|
||||
|
||||
Add a custom prefix to all exported symbols.
|
||||
|
||||
The value of the `LIBZ_RS_SYS_PREFIX` is used as a prefix for all exported symbols. For example:
|
||||
|
||||
```ignore
|
||||
> LIBZ_RS_SYS_PREFIX="MY_CUSTOM_PREFIX" cargo build -p libz-rs-sys --features=custom-prefix
|
||||
Compiling libz-rs-sys v0.2.1 (/home/folkertdev/rust/zlib-rs/libz-rs-sys)
|
||||
Finished `dev` profile [optimized + debuginfo] target(s) in 0.21s
|
||||
> objdump -tT target/debug/liblibz_rs_sys.so | grep "uncompress"
|
||||
0000000000081028 l O .got 0000000000000000 _ZN7zlib_rs7inflate10uncompress17he7d985e55c58a189E$got
|
||||
000000000002c570 l F .text 00000000000001ef _ZN7zlib_rs7inflate10uncompress17he7d985e55c58a189E
|
||||
0000000000024330 g F .text 000000000000008e MY_CUSTOM_PREFIXuncompress
|
||||
0000000000024330 g DF .text 000000000000008e Base MY_CUSTOM_PREFIXuncompress
|
||||
```
|
||||
|
||||
**`c-allocator`, `rust-allocator`**
|
||||
|
||||
Pick the default allocator implementation that is used if no `zalloc` and `zfree` are configured in the input `z_stream`.
|
||||
|
||||
- `c-allocator`: use `malloc`/`free` for the implementation of `zalloc` and `zfree`
|
||||
- `rust-allocator`: the rust global allocator for the implementation of `zalloc` and `zfree`
|
||||
|
||||
The `rust-allocator` is the default when this crate is used as a rust dependency, and slightly more efficient because alignment is handled by the allocator. When building a dynamic library, it may make sense to use `c-allocator` instead.
|
||||
|
||||
**`std`**
|
||||
|
||||
Assume that `std` is available. When this feature is turned off, this crate is compatible with `#![no_std]`.
|
||||
|
||||
# Example
|
||||
|
||||
This example compresses ("deflates") the string `"Hello, World!"` and then decompresses
|
||||
("inflates") it again.
|
||||
|
||||
```rust
|
||||
let mut strm = libz_rs_sys::z_stream::default();
|
||||
|
||||
let version = libz_rs_sys::zlibVersion();
|
||||
let stream_size = core::mem::size_of_val(&strm) as i32;
|
||||
|
||||
let level = 6; // the default compression level
|
||||
let err = unsafe { libz_rs_sys::deflateInit_(&mut strm, level, version, stream_size) };
|
||||
assert_eq!(err, libz_rs_sys::Z_OK);
|
||||
|
||||
let input = "Hello, World!";
|
||||
strm.avail_in = input.len() as _;
|
||||
strm.next_in = input.as_ptr();
|
||||
|
||||
let mut output = [0u8; 32];
|
||||
strm.avail_out = output.len() as _;
|
||||
strm.next_out = output.as_mut_ptr();
|
||||
|
||||
let err = unsafe { libz_rs_sys::deflate(&mut strm, libz_rs_sys::Z_FINISH) };
|
||||
assert_eq!(err, libz_rs_sys::Z_STREAM_END);
|
||||
|
||||
let err = unsafe { libz_rs_sys::deflateEnd(&mut strm) };
|
||||
assert_eq!(err, libz_rs_sys::Z_OK);
|
||||
|
||||
let deflated = &mut output[..strm.total_out as usize];
|
||||
|
||||
let mut strm = libz_rs_sys::z_stream::default();
|
||||
let err = unsafe { libz_rs_sys::inflateInit_(&mut strm, version, stream_size) };
|
||||
assert_eq!(err, libz_rs_sys::Z_OK);
|
||||
|
||||
strm.avail_in = deflated.len() as _;
|
||||
strm.next_in = deflated.as_ptr();
|
||||
|
||||
let mut output = [0u8; 32];
|
||||
strm.avail_out = output.len() as _;
|
||||
strm.next_out = output.as_mut_ptr();
|
||||
|
||||
let err = unsafe { libz_rs_sys::inflate(&mut strm, libz_rs_sys::Z_FINISH) };
|
||||
assert_eq!(err, libz_rs_sys::Z_STREAM_END);
|
||||
|
||||
let err = unsafe { libz_rs_sys::inflateEnd(&mut strm) };
|
||||
assert_eq!(err, libz_rs_sys::Z_OK);
|
||||
|
||||
let inflated = &output[..strm.total_out as usize];
|
||||
|
||||
assert_eq!(inflated, input.as_bytes())
|
||||
```
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1 @@
|
|||
{"files":{"Cargo.toml":"aa569e465a1555b997bbae2ec7e130f4dd4bbaa4b87803e6965be9305c6e0801","LICENSE":"7d60612df8fcd9d3714871a95b4d3012563246fdea8f6710b7567f83cfa3c8ef","README.md":"9938581c82330440be5f3b6b9125cc02c0874b250dc62093f167bf2158dbe29a","src/adler32.rs":"daa45aa1c83096e8962c5d687da1d99f57eecfa15380084d2634c775bf628a9b","src/adler32/avx2.rs":"7383b4dd559cc946b966240971fb0fc4aa94eecc3f870c277f238c60c7541c71","src/adler32/generic.rs":"cfe8da0e145faac42ed31e9de1ccb718e389265517a007533b8c997cae68f1a7","src/adler32/neon.rs":"1d77a00d009c659cf6113eb661d8e974568077031a6b548a31159cfc3da22c7e","src/allocate.rs":"e0c181e6e052f609d5024abaf327fbe8a6e9c0d7aacdce1ba7c8aae35e6265b4","src/c_api.rs":"8328f52b477beecfc7f68240c40e775516cb773dad8c46cf24aad178c4ea0f6c","src/cpu_features.rs":"e283f3e65ea9bde9fa85fb3e83ba17b8bdf1b32d24e5cff51b15ea34f15847f3","src/crc32.rs":"ef2799e93378aa32d81a146dc5671cbc00b0664b330bdf338456f982e4c92c82","src/crc32/acle.rs":"3b4a7b7101f1c2b3ef531b732d466de4b11a6ce9c86646bac9ed38dd90d25f79","src/crc32/braid.rs":"9911138a714353708b6f9945e2b0debf86ed6879db7497d1c9ecb308eb29def9","src/crc32/combine.rs":"a1aded8c5f1886f60daad9765886299c65feb5240f24f8e9f67ebced14e267f0","src/crc32/pclmulqdq.rs":"891a57eea0e8d5ca4b5d5ee0dd69c62d0c9ecaa741c26ba09abedefe53fd446b","src/deflate.rs":"5e02de2d90106cf966b4613971da4ebe14f04d819fac1e8119b6a3da9e28f70d","src/deflate/algorithm/fast.rs":"686c0a35c1baff2d842287354f919e166fe5eca1748ad46ed14d6127611bffa0","src/deflate/algorithm/huff.rs":"2ed0a098571d4e056bb4e1d8655ec8d37e6d291ba3e2d5d7c581c2486e6abbce","src/deflate/algorithm/medium.rs":"6b3b3e99ad870a6c35921b0abb4b10569e49bf3d5f57a8af2179c886a94e54b6","src/deflate/algorithm/mod.rs":"184151cde5952a4ff0029c6647705be5f884d558bd8552a3d29f9c7a16598c93","src/deflate/algorithm/quick.rs":"3b981d8b80b6e593b21fdf3c73f47e3b3a8b912d5cd803d6752f26b2aef278e1","src/deflate/algorithm/rle.rs":"549427a5a8a69610afd612f89a9cbde97fe78c38c85442083b5dde10e8be4d73","src/deflate/algorithm/slo
w.rs":"2fa351c77604fad7d5e113ed3b90ba2abc83be0ff589a0e367d012aee5ce967b","src/deflate/algorithm/stored.rs":"0ab4c6e1d901a7460edf3d7e760bf633122b4016d1c0535f31738ac8d0d9b2d8","src/deflate/compare256.rs":"2d476ae9363cdf9f0b06aafd5bcb7899fc66b18cd4802b19a2f4c2adad86af99","src/deflate/hash_calc.rs":"4b9e9629593b27c4331e3d4adab54d262ec8e07af0c37f6ca2f5b578e1ed54a0","src/deflate/longest_match.rs":"176babeb518323f587995de932c623d099444d7565682a0838669159b8213fd8","src/deflate/pending.rs":"84f0860b650570e824607e3ceb53dcc9fbb91a667ba110728cc9d4c995f1bb05","src/deflate/slide_hash.rs":"0b279c4e6d84fb713516772209ff5224fef6072a517f7983d749717d1ff2a2f5","src/deflate/test-data/inflate_buf_error.dat":"254f280f8f1e8914bd12d8bcd3813e5c08083b1b46d8d643c8e2ebe109e75cb8","src/deflate/test-data/paper-100k.pdf":"60f73a051b7ca35bfec44734b2eed7736cb5c0b7f728beb7b97ade6c5e44849b","src/deflate/trees_tbl.rs":"503c65c7648405619a95dc9f5a52ecd558e439e870c116f61ef94128c6a4c52e","src/deflate/window.rs":"317cc28e690710a5905fcda2dbddfa91e27bd8f44380b1d923519c40dc0399e5","src/inflate.rs":"a528850e11437d7e923a5ca193957859c4171963532481232ded2c857df5d05f","src/inflate/bitreader.rs":"09d1844933d044e7a758ad2d68d2ab421b5003881f509b58be611faba6e86295","src/inflate/inffixed_tbl.rs":"eb1ed1927ca07b61fe30ae8461ce62e7da28c595416e687a26db57c8eac8f4a1","src/inflate/inftrees.rs":"44efb568c9cc2dbbc6c51e50f3cc38d6c8e896b93936f47b3879396fc814abfe","src/inflate/window.rs":"8b175bdba8c7f7cd346a4ab408f0218c84ae177f32f2ac581872064269677423","src/lib.rs":"94a27cad5bb21f60b2a49c5bdfcafeef43c643a20ecbce7392e1bda7047e484f","src/read_buf.rs":"1c34709d582568e46223f7f3b116956af314f07238b8105724ed21c1b23ac23c"},"package":null}
|
|
@ -0,0 +1,59 @@
|
|||
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
|
||||
#
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2021"
|
||||
rust-version = "1.75"
|
||||
name = "zlib-rs"
|
||||
version = "0.2.1"
|
||||
publish = true
|
||||
description = "A memory-safe zlib implementation written in rust"
|
||||
homepage = "https://github.com/memorysafety/zlib-rs"
|
||||
readme = "README.md"
|
||||
license = "Zlib"
|
||||
repository = "https://github.com/memorysafety/zlib-rs"
|
||||
|
||||
[dependencies.arbitrary]
|
||||
version = "1.0"
|
||||
features = ["derive"]
|
||||
optional = true
|
||||
|
||||
[dependencies.libz-sys]
|
||||
version = "1.1.19"
|
||||
features = ["zlib-ng"]
|
||||
optional = true
|
||||
default-features = false
|
||||
|
||||
[dependencies.quickcheck]
|
||||
version = "1.0.3"
|
||||
features = []
|
||||
optional = true
|
||||
default-features = false
|
||||
|
||||
[dev-dependencies]
|
||||
crc32fast = "1.3.2"
|
||||
|
||||
[dev-dependencies.quickcheck]
|
||||
version = "1.0.3"
|
||||
features = []
|
||||
default-features = false
|
||||
|
||||
[features]
|
||||
ZLIB_DEBUG = []
|
||||
__internal-fuzz = ["arbitrary"]
|
||||
__internal-test = ["quickcheck"]
|
||||
c-allocator = []
|
||||
default = [
|
||||
"std",
|
||||
"c-allocator",
|
||||
]
|
||||
rust-allocator = []
|
||||
std = ["rust-allocator"]
|
|
@ -0,0 +1,19 @@
|
|||
(C) 2024 Internet Security Research Group
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
|
@ -0,0 +1,6 @@
|
|||
# ⚠️ UNSTABLE⚠️
|
||||
_the public interface of this crate is unstable!_
|
||||
|
||||
A pure-rust implementation of [zlib](https://www.zlib.net/manual.html).
|
||||
|
||||
For a [zlib](https://www.zlib.net/manual.html) -compatible rust api of this crate, see [`libz-rs-sys`](https://crates.io/crates/libz-rs-sys). For a more high-level interface, use [`flate2`](https://crates.io/crates/flate2).
|
|
@ -0,0 +1,133 @@
|
|||
use core::mem::MaybeUninit;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod avx2;
|
||||
mod generic;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod neon;
|
||||
|
||||
pub fn adler32(start_checksum: u32, data: &[u8]) -> u32 {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::cpu_features::is_enabled_avx2() {
|
||||
return avx2::adler32_avx2(start_checksum, data);
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
if crate::cpu_features::is_enabled_neon() {
|
||||
return self::neon::adler32_neon(start_checksum, data);
|
||||
}
|
||||
|
||||
generic::adler32_rust(start_checksum, data)
|
||||
}
|
||||
|
||||
pub fn adler32_fold_copy(start_checksum: u32, dst: &mut [MaybeUninit<u8>], src: &[u8]) -> u32 {
|
||||
debug_assert!(dst.len() >= src.len(), "{} < {}", dst.len(), src.len());
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::cpu_features::is_enabled_avx2() {
|
||||
return avx2::adler32_fold_copy_avx2(start_checksum, dst, src);
|
||||
}
|
||||
|
||||
let adler = adler32(start_checksum, src);
|
||||
dst[..src.len()].copy_from_slice(slice_to_uninit(src));
|
||||
adler
|
||||
}
|
||||
|
||||
pub fn adler32_combine(adler1: u32, adler2: u32, len2: u64) -> u32 {
|
||||
const BASE: u64 = self::BASE as u64;
|
||||
|
||||
let rem = len2 % BASE;
|
||||
|
||||
let adler1 = adler1 as u64;
|
||||
let adler2 = adler2 as u64;
|
||||
|
||||
/* the derivation of this formula is left as an exercise for the reader */
|
||||
let mut sum1 = adler1 & 0xffff;
|
||||
let mut sum2 = rem * sum1;
|
||||
sum2 %= BASE;
|
||||
sum1 += (adler2 & 0xffff) + BASE - 1;
|
||||
sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
|
||||
|
||||
if sum1 >= BASE {
|
||||
sum1 -= BASE;
|
||||
}
|
||||
if sum1 >= BASE {
|
||||
sum1 -= BASE;
|
||||
}
|
||||
if sum2 >= (BASE << 1) {
|
||||
sum2 -= BASE << 1;
|
||||
}
|
||||
if sum2 >= BASE {
|
||||
sum2 -= BASE;
|
||||
}
|
||||
|
||||
(sum1 | (sum2 << 16)) as u32
|
||||
}
|
||||
|
||||
// when stable, use MaybeUninit::write_slice
|
||||
fn slice_to_uninit(slice: &[u8]) -> &[MaybeUninit<u8>] {
|
||||
// safety: &[T] and &[MaybeUninit<T>] have the same layout
|
||||
unsafe { &*(slice as *const [u8] as *const [MaybeUninit<u8>]) }
|
||||
}
|
||||
|
||||
// inefficient but correct, useful for testing
|
||||
#[cfg(test)]
|
||||
fn naive_adler32(start_checksum: u32, data: &[u8]) -> u32 {
|
||||
const MOD_ADLER: u32 = 65521; // Largest prime smaller than 2^16
|
||||
|
||||
let mut a = start_checksum & 0xFFFF;
|
||||
let mut b = (start_checksum >> 16) & 0xFFFF;
|
||||
|
||||
for &byte in data {
|
||||
a = (a + byte as u32) % MOD_ADLER;
|
||||
b = (b + a) % MOD_ADLER;
|
||||
}
|
||||
|
||||
(b << 16) | a
|
||||
}
|
||||
|
||||
const BASE: u32 = 65521; /* largest prime smaller than 65536 */
|
||||
const NMAX: u32 = 5552;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn naive_is_fancy_small_inputs() {
|
||||
for i in 0..128 {
|
||||
let v = (0u8..i).collect::<Vec<_>>();
|
||||
assert_eq!(naive_adler32(1, &v), generic::adler32_rust(1, &v));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_adler32_combine() {
|
||||
::quickcheck::quickcheck(test as fn(_) -> _);
|
||||
|
||||
fn test(data: Vec<u8>) -> bool {
|
||||
let Some(buf_len) = data.first().copied() else {
|
||||
return true;
|
||||
};
|
||||
|
||||
let buf_size = Ord::max(buf_len, 1) as usize;
|
||||
|
||||
let mut adler1 = 1;
|
||||
let mut adler2 = 1;
|
||||
|
||||
for chunk in data.chunks(buf_size) {
|
||||
adler1 = adler32(adler1, chunk);
|
||||
}
|
||||
|
||||
adler2 = adler32(adler2, &data);
|
||||
|
||||
assert_eq!(adler1, adler2);
|
||||
|
||||
let combine1 = adler32_combine(adler1, adler2, data.len() as _);
|
||||
let combine2 = adler32_combine(adler1, adler1, data.len() as _);
|
||||
assert_eq!(combine1, combine2);
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,257 @@
|
|||
use core::{
|
||||
arch::x86_64::{
|
||||
__m256i, _mm256_add_epi32, _mm256_castsi256_si128, _mm256_extracti128_si256,
|
||||
_mm256_madd_epi16, _mm256_maddubs_epi16, _mm256_permutevar8x32_epi32, _mm256_sad_epu8,
|
||||
_mm256_slli_epi32, _mm256_storeu_si256, _mm256_zextsi128_si256, _mm_add_epi32,
|
||||
_mm_cvtsi128_si32, _mm_cvtsi32_si128, _mm_shuffle_epi32, _mm_unpackhi_epi64,
|
||||
},
|
||||
mem::MaybeUninit,
|
||||
};
|
||||
|
||||
use crate::adler32::{
|
||||
generic::{adler32_copy_len_16, adler32_len_16, adler32_len_64},
|
||||
BASE, NMAX,
|
||||
};
|
||||
|
||||
const fn __m256i_literal(bytes: [u8; 32]) -> __m256i {
|
||||
unsafe { core::mem::transmute(bytes) }
|
||||
}
|
||||
|
||||
const DOT2V: __m256i = __m256i_literal([
|
||||
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9,
|
||||
8, 7, 6, 5, 4, 3, 2, 1,
|
||||
]);
|
||||
|
||||
const DOT3V: __m256i = __m256i_literal([
|
||||
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
|
||||
]);
|
||||
|
||||
const ZERO: __m256i = __m256i_literal([0; 32]);
|
||||
|
||||
// 32 bit horizontal sum, adapted from Agner Fog's vector library.
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn hsum256(x: __m256i) -> u32 {
|
||||
unsafe {
|
||||
let sum1 = _mm_add_epi32(_mm256_extracti128_si256(x, 1), _mm256_castsi256_si128(x));
|
||||
let sum2 = _mm_add_epi32(sum1, _mm_unpackhi_epi64(sum1, sum1));
|
||||
let sum3 = _mm_add_epi32(sum2, _mm_shuffle_epi32(sum2, 1));
|
||||
_mm_cvtsi128_si32(sum3) as u32
|
||||
}
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn partial_hsum256(x: __m256i) -> u32 {
|
||||
const PERM_VEC: __m256i = __m256i_literal([
|
||||
0, 0, 0, 0, //
|
||||
2, 0, 0, 0, //
|
||||
4, 0, 0, 0, //
|
||||
6, 0, 0, 0, //
|
||||
1, 0, 0, 0, //
|
||||
1, 0, 0, 0, //
|
||||
1, 0, 0, 0, //
|
||||
1, 0, 0, 0, //
|
||||
]);
|
||||
|
||||
unsafe {
|
||||
let non_zero = _mm256_permutevar8x32_epi32(x, PERM_VEC);
|
||||
let non_zero_sse = _mm256_castsi256_si128(non_zero);
|
||||
let sum2 = _mm_add_epi32(non_zero_sse, _mm_unpackhi_epi64(non_zero_sse, non_zero_sse));
|
||||
let sum3 = _mm_add_epi32(sum2, _mm_shuffle_epi32(sum2, 1));
|
||||
_mm_cvtsi128_si32(sum3) as u32
|
||||
}
|
||||
}
|
||||
|
||||
pub fn adler32_avx2(adler: u32, src: &[u8]) -> u32 {
|
||||
assert!(crate::cpu_features::is_enabled_avx2());
|
||||
unsafe { adler32_avx2_help::<false>(adler, &mut [], src) }
|
||||
}
|
||||
|
||||
pub fn adler32_fold_copy_avx2(adler: u32, dst: &mut [MaybeUninit<u8>], src: &[u8]) -> u32 {
|
||||
assert!(crate::cpu_features::is_enabled_avx2());
|
||||
unsafe { adler32_avx2_help::<true>(adler, dst, src) }
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn adler32_avx2_help<const COPY: bool>(
|
||||
adler: u32,
|
||||
mut dst: &mut [MaybeUninit<u8>],
|
||||
src: &[u8],
|
||||
) -> u32 {
|
||||
if src.is_empty() {
|
||||
return adler;
|
||||
}
|
||||
|
||||
let (before, middle, after) = unsafe { src.align_to::<__m256i>() };
|
||||
|
||||
let mut adler1 = (adler >> 16) & 0xffff;
|
||||
let mut adler0 = adler & 0xffff;
|
||||
|
||||
let adler = if before.len() < 16 {
|
||||
if COPY {
|
||||
let adler = adler32_copy_len_16(adler0, dst, before, adler1);
|
||||
dst = &mut dst[before.len()..];
|
||||
adler
|
||||
} else {
|
||||
adler32_len_16(adler0, before, adler1)
|
||||
}
|
||||
} else if before.len() < 32 {
|
||||
if COPY {
|
||||
let adler = adler32_copy_len_16(adler0, dst, before, adler1);
|
||||
dst = &mut dst[before.len()..];
|
||||
adler
|
||||
} else {
|
||||
adler32_len_64(adler0, before, adler1)
|
||||
}
|
||||
} else {
|
||||
adler
|
||||
};
|
||||
|
||||
adler1 = (adler >> 16) & 0xffff;
|
||||
adler0 = adler & 0xffff;
|
||||
|
||||
// use largest step possible (without causing overflow)
|
||||
for chunk in middle.chunks(NMAX as usize / 32) {
|
||||
(adler0, adler1) = unsafe { helper_32_bytes::<COPY>(adler0, adler1, dst, chunk) };
|
||||
if COPY {
|
||||
dst = &mut dst[32 * chunk.len()..];
|
||||
}
|
||||
}
|
||||
|
||||
if !after.is_empty() {
|
||||
if after.len() < 16 {
|
||||
if COPY {
|
||||
return adler32_copy_len_16(adler0, dst, after, adler1);
|
||||
} else {
|
||||
return adler32_len_16(adler0, after, adler1);
|
||||
}
|
||||
} else if after.len() < 32 {
|
||||
if COPY {
|
||||
return adler32_copy_len_16(adler0, dst, after, adler1);
|
||||
} else {
|
||||
return adler32_len_64(adler0, after, adler1);
|
||||
}
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
adler0 | (adler1 << 16)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn helper_32_bytes<const COPY: bool>(
|
||||
mut adler0: u32,
|
||||
mut adler1: u32,
|
||||
dst: &mut [MaybeUninit<u8>],
|
||||
src: &[__m256i],
|
||||
) -> (u32, u32) {
|
||||
let mut vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0 as i32));
|
||||
let mut vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1 as i32));
|
||||
|
||||
let mut vs1_0 = vs1;
|
||||
let mut vs3 = ZERO;
|
||||
|
||||
let mut out_chunks = dst.chunks_exact_mut(32);
|
||||
|
||||
for vbuf in src.iter().copied() {
|
||||
if COPY {
|
||||
let out_chunk = out_chunks.next().unwrap();
|
||||
_mm256_storeu_si256(out_chunk.as_mut_ptr() as *mut __m256i, vbuf);
|
||||
}
|
||||
|
||||
let vs1_sad = _mm256_sad_epu8(vbuf, ZERO); // Sum of abs diff, resulting in 2 x int32's
|
||||
|
||||
vs1 = _mm256_add_epi32(vs1, vs1_sad);
|
||||
vs3 = _mm256_add_epi32(vs3, vs1_0);
|
||||
let v_short_sum2 = _mm256_maddubs_epi16(vbuf, DOT2V); // sum 32 uint8s to 16 shorts
|
||||
let vsum2 = _mm256_madd_epi16(v_short_sum2, DOT3V); // sum 16 shorts to 8 uint32s
|
||||
vs2 = _mm256_add_epi32(vsum2, vs2);
|
||||
vs1_0 = vs1;
|
||||
}
|
||||
|
||||
/* Defer the multiplication with 32 to outside of the loop */
|
||||
vs3 = _mm256_slli_epi32(vs3, 5);
|
||||
vs2 = _mm256_add_epi32(vs2, vs3);
|
||||
|
||||
adler0 = partial_hsum256(vs1) % BASE;
|
||||
adler1 = hsum256(vs2) % BASE;
|
||||
|
||||
(adler0, adler1)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(target_feature = "avx2")]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn empty_input() {
|
||||
let avx2 = adler32_avx2(0, &[]);
|
||||
let rust = crate::adler32::generic::adler32_rust(0, &[]);
|
||||
|
||||
assert_eq!(rust, avx2);
|
||||
}
|
||||
|
||||
quickcheck::quickcheck! {
|
||||
fn adler32_avx2_is_adler32_rust(v: Vec<u8>, start: u32) -> bool {
|
||||
let avx2 = adler32_avx2(start, &v);
|
||||
let rust = crate::adler32::generic::adler32_rust(start, &v);
|
||||
|
||||
rust == avx2
|
||||
}
|
||||
}
|
||||
|
||||
const INPUT: [u8; 1024] = {
|
||||
let mut array = [0; 1024];
|
||||
let mut i = 0;
|
||||
while i < array.len() {
|
||||
array[i] = i as u8;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
array
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn start_alignment() {
|
||||
// SIMD algorithm is sensitive to alignment;
|
||||
for i in 0..16 {
|
||||
for start in [crate::ADLER32_INITIAL_VALUE as u32, 42] {
|
||||
let avx2 = adler32_avx2(start, &INPUT[i..]);
|
||||
let rust = crate::adler32::generic::adler32_rust(start, &INPUT[i..]);
|
||||
|
||||
assert_eq!(avx2, rust, "offset = {i}, start = {start}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(miri, ignore)]
|
||||
fn large_input() {
|
||||
const DEFAULT: &[u8] = include_bytes!("../deflate/test-data/paper-100k.pdf");
|
||||
|
||||
let avx2 = adler32_avx2(42, DEFAULT);
|
||||
let rust = crate::adler32::generic::adler32_rust(42, DEFAULT);
|
||||
|
||||
assert_eq!(avx2, rust);
|
||||
}
|
||||
|
||||
// TODO: This could use `MaybeUninit::slice_assume_init` when it is stable.
|
||||
unsafe fn slice_assume_init(slice: &[MaybeUninit<u8>]) -> &[u8] {
|
||||
&*(slice as *const [MaybeUninit<u8>] as *const [u8])
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fold_copy_copies() {
|
||||
let src: Vec<_> = (0..128).map(|x| x as u8).collect();
|
||||
let mut dst = [MaybeUninit::new(0); 128];
|
||||
|
||||
for (i, _) in src.iter().enumerate() {
|
||||
dst.fill(MaybeUninit::new(0));
|
||||
|
||||
adler32_fold_copy_avx2(1, &mut dst[..i], &src[..i]);
|
||||
|
||||
assert_eq!(&src[..i], unsafe { slice_assume_init(&dst[..i]) })
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,136 @@
|
|||
use core::mem::MaybeUninit;
|
||||
|
||||
use super::{BASE, NMAX};
|
||||
|
||||
const UNROLL_MORE: bool = true;
|
||||
|
||||
// macros for loop unrolling
|
||||
macro_rules! do1 {
|
||||
($sum1:expr, $sum2:expr, $chunk:expr, $i:expr) => {
|
||||
$sum1 += unsafe { *$chunk.get_unchecked($i) } as u32;
|
||||
$sum2 += $sum1;
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! do2 {
|
||||
($sum1:expr, $sum2:expr, $chunk:expr, $i:expr) => {
|
||||
do1!($sum1, $sum2, $chunk, $i);
|
||||
do1!($sum1, $sum2, $chunk, $i + 1);
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! do4 {
|
||||
($sum1:expr, $sum2:expr, $chunk:expr, $i:expr) => {
|
||||
do2!($sum1, $sum2, $chunk, $i);
|
||||
do2!($sum1, $sum2, $chunk, $i + 2);
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! do8 {
|
||||
($sum1:expr, $sum2:expr, $chunk:expr, $i:expr) => {
|
||||
do4!($sum1, $sum2, $chunk, $i);
|
||||
do4!($sum1, $sum2, $chunk, $i + 4);
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! do16 {
|
||||
($sum1:expr, $sum2:expr, $chunk:expr) => {
|
||||
do8!($sum1, $sum2, $chunk, 0);
|
||||
do8!($sum1, $sum2, $chunk, 8);
|
||||
};
|
||||
}
|
||||
|
||||
pub fn adler32_rust(mut adler: u32, buf: &[u8]) -> u32 {
|
||||
/* split Adler-32 into component sums */
|
||||
let mut sum2 = (adler >> 16) & 0xffff;
|
||||
adler &= 0xffff;
|
||||
|
||||
/* in case user likes doing a byte at a time, keep it fast */
|
||||
if buf.len() == 1 {
|
||||
return adler32_len_1(adler, buf, sum2);
|
||||
}
|
||||
|
||||
/* initial Adler-32 value (deferred check for len == 1 speed) */
|
||||
if buf.is_empty() {
|
||||
return adler | (sum2 << 16);
|
||||
}
|
||||
|
||||
/* in case short lengths are provided, keep it somewhat fast */
|
||||
if buf.len() < 16 {
|
||||
return adler32_len_16(adler, buf, sum2);
|
||||
}
|
||||
|
||||
let mut it = buf.chunks_exact(NMAX as usize);
|
||||
for big_chunk in it.by_ref() {
|
||||
const N: usize = if UNROLL_MORE { 16 } else { 8 } as usize;
|
||||
let it = big_chunk.chunks_exact(N);
|
||||
for chunk in it {
|
||||
if N == 16 {
|
||||
do16!(adler, sum2, chunk);
|
||||
} else {
|
||||
do8!(adler, sum2, chunk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
adler %= BASE;
|
||||
sum2 %= BASE;
|
||||
}
|
||||
|
||||
/* do remaining bytes (less than NMAX, still just one modulo) */
|
||||
return adler32_len_64(adler, it.remainder(), sum2);
|
||||
}
|
||||
|
||||
pub(crate) fn adler32_len_1(mut adler: u32, buf: &[u8], mut sum2: u32) -> u32 {
|
||||
adler += buf[0] as u32;
|
||||
adler %= BASE;
|
||||
sum2 += adler;
|
||||
sum2 %= BASE;
|
||||
adler | (sum2 << 16)
|
||||
}
|
||||
|
||||
pub(crate) fn adler32_len_16(mut adler: u32, buf: &[u8], mut sum2: u32) -> u32 {
|
||||
for b in buf {
|
||||
adler += (*b) as u32;
|
||||
sum2 += adler;
|
||||
}
|
||||
|
||||
adler %= BASE;
|
||||
sum2 %= BASE; /* only added so many BASE's */
|
||||
/* return recombined sums */
|
||||
adler | (sum2 << 16)
|
||||
}
|
||||
|
||||
#[cfg_attr(not(target_arch = "x86_64"), allow(unused))]
|
||||
pub(crate) fn adler32_copy_len_16(
|
||||
mut adler: u32,
|
||||
dst: &mut [MaybeUninit<u8>],
|
||||
src: &[u8],
|
||||
mut sum2: u32,
|
||||
) -> u32 {
|
||||
for (source, destination) in src.iter().zip(dst.iter_mut()) {
|
||||
let v = *source;
|
||||
*destination = MaybeUninit::new(v);
|
||||
adler += v as u32;
|
||||
sum2 += adler;
|
||||
}
|
||||
|
||||
adler %= BASE;
|
||||
sum2 %= BASE; /* only added so many BASE's */
|
||||
/* return recombined sums */
|
||||
adler | (sum2 << 16)
|
||||
}
|
||||
|
||||
pub(crate) fn adler32_len_64(mut adler: u32, buf: &[u8], mut sum2: u32) -> u32 {
|
||||
const N: usize = if UNROLL_MORE { 16 } else { 8 };
|
||||
let mut it = buf.chunks_exact(N);
|
||||
for chunk in it.by_ref() {
|
||||
if N == 16 {
|
||||
do16!(adler, sum2, chunk);
|
||||
} else {
|
||||
do8!(adler, sum2, chunk, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Process tail (len < 16). */
|
||||
adler32_len_16(adler, it.remainder(), sum2)
|
||||
}
|
|
@ -0,0 +1,242 @@
|
|||
use core::arch::aarch64::{
|
||||
uint16x8_t, uint16x8x2_t, uint16x8x4_t, uint8x16_t, vaddq_u32, vaddw_high_u8, vaddw_u8,
|
||||
vdupq_n_u16, vdupq_n_u32, vget_high_u32, vget_lane_u32, vget_low_u16, vget_low_u32,
|
||||
vget_low_u8, vld1q_u8_x4, vmlal_high_u16, vmlal_u16, vpadalq_u16, vpadalq_u8, vpadd_u32,
|
||||
vpaddlq_u8, vsetq_lane_u32, vshlq_n_u32,
|
||||
};
|
||||
|
||||
use crate::adler32::{
|
||||
generic::{adler32_len_1, adler32_len_16},
|
||||
BASE, NMAX,
|
||||
};
|
||||
|
||||
const TAPS: [uint16x8x4_t; 2] = unsafe {
|
||||
core::mem::transmute::<[u16; 64], [uint16x8x4_t; 2]>([
|
||||
64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42,
|
||||
41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19,
|
||||
18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
|
||||
])
|
||||
};
|
||||
|
||||
pub fn adler32_neon(adler: u32, buf: &[u8]) -> u32 {
|
||||
assert!(crate::cpu_features::is_enabled_neon());
|
||||
unsafe { adler32_neon_internal(adler, buf) }
|
||||
}
|
||||
|
||||
#[target_feature(enable = "neon")]
|
||||
unsafe fn adler32_neon_internal(mut adler: u32, buf: &[u8]) -> u32 {
|
||||
/* split Adler-32 into component sums */
|
||||
let sum2 = (adler >> 16) & 0xffff;
|
||||
adler &= 0xffff;
|
||||
|
||||
/* in case user likes doing a byte at a time, keep it fast */
|
||||
if buf.len() == 1 {
|
||||
return adler32_len_1(adler, buf, sum2);
|
||||
}
|
||||
|
||||
/* initial Adler-32 value (deferred check for len == 1 speed) */
|
||||
if buf.is_empty() {
|
||||
return adler | (sum2 << 16);
|
||||
}
|
||||
|
||||
/* in case short lengths are provided, keep it somewhat fast */
|
||||
if buf.len() < 16 {
|
||||
return adler32_len_16(adler, buf, sum2);
|
||||
}
|
||||
|
||||
// Split Adler-32 into component sums, it can be supplied by the caller sites (e.g. in a PNG file).
|
||||
let mut pair = (adler, sum2);
|
||||
|
||||
// If memory is not SIMD aligned, do scalar sums to an aligned
|
||||
// offset, provided that doing so doesn't completely eliminate
|
||||
// SIMD operation. Aligned loads are still faster on ARM, even
|
||||
// though there's no explicit aligned load instruction
|
||||
const _: () = assert!(core::mem::align_of::<uint8x16_t>() == 16);
|
||||
let (before, middle, after) = unsafe { buf.align_to::<uint8x16_t>() };
|
||||
|
||||
pair = handle_tail(pair, before);
|
||||
|
||||
for chunk in middle.chunks(NMAX as usize / core::mem::size_of::<uint8x16_t>()) {
|
||||
pair = unsafe { accum32(pair, chunk) };
|
||||
pair.0 %= BASE;
|
||||
pair.1 %= BASE;
|
||||
}
|
||||
|
||||
if !after.is_empty() {
|
||||
pair = handle_tail(pair, after);
|
||||
pair.0 %= BASE;
|
||||
pair.1 %= BASE;
|
||||
}
|
||||
|
||||
// D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32.
|
||||
(pair.1 << 16) | pair.0
|
||||
}
|
||||
|
||||
fn handle_tail(mut pair: (u32, u32), buf: &[u8]) -> (u32, u32) {
|
||||
for x in buf {
|
||||
pair.0 += *x as u32;
|
||||
pair.1 += pair.0;
|
||||
}
|
||||
|
||||
pair
|
||||
}
|
||||
|
||||
#[target_feature(enable = "neon")]
|
||||
unsafe fn accum32(s: (u32, u32), buf: &[uint8x16_t]) -> (u32, u32) {
|
||||
let mut adacc = vdupq_n_u32(0);
|
||||
let mut s2acc = vdupq_n_u32(0);
|
||||
|
||||
adacc = vsetq_lane_u32(s.0, adacc, 0);
|
||||
s2acc = vsetq_lane_u32(s.1, s2acc, 0);
|
||||
|
||||
let mut s3acc = vdupq_n_u32(0);
|
||||
let mut adacc_prev = adacc;
|
||||
|
||||
let mut s2_0 = vdupq_n_u16(0);
|
||||
let mut s2_1 = vdupq_n_u16(0);
|
||||
let mut s2_2 = vdupq_n_u16(0);
|
||||
let mut s2_3 = vdupq_n_u16(0);
|
||||
|
||||
let mut s2_4 = vdupq_n_u16(0);
|
||||
let mut s2_5 = vdupq_n_u16(0);
|
||||
let mut s2_6 = vdupq_n_u16(0);
|
||||
let mut s2_7 = vdupq_n_u16(0);
|
||||
|
||||
let mut it = buf.chunks_exact(4);
|
||||
|
||||
for chunk in &mut it {
|
||||
let d0_d3 = vld1q_u8_x4(chunk.as_ptr() as *const u8);
|
||||
|
||||
// Unfortunately it doesn't look like there's a direct sum 8 bit to 32
|
||||
// bit instruction, we'll have to make due summing to 16 bits first
|
||||
let hsum = uint16x8x2_t(vpaddlq_u8(d0_d3.0), vpaddlq_u8(d0_d3.1));
|
||||
|
||||
let hsum_fold = uint16x8x2_t(vpadalq_u8(hsum.0, d0_d3.2), vpadalq_u8(hsum.1, d0_d3.3));
|
||||
|
||||
adacc = vpadalq_u16(adacc, hsum_fold.0);
|
||||
s3acc = vaddq_u32(s3acc, adacc_prev);
|
||||
adacc = vpadalq_u16(adacc, hsum_fold.1);
|
||||
|
||||
// If we do straight widening additions to the 16 bit values, we don't incur
|
||||
// the usual penalties of a pairwise add. We can defer the multiplications
|
||||
// until the very end. These will not overflow because we are incurring at
|
||||
// most 408 loop iterations (NMAX / 64), and a given lane is only going to be
|
||||
// summed into once. This means for the maximum input size, the largest value
|
||||
// we will see is 255 * 102 = 26010, safely under uint16 max
|
||||
s2_0 = vaddw_u8(s2_0, vget_low_u8(d0_d3.0));
|
||||
s2_1 = vaddw_high_u8(s2_1, d0_d3.0);
|
||||
s2_2 = vaddw_u8(s2_2, vget_low_u8(d0_d3.1));
|
||||
s2_3 = vaddw_high_u8(s2_3, d0_d3.1);
|
||||
s2_4 = vaddw_u8(s2_4, vget_low_u8(d0_d3.2));
|
||||
s2_5 = vaddw_high_u8(s2_5, d0_d3.2);
|
||||
s2_6 = vaddw_u8(s2_6, vget_low_u8(d0_d3.3));
|
||||
s2_7 = vaddw_high_u8(s2_7, d0_d3.3);
|
||||
|
||||
adacc_prev = adacc;
|
||||
}
|
||||
|
||||
s3acc = vshlq_n_u32(s3acc, 6);
|
||||
|
||||
let remainder = it.remainder();
|
||||
|
||||
if !remainder.is_empty() {
|
||||
let mut s3acc_0 = vdupq_n_u32(0);
|
||||
for d0 in remainder.iter().copied() {
|
||||
let adler: uint16x8_t = vpaddlq_u8(d0);
|
||||
s2_6 = vaddw_u8(s2_6, vget_low_u8(d0));
|
||||
s2_7 = vaddw_high_u8(s2_7, d0);
|
||||
adacc = vpadalq_u16(adacc, adler);
|
||||
s3acc_0 = vaddq_u32(s3acc_0, adacc_prev);
|
||||
adacc_prev = adacc;
|
||||
}
|
||||
|
||||
s3acc_0 = vshlq_n_u32(s3acc_0, 4);
|
||||
s3acc = vaddq_u32(s3acc_0, s3acc);
|
||||
}
|
||||
|
||||
let t0_t3 = TAPS[0];
|
||||
let t4_t7 = TAPS[1];
|
||||
|
||||
let mut s2acc_0 = vdupq_n_u32(0);
|
||||
let mut s2acc_1 = vdupq_n_u32(0);
|
||||
let mut s2acc_2 = vdupq_n_u32(0);
|
||||
|
||||
s2acc = vmlal_high_u16(s2acc, t0_t3.0, s2_0);
|
||||
s2acc_0 = vmlal_u16(s2acc_0, vget_low_u16(t0_t3.0), vget_low_u16(s2_0));
|
||||
s2acc_1 = vmlal_high_u16(s2acc_1, t0_t3.1, s2_1);
|
||||
s2acc_2 = vmlal_u16(s2acc_2, vget_low_u16(t0_t3.1), vget_low_u16(s2_1));
|
||||
|
||||
s2acc = vmlal_high_u16(s2acc, t0_t3.2, s2_2);
|
||||
s2acc_0 = vmlal_u16(s2acc_0, vget_low_u16(t0_t3.2), vget_low_u16(s2_2));
|
||||
s2acc_1 = vmlal_high_u16(s2acc_1, t0_t3.3, s2_3);
|
||||
s2acc_2 = vmlal_u16(s2acc_2, vget_low_u16(t0_t3.3), vget_low_u16(s2_3));
|
||||
|
||||
s2acc = vmlal_high_u16(s2acc, t4_t7.0, s2_4);
|
||||
s2acc_0 = vmlal_u16(s2acc_0, vget_low_u16(t4_t7.0), vget_low_u16(s2_4));
|
||||
s2acc_1 = vmlal_high_u16(s2acc_1, t4_t7.1, s2_5);
|
||||
s2acc_2 = vmlal_u16(s2acc_2, vget_low_u16(t4_t7.1), vget_low_u16(s2_5));
|
||||
|
||||
s2acc = vmlal_high_u16(s2acc, t4_t7.2, s2_6);
|
||||
s2acc_0 = vmlal_u16(s2acc_0, vget_low_u16(t4_t7.2), vget_low_u16(s2_6));
|
||||
s2acc_1 = vmlal_high_u16(s2acc_1, t4_t7.3, s2_7);
|
||||
s2acc_2 = vmlal_u16(s2acc_2, vget_low_u16(t4_t7.3), vget_low_u16(s2_7));
|
||||
|
||||
s2acc = vaddq_u32(s2acc_0, s2acc);
|
||||
s2acc_2 = vaddq_u32(s2acc_1, s2acc_2);
|
||||
s2acc = vaddq_u32(s2acc, s2acc_2);
|
||||
|
||||
let s2acc = vaddq_u32(s2acc, s3acc);
|
||||
let adacc2 = vpadd_u32(vget_low_u32(adacc), vget_high_u32(adacc));
|
||||
let s2acc2 = vpadd_u32(vget_low_u32(s2acc), vget_high_u32(s2acc));
|
||||
let as_ = vpadd_u32(adacc2, s2acc2);
|
||||
|
||||
(vget_lane_u32(as_, 0), vget_lane_u32(as_, 1))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
quickcheck::quickcheck! {
|
||||
fn adler32_neon_is_adler32_rust(v: Vec<u8>, start: u32) -> bool {
|
||||
let neon = adler32_neon(start, &v);
|
||||
let rust = crate::adler32::generic::adler32_rust(start, &v);
|
||||
|
||||
rust == neon
|
||||
}
|
||||
}
|
||||
|
||||
const INPUT: [u8; 1024] = {
|
||||
let mut array = [0; 1024];
|
||||
let mut i = 0;
|
||||
while i < array.len() {
|
||||
array[i] = i as u8;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
array
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn start_alignment() {
|
||||
// SIMD algorithm is sensitive to alignment;
|
||||
for i in 0..16 {
|
||||
for start in [crate::ADLER32_INITIAL_VALUE as u32, 42] {
|
||||
let neon = adler32_neon(start, &INPUT[i..]);
|
||||
let rust = crate::adler32::generic::adler32_rust(start, &INPUT[i..]);
|
||||
|
||||
assert_eq!(neon, rust, "offset = {i}, start = {start}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn large_input() {
|
||||
const DEFAULT: &[u8] = include_bytes!("../deflate/test-data/paper-100k.pdf");
|
||||
|
||||
let neon = adler32_neon(42, &DEFAULT);
|
||||
let rust = crate::adler32::generic::adler32_rust(42, &DEFAULT);
|
||||
|
||||
assert_eq!(neon, rust);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,353 @@
|
|||
use core::ffi::c_int;
|
||||
use core::{
|
||||
alloc::Layout,
|
||||
ffi::{c_uint, c_void},
|
||||
marker::PhantomData,
|
||||
mem::MaybeUninit,
|
||||
};
|
||||
|
||||
#[cfg(feature = "rust-allocator")]
|
||||
use alloc::alloc::GlobalAlloc;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
type size_t = usize;
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// This function is safe, but must have this type signature to be used elsewhere in the library
|
||||
#[cfg(unix)]
|
||||
unsafe extern "C" fn zalloc_c(opaque: *mut c_void, items: c_uint, size: c_uint) -> *mut c_void {
|
||||
let _ = opaque;
|
||||
|
||||
extern "C" {
|
||||
fn posix_memalign(memptr: *mut *mut c_void, align: size_t, size: size_t) -> c_int;
|
||||
}
|
||||
|
||||
let mut ptr = core::ptr::null_mut();
|
||||
match posix_memalign(&mut ptr, 64, items as size_t * size as size_t) {
|
||||
0 => ptr,
|
||||
_ => core::ptr::null_mut(),
|
||||
}
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// This function is safe, but must have this type signature to be used elsewhere in the library
|
||||
#[cfg(not(unix))]
|
||||
unsafe extern "C" fn zalloc_c(opaque: *mut c_void, items: c_uint, size: c_uint) -> *mut c_void {
|
||||
let _ = opaque;
|
||||
|
||||
extern "C" {
|
||||
fn malloc(size: size_t) -> *mut c_void;
|
||||
}
|
||||
|
||||
malloc(items as size_t * size as size_t)
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// The `ptr` must be allocated with the allocator that is used internally by `zcfree`
|
||||
unsafe extern "C" fn zfree_c(opaque: *mut c_void, ptr: *mut c_void) {
|
||||
let _ = opaque;
|
||||
|
||||
extern "C" {
|
||||
fn free(p: *mut c_void);
|
||||
}
|
||||
|
||||
unsafe { free(ptr) }
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// This function is safe to call.
|
||||
#[cfg(feature = "rust-allocator")]
|
||||
unsafe extern "C" fn zalloc_rust(_opaque: *mut c_void, count: c_uint, size: c_uint) -> *mut c_void {
|
||||
let align = 64;
|
||||
let size = count as usize * size as usize;
|
||||
|
||||
// internally, we want to align allocations to 64 bytes (in part for SIMD reasons)
|
||||
let layout = Layout::from_size_align(size, align).unwrap();
|
||||
|
||||
let ptr = std::alloc::System.alloc(layout);
|
||||
|
||||
ptr as *mut c_void
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `ptr` must be allocated with the rust `alloc::System` allocator
|
||||
/// - `opaque` is a `&usize` that represents the size of the allocation
|
||||
#[cfg(feature = "rust-allocator")]
|
||||
unsafe extern "C" fn zfree_rust(opaque: *mut c_void, ptr: *mut c_void) {
|
||||
if ptr.is_null() {
|
||||
return;
|
||||
}
|
||||
|
||||
// we can't really do much else. Deallocating with an invalid layout is UB.
|
||||
debug_assert!(!opaque.is_null());
|
||||
if opaque.is_null() {
|
||||
return;
|
||||
}
|
||||
|
||||
let size = *(opaque as *mut usize);
|
||||
let align = 64;
|
||||
|
||||
let layout = Layout::from_size_align(size, align);
|
||||
let layout = layout.unwrap();
|
||||
|
||||
std::alloc::System.dealloc(ptr.cast(), layout);
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[repr(C)]
|
||||
pub struct Allocator<'a> {
|
||||
pub zalloc: crate::c_api::alloc_func,
|
||||
pub zfree: crate::c_api::free_func,
|
||||
pub opaque: crate::c_api::voidpf,
|
||||
pub _marker: PhantomData<&'a ()>,
|
||||
}
|
||||
|
||||
impl Allocator<'static> {
|
||||
#[cfg(feature = "rust-allocator")]
|
||||
pub const RUST: Self = Self {
|
||||
zalloc: zalloc_rust,
|
||||
zfree: zfree_rust,
|
||||
opaque: core::ptr::null_mut(),
|
||||
_marker: PhantomData,
|
||||
};
|
||||
|
||||
#[cfg(feature = "c-allocator")]
|
||||
pub const C: Self = Self {
|
||||
zalloc: zalloc_c,
|
||||
zfree: zfree_c,
|
||||
opaque: core::ptr::null_mut(),
|
||||
_marker: PhantomData,
|
||||
};
|
||||
}
|
||||
|
||||
impl<'a> Allocator<'a> {
|
||||
pub fn allocate_layout(&self, layout: Layout) -> *mut c_void {
|
||||
// Special case for the Rust `alloc` backed allocator
|
||||
#[cfg(feature = "rust-allocator")]
|
||||
if self.zalloc == Allocator::RUST.zalloc {
|
||||
let ptr = unsafe { (Allocator::RUST.zalloc)(self.opaque, layout.size() as _, 1) };
|
||||
|
||||
debug_assert_eq!(ptr as usize % layout.align(), 0);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// General case for c-style allocation
|
||||
|
||||
// We cannot rely on the allocator giving properly aligned allocations and have to fix that ourselves.
|
||||
//
|
||||
// The general approach is to allocate a bit more than the layout needs, so that we can
|
||||
// give the application a properly aligned address and also store the real allocation
|
||||
// pointer in the allocation so that `free` can free the real allocation pointer.
|
||||
//
|
||||
//
|
||||
// Example: The layout represents `(u32, u32)`, with an alignment of 4 bytes and a
|
||||
// total size of 8 bytes.
|
||||
//
|
||||
// Assume that the allocator will give us address `0x07`. We need that to be a multiple
|
||||
// of the alignment, so that shifts the starting position to `0x08`. Then we also need
|
||||
// to store the pointer to the start of the allocation so that `free` can free that
|
||||
// pointer, bumping to `0x10`. The `0x10` pointer is then the pointer that the application
|
||||
// deals with. When free'ing, the original allocation pointer can be read from `0x10 - size_of::<*const c_void>()`.
|
||||
//
|
||||
// Of course there does need to be enough space in the allocation such that when we
|
||||
// shift the start forwards, the end is still within the allocation. Hence we allocate
|
||||
// `extra_space` bytes: enough for a full alignment plus a pointer.
|
||||
|
||||
// we need at least
|
||||
//
|
||||
// - `align` extra space so that no matter what pointer we get from zalloc, we can shift the start of the
|
||||
// allocation by at most `align - 1` so that `ptr as usize % align == 0
|
||||
// - `size_of::<*mut _>` extra space so that after aligning to `align`,
|
||||
// there is `size_of::<*mut _>` space to store the pointer to the allocation.
|
||||
// This pointer is then retrieved in `free`
|
||||
let extra_space = core::mem::size_of::<*mut c_void>() + layout.align();
|
||||
|
||||
// Safety: we assume allocating works correctly in the safety assumptions on
|
||||
// `DeflateStream` and `InflateStream`.
|
||||
let ptr = unsafe { (self.zalloc)(self.opaque, (layout.size() + extra_space) as _, 1) };
|
||||
|
||||
if ptr.is_null() {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Calculate return pointer address with space enough to store original pointer
|
||||
let align_diff = (ptr as usize).next_multiple_of(layout.align()) - (ptr as usize);
|
||||
|
||||
// Safety: offset is smaller than 64, and we allocated 64 extra bytes in the allocation
|
||||
let mut return_ptr = unsafe { ptr.cast::<u8>().add(align_diff) };
|
||||
|
||||
// if there is not enough space to store a pointer we need to make more
|
||||
if align_diff < core::mem::size_of::<*mut c_void>() {
|
||||
// # Safety
|
||||
//
|
||||
// - `return_ptr` is well-aligned, therefore `return_ptr + align` is also well-aligned
|
||||
// - we reserve `size_of::<*mut _> + align` extra space in the allocation, so
|
||||
// `ptr + align_diff + align` is still valid for (at least) `layout.size` bytes
|
||||
let offset = Ord::max(core::mem::size_of::<*mut c_void>(), layout.align());
|
||||
return_ptr = unsafe { return_ptr.add(offset) };
|
||||
}
|
||||
|
||||
// Store the original pointer for free()
|
||||
//
|
||||
// Safety: `align >= size_of::<*mut _>`, so there is now space for a pointer before `return_ptr`
|
||||
// in the allocation
|
||||
unsafe {
|
||||
let original_ptr = return_ptr.sub(core::mem::size_of::<*mut c_void>());
|
||||
core::ptr::write_unaligned(original_ptr.cast::<*mut c_void>(), ptr);
|
||||
};
|
||||
|
||||
// Return properly aligned pointer in allocation
|
||||
let ptr = return_ptr.cast::<c_void>();
|
||||
|
||||
debug_assert_eq!(ptr as usize % layout.align(), 0);
|
||||
|
||||
ptr
|
||||
}
|
||||
|
||||
pub fn allocate<T>(&self) -> Option<&'a mut MaybeUninit<T>> {
|
||||
let ptr = self.allocate_layout(Layout::new::<T>());
|
||||
|
||||
if ptr.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(unsafe { &mut *(ptr as *mut MaybeUninit<T>) })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn allocate_slice<T>(&self, len: usize) -> Option<&'a mut [MaybeUninit<T>]> {
|
||||
let ptr = self.allocate_layout(Layout::array::<T>(len).ok()?);
|
||||
|
||||
if ptr.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(unsafe { core::slice::from_raw_parts_mut(ptr.cast(), len) })
|
||||
}
|
||||
}
|
||||
|
||||
/// # Panics
|
||||
///
|
||||
/// - when `len` is 0
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// - `ptr` must be allocated with this allocator
|
||||
/// - `len` must be the number of `T`s that are in this allocation
|
||||
#[allow(unused)] // Rust needs `len` for deallocation
|
||||
pub unsafe fn deallocate<T>(&self, ptr: *mut T, len: usize) {
|
||||
if !ptr.is_null() {
|
||||
// Special case for the Rust `alloc` backed allocator
|
||||
#[cfg(feature = "rust-allocator")]
|
||||
if self.zfree == Allocator::RUST.zfree {
|
||||
assert_ne!(len, 0, "invalid size for {:?}", ptr);
|
||||
let mut size = core::mem::size_of::<T>() * len;
|
||||
return (Allocator::RUST.zfree)(&mut size as *mut usize as *mut c_void, ptr.cast());
|
||||
}
|
||||
|
||||
// General case for c-style allocation
|
||||
let original_ptr = (ptr as *mut u8).sub(core::mem::size_of::<*const c_void>());
|
||||
let free_ptr = core::ptr::read_unaligned(original_ptr as *mut *mut c_void);
|
||||
|
||||
(self.zfree)(self.opaque, free_ptr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use core::sync::atomic::{AtomicPtr, Ordering};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use super::*;
|
||||
|
||||
static PTR: AtomicPtr<c_void> = AtomicPtr::new(core::ptr::null_mut());
|
||||
static MUTEX: Mutex<()> = Mutex::new(());
|
||||
|
||||
unsafe extern "C" fn unaligned_alloc(
|
||||
_opaque: *mut c_void,
|
||||
_items: c_uint,
|
||||
_size: c_uint,
|
||||
) -> *mut c_void {
|
||||
PTR.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
unsafe extern "C" fn unaligned_free(_opaque: *mut c_void, ptr: *mut c_void) {
|
||||
let expected = PTR.load(Ordering::Relaxed);
|
||||
assert_eq!(expected, ptr)
|
||||
}
|
||||
|
||||
fn unaligned_allocator_help<T>() {
|
||||
let mut buf = [0u8; 1024];
|
||||
|
||||
// we don't want anyone else messing with the PTR static
|
||||
let _guard = MUTEX.lock().unwrap();
|
||||
|
||||
for i in 0..64 {
|
||||
let ptr = unsafe { buf.as_mut_ptr().add(i).cast() };
|
||||
PTR.store(ptr, Ordering::Relaxed);
|
||||
|
||||
let allocator = Allocator {
|
||||
zalloc: unaligned_alloc,
|
||||
zfree: unaligned_free,
|
||||
opaque: core::ptr::null_mut(),
|
||||
_marker: PhantomData,
|
||||
};
|
||||
|
||||
let ptr = allocator.allocate::<T>().unwrap();
|
||||
assert_eq!(ptr.as_ptr() as usize % core::mem::align_of::<T>(), 0);
|
||||
unsafe { allocator.deallocate(ptr, 1) }
|
||||
|
||||
let ptr = allocator.allocate_slice::<T>(10).unwrap();
|
||||
assert_eq!(ptr.as_ptr() as usize % core::mem::align_of::<T>(), 0);
|
||||
unsafe { allocator.deallocate(ptr.as_mut_ptr(), 10) }
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unaligned_allocator_0() {
|
||||
unaligned_allocator_help::<()>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unaligned_allocator_1() {
|
||||
unaligned_allocator_help::<u8>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unaligned_allocator_2() {
|
||||
unaligned_allocator_help::<u16>()
|
||||
}
|
||||
#[test]
|
||||
fn unaligned_allocator_4() {
|
||||
unaligned_allocator_help::<u32>()
|
||||
}
|
||||
#[test]
|
||||
fn unaligned_allocator_8() {
|
||||
unaligned_allocator_help::<u64>()
|
||||
}
|
||||
#[test]
|
||||
fn unaligned_allocator_16() {
|
||||
unaligned_allocator_help::<u128>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unaligned_allocator_32() {
|
||||
#[repr(C, align(32))]
|
||||
struct Align32(u8);
|
||||
|
||||
unaligned_allocator_help::<Align32>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unaligned_allocator_64() {
|
||||
#[repr(C, align(64))]
|
||||
struct Align64(u8);
|
||||
|
||||
unaligned_allocator_help::<Align64>()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,228 @@
|
|||
#![allow(non_camel_case_types)]
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use core::ffi::{c_char, c_int, c_uchar, c_uint, c_ulong, c_void};
|
||||
|
||||
use crate::allocate::Allocator;
|
||||
|
||||
pub type alloc_func = unsafe extern "C" fn(voidpf, uInt, uInt) -> voidpf;
|
||||
pub type free_func = unsafe extern "C" fn(voidpf, voidpf);
|
||||
|
||||
pub type Bytef = u8;
|
||||
pub type in_func = unsafe extern "C" fn(*mut c_void, *mut *const c_uchar) -> c_uint;
|
||||
pub type out_func = unsafe extern "C" fn(*mut c_void, *mut c_uchar, c_uint) -> c_int;
|
||||
pub type uInt = c_uint;
|
||||
pub type uLong = c_ulong;
|
||||
pub type uLongf = c_ulong;
|
||||
pub type voidp = *mut c_void;
|
||||
pub type voidpc = *const c_void;
|
||||
pub type voidpf = *mut c_void;
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct z_stream {
|
||||
pub next_in: *const Bytef,
|
||||
pub avail_in: uInt,
|
||||
pub total_in: z_size,
|
||||
pub next_out: *mut Bytef,
|
||||
pub avail_out: uInt,
|
||||
pub total_out: z_size,
|
||||
pub msg: *mut c_char,
|
||||
pub state: *mut internal_state,
|
||||
pub zalloc: Option<alloc_func>,
|
||||
pub zfree: Option<free_func>,
|
||||
pub opaque: voidpf,
|
||||
pub data_type: c_int,
|
||||
pub adler: z_checksum,
|
||||
pub reserved: uLong,
|
||||
}
|
||||
pub type z_streamp = *mut z_stream;
|
||||
|
||||
impl Default for z_stream {
|
||||
fn default() -> Self {
|
||||
let mut stream = Self {
|
||||
next_in: core::ptr::null_mut(),
|
||||
avail_in: 0,
|
||||
total_in: 0,
|
||||
next_out: core::ptr::null_mut(),
|
||||
avail_out: 0,
|
||||
total_out: 0,
|
||||
msg: core::ptr::null_mut(),
|
||||
state: core::ptr::null_mut(),
|
||||
zalloc: None,
|
||||
zfree: None,
|
||||
opaque: core::ptr::null_mut(),
|
||||
data_type: 0,
|
||||
adler: 0,
|
||||
reserved: 0,
|
||||
};
|
||||
|
||||
#[cfg(feature = "rust-allocator")]
|
||||
if stream.zalloc.is_none() || stream.zfree.is_none() {
|
||||
stream.configure_default_rust_allocator()
|
||||
}
|
||||
|
||||
#[cfg(feature = "c-allocator")]
|
||||
if stream.zalloc.is_none() || stream.zfree.is_none() {
|
||||
stream.configure_default_c_allocator()
|
||||
}
|
||||
|
||||
stream
|
||||
}
|
||||
}
|
||||
|
||||
impl z_stream {
|
||||
fn configure_allocator(&mut self, alloc: Allocator) {
|
||||
self.zalloc = Some(alloc.zalloc);
|
||||
self.zfree = Some(alloc.zfree);
|
||||
self.opaque = alloc.opaque;
|
||||
}
|
||||
|
||||
#[cfg(feature = "rust-allocator")]
|
||||
pub fn configure_default_rust_allocator(&mut self) {
|
||||
self.configure_allocator(Allocator::RUST)
|
||||
}
|
||||
|
||||
#[cfg(feature = "c-allocator")]
|
||||
pub fn configure_default_c_allocator(&mut self) {
|
||||
self.configure_allocator(Allocator::C)
|
||||
}
|
||||
}
|
||||
|
||||
// // zlib stores Adler-32 and CRC-32 checksums in unsigned long; zlib-ng uses uint32_t.
|
||||
pub(crate) type z_size = c_ulong;
|
||||
pub(crate) type z_checksum = c_ulong;
|
||||
|
||||
// opaque to the user
|
||||
pub enum internal_state {}
|
||||
|
||||
pub const Z_NO_FLUSH: c_int = 0;
|
||||
pub const Z_PARTIAL_FLUSH: c_int = 1;
|
||||
pub const Z_SYNC_FLUSH: c_int = 2;
|
||||
pub const Z_FULL_FLUSH: c_int = 3;
|
||||
pub const Z_FINISH: c_int = 4;
|
||||
pub const Z_BLOCK: c_int = 5;
|
||||
pub const Z_TREES: c_int = 6;
|
||||
|
||||
pub const Z_OK: c_int = 0;
|
||||
pub const Z_STREAM_END: c_int = 1;
|
||||
pub const Z_NEED_DICT: c_int = 2;
|
||||
pub const Z_ERRNO: c_int = -1;
|
||||
pub const Z_STREAM_ERROR: c_int = -2;
|
||||
pub const Z_DATA_ERROR: c_int = -3;
|
||||
pub const Z_MEM_ERROR: c_int = -4;
|
||||
pub const Z_BUF_ERROR: c_int = -5;
|
||||
pub const Z_VERSION_ERROR: c_int = -6;
|
||||
|
||||
pub const Z_NO_COMPRESSION: c_int = 0;
|
||||
pub const Z_BEST_SPEED: c_int = 1;
|
||||
pub const Z_BEST_COMPRESSION: c_int = 9;
|
||||
pub const Z_DEFAULT_COMPRESSION: c_int = -1;
|
||||
|
||||
pub const Z_DEFLATED: c_int = 8;
|
||||
|
||||
pub const Z_BINARY: c_int = 0;
|
||||
pub const Z_TEXT: c_int = 1;
|
||||
pub const Z_ASCII: c_int = Z_TEXT; /* for compatibility with 1.2.2 and earlier */
|
||||
pub const Z_UNKNOWN: c_int = 2;
|
||||
|
||||
pub const Z_FILTERED: c_int = 1;
|
||||
pub const Z_HUFFMAN_ONLY: c_int = 2;
|
||||
pub const Z_RLE: c_int = 3;
|
||||
pub const Z_FIXED: c_int = 4;
|
||||
pub const Z_DEFAULT_STRATEGY: c_int = 0;
|
||||
|
||||
pub type gz_headerp = *mut gz_header;
|
||||
|
||||
/// gzip header information passed to and from zlib routines.
|
||||
/// See RFC 1952 for more details on the meanings of these fields.
|
||||
#[derive(Debug)]
|
||||
#[repr(C)]
|
||||
pub struct gz_header {
|
||||
/// true if compressed data believed to be text
|
||||
pub text: i32,
|
||||
/// modification time
|
||||
pub time: c_ulong,
|
||||
/// extra flags (not used when writing a gzip file)
|
||||
pub xflags: i32,
|
||||
/// operating system
|
||||
pub os: i32,
|
||||
/// pointer to extra field or NULL if none
|
||||
pub extra: *mut u8,
|
||||
/// extra field length (valid if extra != NULL)
|
||||
pub extra_len: u32,
|
||||
/// space at extra (only when reading header)
|
||||
pub extra_max: u32,
|
||||
/// pointer to zero-terminated file name or NULL
|
||||
pub name: *mut u8,
|
||||
/// space at name (only when reading header)
|
||||
pub name_max: u32,
|
||||
/// pointer to zero-terminated comment or NULL
|
||||
pub comment: *mut u8,
|
||||
/// space at comment (only when reading header)
|
||||
pub comm_max: u32,
|
||||
/// true if there was or will be a header crc
|
||||
pub hcrc: i32,
|
||||
/// true when done reading gzip header (not used when writing a gzip file)
|
||||
pub done: i32,
|
||||
}
|
||||
|
||||
impl Default for gz_header {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
text: 0,
|
||||
time: 0,
|
||||
xflags: 0,
|
||||
os: 0,
|
||||
extra: core::ptr::null_mut(),
|
||||
extra_len: 0,
|
||||
extra_max: 0,
|
||||
name: core::ptr::null_mut(),
|
||||
name_max: 0,
|
||||
comment: core::ptr::null_mut(),
|
||||
comm_max: 0,
|
||||
hcrc: 0,
|
||||
done: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl gz_header {
|
||||
// based on the spec https://www.ietf.org/rfc/rfc1952.txt
|
||||
//
|
||||
// 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
|
||||
// 1 - Amiga
|
||||
// 2 - VMS (or OpenVMS)
|
||||
// 3 - Unix
|
||||
// 4 - VM/CMS
|
||||
// 5 - Atari TOS
|
||||
// 6 - HPFS filesystem (OS/2, NT)
|
||||
// 7 - Macintosh
|
||||
// 8 - Z-System
|
||||
// 9 - CP/M
|
||||
// 10 - TOPS-20
|
||||
// 11 - NTFS filesystem (NT)
|
||||
// 12 - QDOS
|
||||
// 13 - Acorn RISCOS
|
||||
// 255 - unknown
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
pub const OS_CODE: u8 = {
|
||||
if cfg!(windows) {
|
||||
10
|
||||
} else if cfg!(target_os = "macos") {
|
||||
19
|
||||
} else if cfg!(unix) {
|
||||
3
|
||||
} else {
|
||||
3 // assume unix
|
||||
}
|
||||
};
|
||||
|
||||
pub(crate) fn flags(&self) -> u8 {
|
||||
(if self.text > 0 { 1 } else { 0 })
|
||||
+ (if self.hcrc > 0 { 2 } else { 0 })
|
||||
+ (if self.extra.is_null() { 0 } else { 4 })
|
||||
+ (if self.name.is_null() { 0 } else { 8 })
|
||||
+ (if self.comment.is_null() { 0 } else { 16 })
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
// Runtime CPU feature detection. Each `is_enabled_*` helper returns whether
// the corresponding SIMD/CRC instruction set is available at runtime.
//
// Pattern used throughout: the `#[cfg]`-gated `return` statement is only
// compiled in on the matching architecture *and* when the `std` feature is
// enabled (the `is_*_feature_detected!` macros live in std). On every other
// configuration the function falls through to a conservative `false`.
// `unreachable_code` is allowed because the trailing `false` is dead code
// whenever the gated `return` is compiled in.
#![allow(dead_code)]
#![allow(unreachable_code)]

/// Runtime check for x86 SSE support; `false` on non-x86 or no_std builds.
#[inline(always)]
pub fn is_enabled_sse() -> bool {
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    #[cfg(feature = "std")]
    return std::is_x86_feature_detected!("sse");

    false
}

/// Runtime check for x86 SSE4.2 support; `false` on non-x86 or no_std builds.
#[inline(always)]
pub fn is_enabled_sse42() -> bool {
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    #[cfg(feature = "std")]
    return std::is_x86_feature_detected!("sse4.2");

    false
}

/// Runtime check for x86 AVX2 support; `false` on non-x86 or no_std builds.
#[inline(always)]
pub fn is_enabled_avx2() -> bool {
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    #[cfg(feature = "std")]
    return std::is_x86_feature_detected!("avx2");

    false
}

/// Runtime check for x86 AVX-512 Foundation support; `false` elsewhere.
#[inline(always)]
pub fn is_enabled_avx512() -> bool {
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    #[cfg(feature = "std")]
    return std::is_x86_feature_detected!("avx512f");

    false
}

/// Runtime check for the carry-less multiply path used by the CRC32 folding
/// code: requires pclmulqdq *and* sse2 *and* sse4.1 together (x86_64 only).
#[inline(always)]
pub fn is_enabled_pclmulqdq() -> bool {
    #[cfg(target_arch = "x86_64")]
    #[cfg(feature = "std")]
    return std::is_x86_feature_detected!("pclmulqdq")
        && std::is_x86_feature_detected!("sse2")
        && std::is_x86_feature_detected!("sse4.1");

    false
}

/// Runtime check for aarch64 NEON support; `false` on other targets.
#[inline(always)]
pub fn is_enabled_neon() -> bool {
    #[cfg(target_arch = "aarch64")]
    #[cfg(feature = "std")]
    return std::arch::is_aarch64_feature_detected!("neon");

    false
}

/// Runtime check for the aarch64 CRC32 instructions; `false` elsewhere.
#[inline(always)]
pub fn is_enabled_crc() -> bool {
    #[cfg(target_arch = "aarch64")]
    #[cfg(feature = "std")]
    return std::arch::is_aarch64_feature_detected!("crc");

    false
}
|
|
@ -0,0 +1,262 @@
|
|||
use core::mem::MaybeUninit;
|
||||
|
||||
use crate::{read_buf::ReadBuf, CRC32_INITIAL_VALUE};
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub(crate) mod acle;
|
||||
mod braid;
|
||||
mod combine;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod pclmulqdq;
|
||||
|
||||
pub use combine::crc32_combine;
|
||||
|
||||
pub fn crc32(start: u32, buf: &[u8]) -> u32 {
|
||||
/* For lens < 64, crc32_braid method is faster. The CRC32 instruction for
|
||||
* these short lengths might also prove to be effective */
|
||||
if buf.len() < 64 {
|
||||
return crc32_braid(start, buf);
|
||||
}
|
||||
|
||||
let mut crc_state = Crc32Fold::new_with_initial(start);
|
||||
crc_state.fold(buf, start);
|
||||
crc_state.finish()
|
||||
}
|
||||
|
||||
/// Portable (non-SIMD) CRC-32 over `buf`, continuing from `start`.
///
/// Delegates to the braid implementation with a fixed braid width of 5
/// words, the width used throughout this crate.
pub fn crc32_braid(start: u32, buf: &[u8]) -> u32 {
    braid::crc32_braid::<5>(start, buf)
}
|
||||
|
||||
/// Copy `buf` into `dst` while computing the CRC-32 of `buf` (from the
/// crate's initial CRC value), returning the checksum.
///
/// Assumes `dst` has room for `buf.len()` bytes — TODO confirm against
/// `ReadBuf`'s contract at the call sites.
#[allow(unused)]
pub fn crc32_copy(dst: &mut ReadBuf, buf: &[u8]) -> u32 {
    /* For lens < 64, crc32_braid method is faster. The CRC32 instruction for
     * these short lengths might also prove to be effective */
    if buf.len() < 64 {
        // Short path: let ReadBuf do the copy, then checksum separately.
        dst.extend(buf);
        return braid::crc32_braid::<5>(CRC32_INITIAL_VALUE, buf);
    }

    let mut crc_state = Crc32Fold::new();

    // fold_copy writes `buf` into dst's (possibly uninitialized) spare
    // capacity while folding; only afterwards is that region marked
    // initialized and filled. The order of these three calls matters.
    crc_state.fold_copy(unsafe { dst.inner_mut() }, buf);
    unsafe { dst.assume_init(buf.len()) };
    dst.set_filled(buf.len());

    crc_state.finish()
}
|
||||
|
||||
/// Incremental CRC-32 state.
///
/// On x86_64 it additionally carries the pclmulqdq folding accumulator;
/// on every other architecture the state is just the running `value`.
#[derive(Debug, Clone, Copy)]
pub struct Crc32Fold {
    #[cfg(target_arch = "x86_64")]
    fold: pclmulqdq::Accumulator,
    // Running CRC value used by the non-pclmulqdq code paths.
    value: u32,
}

impl Default for Crc32Fold {
    /// Equivalent to [`Crc32Fold::new`]: starts from `CRC32_INITIAL_VALUE`.
    fn default() -> Self {
        Self::new()
    }
}
|
||||
|
||||
impl Crc32Fold {
    /// State starting from the standard CRC-32 initial value.
    pub const fn new() -> Self {
        Self::new_with_initial(CRC32_INITIAL_VALUE)
    }

    /// State continuing from an arbitrary running CRC value.
    pub const fn new_with_initial(initial: u32) -> Self {
        Self {
            #[cfg(target_arch = "x86_64")]
            fold: pclmulqdq::Accumulator::new(),
            value: initial,
        }
    }

    // Thin wrappers over the cpu_features checks; `allow(unused)` because
    // each is only called on its own architecture's dispatch path (tests
    // also use is_crc_enabled as a runtime guard).
    #[cfg_attr(not(target_arch = "x86_64"), allow(unused))]
    pub(crate) fn is_pclmulqdq_enabled() -> bool {
        crate::cpu_features::is_enabled_pclmulqdq()
    }

    #[cfg_attr(not(target_arch = "aarch64"), allow(unused))]
    pub(crate) fn is_crc_enabled() -> bool {
        crate::cpu_features::is_enabled_crc()
    }

    /// Fold `src` into the state.
    ///
    /// Dispatch order: x86_64 pclmulqdq accumulator, then aarch64 CRC
    /// instructions, then the portable braid fallback. `_start` is only
    /// consumed by the pclmulqdq path; the other paths continue from
    /// `self.value`.
    pub fn fold(&mut self, src: &[u8], _start: u32) {
        #[cfg(target_arch = "x86_64")]
        if Self::is_pclmulqdq_enabled() {
            return self.fold.fold(src, _start);
        }

        #[cfg(target_arch = "aarch64")]
        if Self::is_crc_enabled() {
            self.value = self::acle::crc32_acle_aarch64(self.value, src);
            return;
        }

        // in this case the start value is ignored
        self.value = braid::crc32_braid::<5>(self.value, src);
    }

    /// Fold `src` into the state while copying it into `dst`.
    ///
    /// The fallback path folds first, then copies; assumes
    /// `dst.len() >= src.len()` (indexing panics otherwise).
    pub fn fold_copy(&mut self, dst: &mut [MaybeUninit<u8>], src: &[u8]) {
        #[cfg(target_arch = "x86_64")]
        if Self::is_pclmulqdq_enabled() {
            return self.fold.fold_copy(dst, src);
        }

        self.fold(src, 0);
        dst[..src.len()].copy_from_slice(slice_to_uninit(src));
    }

    /// Finalize and return the CRC-32 value.
    pub fn finish(self) -> u32 {
        #[cfg(target_arch = "x86_64")]
        if Self::is_pclmulqdq_enabled() {
            return unsafe { self.fold.finish() };
        }

        self.value
    }
}
|
||||
|
||||
// Reinterpret an initialized byte slice as a slice of MaybeUninit bytes so
// it can be used with copy_from_slice into uninitialized buffers.
// when stable, use MaybeUninit::write_slice
fn slice_to_uninit(slice: &[u8]) -> &[MaybeUninit<u8>] {
    // safety: &[T] and &[MaybeUninit<T>] have the same layout
    unsafe { &*(slice as *const [u8] as *const [MaybeUninit<u8>]) }
}
|
||||
|
||||
// Tests validate every code path (SIMD and portable) against the
// independent `crc32fast` crate, plus chunking/alignment edge cases.
#[cfg(test)]
mod test {
    use test::braid::crc32_braid;

    use super::*;

    // 1024 bytes of 0,1,2,...,255 repeating — large enough to exercise
    // the SIMD paths (which need >= 64 bytes).
    const INPUT: [u8; 1024] = {
        let mut array = [0; 1024];
        let mut i = 0;
        while i < array.len() {
            array[i] = i as u8;
            i += 1;
        }

        array
    };

    #[test]
    fn test_crc32_fold() {
        // input large enough to trigger the SIMD
        let mut h = crc32fast::Hasher::new_with_initial(CRC32_INITIAL_VALUE);
        h.update(&INPUT);
        assert_eq!(crc32(CRC32_INITIAL_VALUE, &INPUT), h.finalize());
    }

    #[test]
    fn test_crc32_fold_align() {
        // SIMD algorithm is sensitive to alignment;
        // try all 16 byte offsets and two distinct start values.
        for i in 0..16 {
            for start in [CRC32_INITIAL_VALUE, 42] {
                let mut h = crc32fast::Hasher::new_with_initial(start);
                h.update(&INPUT[i..]);
                assert_eq!(
                    crc32(start, &INPUT[i..]),
                    h.finalize(),
                    "offset = {i}, start = {start}"
                );
            }
        }
    }

    #[test]
    fn test_crc32_fold_copy() {
        // input large enough to trigger the SIMD
        let mut h = crc32fast::Hasher::new_with_initial(CRC32_INITIAL_VALUE);
        h.update(&INPUT);
        let mut dst = [0; INPUT.len()];
        let mut dst = ReadBuf::new(&mut dst);

        assert_eq!(crc32_copy(&mut dst, &INPUT), h.finalize());

        // the copy side-effect must reproduce the input exactly
        assert_eq!(INPUT, dst.filled());
    }

    quickcheck::quickcheck! {
        fn crc_fold_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v);

            let a = crc32(start, &v) ;
            let b = h.finalize();

            a == b
        }

        fn crc_fold_copy_is_crc32fast(v: Vec<u8>) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(CRC32_INITIAL_VALUE);
            h.update(&v);

            let mut dst = vec![0; v.len()];
            let mut dst = ReadBuf::new(&mut dst);

            let a = crc32_copy(&mut dst, &v) ;
            let b = h.finalize();

            assert_eq!(a,b);

            v == dst.filled()
        }
    }

    // Feeding the input in irregular chunks must give the same CRC as one
    // flat pass (the running value threads through correctly).
    #[test]
    fn chunked() {
        const INPUT: &[&[u8]] = &[
            &[116],
            &[111, 107, 105, 111, 44, 32, 97, 115],
            &[121, 110, 99, 45, 115, 116, 100, 44],
            &[32, 97, 110, 100, 32, 115, 109, 111],
            &[108, 46, 32, 89, 111, 117, 226, 128],
            &[153, 118, 101, 32, 112, 114, 111, 98],
            &[97, 98, 108, 121, 32, 117, 115, 101],
            &[100, 32, 116, 104, 101, 109, 32, 97],
            &[116, 32, 115, 111, 109, 101, 32, 112],
            &[111, 105, 110, 116, 44, 32, 101, 105],
            &[116, 104, 101, 114, 32, 100, 105, 114],
            &[101, 99, 116, 108, 121, 32, 111, 114],
            &[0],
        ];

        const START: u32 = 2380683574;

        let mut in_chunks = START;
        for chunk in INPUT {
            in_chunks = crc32(in_chunks, chunk);
        }

        let flattened: Vec<_> = INPUT.iter().copied().flatten().copied().collect();
        let flat = crc32(START, &flattened);

        assert_eq!(in_chunks, flat);
    }

    // Force the worst possible 16-byte misalignment (ptr % 16 == 15) and
    // check the SIMD result still matches the portable braid result and a
    // known-good constant.
    #[test]
    fn nasty_alignment() {
        const START: u32 = 2380683574;

        const FLAT: &[u8] = &[
            116, 111, 107, 105, 111, 44, 32, 97, 115, 121, 110, 99, 45, 115, 116, 100, 44, 32, 97,
            110, 100, 32, 115, 109, 111, 108, 46, 32, 89, 111, 117, 226, 128, 153, 118, 101, 32,
            112, 114, 111, 98, 97, 98, 108, 121, 32, 117, 115, 101, 100, 32, 116, 104, 101, 109,
            32, 97, 116, 32, 115, 111, 109, 101, 32, 112, 111, 105, 110, 116, 44, 32, 101, 105,
            116, 104, 101, 114, 32, 100, 105, 114, 101, 99, 116, 108, 121, 32, 111, 114, 0,
        ];

        let mut i = 0;
        let mut flat = FLAT.to_vec();
        while flat[i..].as_ptr() as usize % 16 != 15 {
            flat.insert(0, 0);
            i += 1;
        }

        let flat = &flat[i..];

        assert_eq!(crc32_braid::<5>(START, flat), crc32(START, flat));
        assert_eq!(crc32(2380683574, flat), 1175758345);
    }
}
|
|
@ -0,0 +1,201 @@
|
|||
/// CRC-32 over `buf` using the aarch64 CRC instructions, continuing from
/// `crc`. Uses the zlib convention: the running value is complemented on
/// entry and on exit.
///
/// Caller must ensure the `crc` target feature is available (the intrinsics
/// below are `#[target_feature(enable = "crc")]`).
#[cfg_attr(not(target_arch = "aarch64"), allow(unused))]
pub fn crc32_acle_aarch64(crc: u32, buf: &[u8]) -> u32 {
    let mut c = !crc;

    // Split into an unaligned head, u64-aligned body, and tail.
    // safety for align_to: u8 -> u64 reinterpretation of initialized bytes.
    let (before, middle, after) = unsafe { buf.align_to::<u64>() };

    c = remainder(c, before);

    if middle.is_empty() && after.is_empty() {
        return !c;
    }

    // Main loop: one 8-byte CRC instruction per aligned word.
    for d in middle {
        c = unsafe { __crc32d(c, *d) };
    }

    c = remainder(c, after);

    !c
}
|
||||
|
||||
/// 32-bit Arm variant of [`crc32_acle_aarch64`]: aligns to `u32` words and
/// uses the 4-byte CRC instruction for the body. Same complement-in /
/// complement-out convention and feature requirements.
#[cfg_attr(not(target_arch = "arm"), allow(unused))]
pub fn crc32_acle_arm(crc: u32, buf: &[u8]) -> u32 {
    let mut c = !crc;

    // safety for align_to: u8 -> u32 reinterpretation of initialized bytes.
    let (before, middle, after) = unsafe { buf.align_to::<u32>() };

    c = remainder(c, before);

    if middle.is_empty() && after.is_empty() {
        return !c;
    }

    for w in middle {
        c = unsafe { __crc32w(c, *w) };
    }

    c = remainder(c, after);

    !c
}
|
||||
|
||||
/// Fold up to 7 leftover bytes (the unaligned head/tail from `align_to`)
/// into the running complemented CRC `c`, consuming 4, then 2, then 1
/// bytes — enough for any remainder shorter than a u64 word.
fn remainder(mut c: u32, mut buf: &[u8]) -> u32 {
    // 4-byte chunk, if present (little-endian load to match __crc32w).
    if let [b0, b1, b2, b3, rest @ ..] = buf {
        c = unsafe { __crc32w(c, u32::from_le_bytes([*b0, *b1, *b2, *b3])) };
        buf = rest;
    }

    // 2-byte chunk.
    if let [b0, b1, rest @ ..] = buf {
        c = unsafe { __crc32h(c, u16::from_le_bytes([*b0, *b1])) };
        buf = rest;
    }

    // Final single byte.
    if let [b0, rest @ ..] = buf {
        c = unsafe { __crc32b(c, *b0) };
        buf = rest;
    }

    // 4 + 2 + 1 covers every length 0..=7, so nothing can remain.
    debug_assert!(buf.is_empty());

    c
}
|
||||
|
||||
// Hand-rolled wrappers around the Arm CRC32 instructions, written with
// inline asm so they work on both arm and aarch64 (the arm wrappers also
// require the v8 target feature). Each takes the running CRC and one data
// unit and returns the updated CRC.

/// CRC32 single round checksum for bytes (8 bits).
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32b)
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
unsafe fn __crc32b(mut crc: u32, data: u8) -> u32 {
    core::arch::asm!("crc32b {crc:w}, {crc:w}, {data:w}", crc = inout(reg) crc, data = in(reg) data);
    crc
}

/// CRC32 single round checksum for half words (16 bits).
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32h)
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
unsafe fn __crc32h(mut crc: u32, data: u16) -> u32 {
    core::arch::asm!("crc32h {crc:w}, {crc:w}, {data:w}", crc = inout(reg) crc, data = in(reg) data);
    crc
}

/// CRC32 single round checksum for words (32 bits).
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32w)
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
pub unsafe fn __crc32w(mut crc: u32, data: u32) -> u32 {
    core::arch::asm!("crc32w {crc:w}, {crc:w}, {data:w}", crc = inout(reg) crc, data = in(reg) data);
    crc
}

/// CRC32 single round checksum for double words (64 bits).
///
/// aarch64-only: the underlying `crc32x` instruction has no 32-bit Arm form.
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32d)
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "crc")]
unsafe fn __crc32d(mut crc: u32, data: u64) -> u32 {
    core::arch::asm!("crc32x {crc:w}, {crc:w}, {data:x}", crc = inout(reg) crc, data = in(reg) data);
    crc
}

/// CRC32-C single round checksum for words (32 bits).
///
/// Note: CRC32-C (Castagnoli) uses a different polynomial than the
/// gzip/zlib CRC32 computed by the wrappers above.
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cw)
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
pub unsafe fn __crc32cw(mut crc: u32, data: u32) -> u32 {
    core::arch::asm!("crc32cw {crc:w}, {crc:w}, {data:w}", crc = inout(reg) crc, data = in(reg) data);
    crc
}
|
||||
|
||||
// Each test guards on runtime CRC support before executing the unsafe
// intrinsics; the expected constants are known-good single-instruction
// results. Property tests compare against crc32fast.
#[cfg(test)]
mod tests {
    use super::*;

    quickcheck::quickcheck! {
        #[cfg(target_arch = "aarch64")]
        fn crc32_acle_aarch64_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v);

            let a = crc32_acle_aarch64(start, &v) ;
            let b = h.finalize();

            a == b
        }

        fn crc32_acle_arm_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v);

            let a = crc32_acle_arm(start, &v) ;
            let b = h.finalize();

            a == b
        }
    }

    #[test]
    fn test_crc32b() {
        // skip when the hardware lacks the CRC instructions
        if !crate::crc32::Crc32Fold::is_crc_enabled() {
            return;
        }

        unsafe {
            assert_eq!(__crc32b(0, 0), 0);
            assert_eq!(__crc32b(0, 255), 755167117);
        }
    }

    #[test]
    fn test_crc32h() {
        if !crate::crc32::Crc32Fold::is_crc_enabled() {
            return;
        }

        unsafe {
            assert_eq!(__crc32h(0, 0), 0);
            assert_eq!(__crc32h(0, 16384), 1994146192);
        }
    }

    #[test]
    fn test_crc32w() {
        if !crate::crc32::Crc32Fold::is_crc_enabled() {
            return;
        }

        unsafe {
            assert_eq!(__crc32w(0, 0), 0);
            assert_eq!(__crc32w(0, 4294967295), 3736805603);
        }
    }

    #[test]
    #[cfg(target_arch = "aarch64")]
    fn test_crc32d() {
        if !crate::crc32::Crc32Fold::is_crc_enabled() {
            return;
        }

        unsafe {
            assert_eq!(__crc32d(0, 0), 0);
            assert_eq!(__crc32d(0, 18446744073709551615), 1147535477);
        }
    }

    #[test]
    fn test_crc32cw() {
        if !crate::crc32::Crc32Fold::is_crc_enabled() {
            return;
        }

        unsafe {
            assert_eq!(__crc32cw(0, 0), 0);
            assert_eq!(__crc32cw(0, 4294967295), 3080238136);
        }
    }
}
|
|
@ -0,0 +1,202 @@
|
|||
// Several implementations of CRC-32:
// * A naive byte-granularity approach
// * A word-sized approach that processes a usize word at a time
// * A "braid" implementation that processes a block of N words
//   at a time, based on the algorithm in section 4.11 from
//   https://github.com/zlib-ng/zlib-ng/blob/develop/doc/crc-doc.1.0.pdf.

// The binary encoding of the CRC-32 polynomial.
// We are assuming little-endianness so we process the input
// LSB-first. We need to use the "reversed" value from e.g
// https://en.wikipedia.org/wiki/Cyclic_redundancy_check#Polynomial_representations.
pub(crate) const CRC32_LSB_POLY: usize = 0xedb8_8320usize;

// Machine word size in bytes; the word/braid variants process this many
// bytes per step.
const W: usize = core::mem::size_of::<usize>();

// The logic assumes that W >= sizeof(u32).
// In Rust, this is generally true.
const _: () = assert!(W >= core::mem::size_of::<u32>());

// Pre-computed tables for the CRC32 algorithm, built at compile time.
// CRC32_BYTE_TABLE corresponds to MulByXPowD from the paper.
static CRC32_BYTE_TABLE: [[u32; 256]; 1] = build_crc32_table::<256, 1, 1>();
// CRC32_WORD_TABLE is MulWordByXpowD.
static CRC32_WORD_TABLE: [[u32; 256]; W] = build_crc32_table::<256, W, 1>();

// Work-around for not being able to define generic consts or statics
// Crc32BraidTable::<N>::TABLE is the generic table for any braid size N.
struct Crc32BraidTable<const N: usize>;

impl<const N: usize> Crc32BraidTable<N> {
    const TABLE: [[u32; 256]; W] = build_crc32_table::<256, W, N>();
}
|
||||
|
||||
// Build the CRC32 tables using a more efficient and simpler approach
// than the combination of Multiply and XpowN (which implement polynomial
// multiplication and exponentiation, respectively) from the paper,
// but with identical results. This function is const, so it should be
// fully evaluated at compile time.
//
// Entry [i][j] is byte value j advanced by 8 * (W * N - i) zero bits
// through the LSB-first CRC polynomial (the k-loop applies one polynomial
// reduction step per bit). `while` loops are used because `for` is not
// available in const fns.
const fn build_crc32_table<const A: usize, const W: usize, const N: usize>() -> [[u32; A]; W] {
    let mut arr = [[0u32; A]; W];
    let mut i = 0;
    while i < W {
        let mut j = 0;
        while j < A {
            let mut c = j;
            let mut k = 0;
            while k < 8 * (W * N - i) {
                // One bit of polynomial division: xor in the reversed
                // polynomial when the LSB is set, then shift.
                if c & 1 != 0 {
                    c = CRC32_LSB_POLY ^ (c >> 1);
                } else {
                    c >>= 1;
                }
                k += 1;
            }
            arr[i][j] = c as u32;
            j += 1;
        }
        i += 1;
    }
    arr
}
|
||||
|
||||
fn crc32_naive_inner(data: &[u8], start: u32) -> u32 {
|
||||
data.iter().fold(start, |crc, val| {
|
||||
let crc32_lsb = crc.to_le_bytes()[0];
|
||||
CRC32_BYTE_TABLE[0][usize::from(crc32_lsb ^ *val)] ^ (crc >> 8)
|
||||
})
|
||||
}
|
||||
|
||||
/// Word-at-a-time CRC update over `words`, starting from complemented
/// running value `start`. `per_word_crcs` optionally supplies a partial
/// CRC to be xored into each corresponding word (used by the braid code to
/// merge its per-strand accumulators); missing entries default to 0.
fn crc32_words_inner(words: &[usize], start: u32, per_word_crcs: &[u32]) -> u32 {
    words.iter().enumerate().fold(start, |crc, (i, word)| {
        // `to_le` fixes byte order so the per-byte table lookups below see
        // the word LSB-first regardless of host endianness.
        let value = word.to_le() ^ (crc ^ per_word_crcs.get(i).unwrap_or(&0)) as usize;
        // Combine one table lookup per byte of the word.
        value
            .to_le_bytes()
            .into_iter()
            .zip(CRC32_WORD_TABLE)
            .fold(0u32, |crc, (b, tab)| crc ^ tab[usize::from(b)])
    })
}
|
||||
|
||||
/// Braided CRC-32: processes N interleaved strands of word-sized data per
/// iteration (section 4.11 of the zlib-ng CRC paper), then merges the
/// per-strand partial CRCs. `start`/result use the normal (uncomplemented)
/// CRC convention; complements are applied internally.
pub fn crc32_braid<const N: usize>(start: u32, data: &[u8]) -> u32 {
    // Get a word-aligned sub-slice of the input data
    let (prefix, words, suffix) = unsafe { data.align_to::<usize>() };
    let crc = !start;
    let crc = crc32_naive_inner(prefix, crc);

    // crcs[0] seeds the first strand with the running CRC; the other
    // strands start at zero and are merged at the end.
    let mut crcs = [0u32; N];
    crcs[0] = crc;

    // TODO: this would normally use words.chunks_exact(N), but
    // we need to pass the last full block to crc32_words_inner
    // because we accumulate partial crcs in the array and we
    // need to roll those into the final value. The last call to
    // crc32_words_inner does that for us with its per_word_crcs
    // argument.
    let blocks = words.len() / N;
    let blocks = blocks.saturating_sub(1);
    for i in 0..blocks {
        // Load the next N words.
        let mut buffer: [usize; N] =
            core::array::from_fn(|j| usize::to_le(words[i * N + j]) ^ (crcs[j] as usize));

        // Advance all N strands one byte at a time using the braid table.
        crcs.fill(0);
        for j in 0..W {
            for k in 0..N {
                crcs[k] ^= Crc32BraidTable::<N>::TABLE[j][buffer[k] & 0xff];
                buffer[k] >>= 8;
            }
        }
    }

    // Merge: feed the remaining words (>= one full block) through the
    // word-sized path, xoring in the leftover per-strand partials.
    let crc = core::mem::take(&mut crcs[0]);
    let crc = crc32_words_inner(&words[blocks * N..], crc, &crcs);
    let crc = crc32_naive_inner(suffix, crc);
    !crc
}
|
||||
|
||||
// Cross-checks the naive, word-sized, and braid variants against each
// other and against crc32fast, including endianness and alignment cases.
#[cfg(test)]
mod test {
    use super::*;

    // Wrap the complement-in/complement-out convention around the inner
    // helpers so they can be compared with normal CRC implementations.
    fn crc32_naive(data: &[u8], start: u32) -> u32 {
        let crc = !start;
        let crc = crc32_naive_inner(data, crc);
        !crc
    }

    fn crc32_words(data: &[u8], start: u32) -> u32 {
        // Get a word-aligned sub-slice of the input data
        let (prefix, words, suffix) = unsafe { data.align_to::<usize>() };
        let crc = !start;
        let crc = crc32_naive_inner(prefix, crc);
        let crc = crc32_words_inner(words, crc, &[]);
        let crc = crc32_naive_inner(suffix, crc);
        !crc
    }

    #[test]
    fn empty_is_identity() {
        assert_eq!(crc32_naive(&[], 32), 32);
    }

    #[test]
    fn words_endianness() {
        let v = [0, 0, 0, 0, 0, 16, 0, 1];
        let start = 1534327806;

        let mut h = crc32fast::Hasher::new_with_initial(start);
        h.update(&v[..]);
        assert_eq!(crc32_words(&v[..], start), h.finalize());
    }

    #[test]
    fn crc32_naive_inner_endianness_and_alignment() {
        assert_eq!(crc32_naive_inner(&[0, 1], 0), 1996959894);

        let v: Vec<_> = (0..1024).map(|i| i as u8).collect();
        let start = 0;

        // test alignment
        for i in 0..8 {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v[i..]);
            assert_eq!(crc32_braid::<5>(start, &v[i..]), h.finalize());
        }
    }

    quickcheck::quickcheck! {
        fn naive_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v[..]);
            crc32_naive(&v[..], start) == h.finalize()
        }

        fn words_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v[..]);
            crc32_words(&v[..], start) == h.finalize()
        }

        // braid property tests are slow under miri, hence the ignores
        #[cfg_attr(miri, ignore)]
        fn braid_4_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v[..]);
            crc32_braid::<4>(start, &v[..]) == h.finalize()
        }

        #[cfg_attr(miri, ignore)]
        fn braid_5_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v[..]);
            crc32_braid::<5>(start, &v[..]) == h.finalize()
        }

        #[cfg_attr(miri, ignore)]
        fn braid_6_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
            let mut h = crc32fast::Hasher::new_with_initial(start);
            h.update(&v[..]);
            crc32_braid::<6>(start, &v[..]) == h.finalize()
        }
    }
}
|
|
@ -0,0 +1,115 @@
|
|||
use super::braid::CRC32_LSB_POLY;

/// Combine two CRC-32 values: given `crc1` over a first buffer and `crc2`
/// over a second buffer of length `len2`, return the CRC of the
/// concatenation — without touching the data itself.
pub const fn crc32_combine(crc1: u32, crc2: u32, len2: u64) -> u32 {
    crc32_combine_op(crc1, crc2, crc32_combine_gen(len2))
}

/// Precompute the combine operator for a second-buffer length of `len2`
/// (x^(len2 * 8) mod p — the `* 8` comes from the `k = 3` shift).
/// Reusable across `crc32_combine_op` calls with the same length.
#[inline(always)]
const fn crc32_combine_gen(len2: u64) -> u32 {
    x2nmodp(len2, 3)
}

/// Apply a precomputed combine operator: advance `crc1` past the second
/// buffer and xor in `crc2`.
#[inline(always)]
const fn crc32_combine_op(crc1: u32, crc2: u32, op: u32) -> u32 {
    multmodp(op, crc1) ^ crc2
}

// X2N_TABLE[k] is x^(2^k) modulo p(x), used by x2nmodp for fast
// exponentiation (indices wrap modulo 32).
const X2N_TABLE: [u32; 32] = [
    0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000, 0xedb88320, 0xb1e6b092, 0xa06a2517,
    0xed627dae, 0x88d14467, 0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, 0x09fe548f,
    0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169, 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e,
    0xbad90e37, 0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a, 0xc40ba6d0, 0xc4e22c3c,
];
|
||||
|
||||
// Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
|
||||
// reflected. For speed, this requires that a not be zero.
|
||||
const fn multmodp(a: u32, mut b: u32) -> u32 {
|
||||
let mut m = 1 << 31;
|
||||
let mut p = 0;
|
||||
|
||||
loop {
|
||||
if (a & m) != 0 {
|
||||
p ^= b;
|
||||
if (a & (m - 1)) == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
m >>= 1;
|
||||
b = if (b & 1) != 0 {
|
||||
(b >> 1) ^ CRC32_LSB_POLY as u32
|
||||
} else {
|
||||
b >> 1
|
||||
};
|
||||
}
|
||||
|
||||
p
|
||||
}
|
||||
|
||||
// Return x^(n * 2^k) modulo p(x).
//
// Square-and-multiply over the bits of n, using the precomputed
// X2N_TABLE of x^(2^k) powers (the `& 31` wraps the table index).
const fn x2nmodp(mut n: u64, mut k: u32) -> u32 {
    let mut p: u32 = 1 << 31; /* x^0 == 1 */

    while n > 0 {
        if (n & 1) != 0 {
            p = multmodp(X2N_TABLE[k as usize & 31], p);
        }
        n >>= 1;
        k += 1;
    }

    p
}
|
||||
|
||||
// Property test: combining chunk CRCs with crc32_combine must agree with
// computing the CRC over the concatenated data directly.
#[cfg(test)]
mod test {
    use super::*;

    use crate::crc32;

    #[test]
    fn test_crc32_combine() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(data: Vec<u8>) -> bool {
            // Derive the chunk size from the first input byte (>= 1).
            let Some(buf_len) = data.first().copied() else {
                return true;
            };

            let buf_size = Ord::max(buf_len, 1) as usize;

            let crc0 = 0;
            let mut crc1 = crc0;
            let mut crc2 = crc0;

            /* CRC32 */
            // Incrementally combine per-chunk CRCs and check each step
            // against the directly-updated running CRC.
            for chunk in data.chunks(buf_size) {
                let crc3 = crc32(crc0, chunk);
                let op = crc32_combine_gen(chunk.len() as _);
                let crc4 = crc32_combine_op(crc1, crc3, op);
                crc1 = crc32(crc1, chunk);

                assert_eq!(crc1, crc4);
            }

            crc2 = crc32(crc2, &data);

            assert_eq!(crc1, crc2);

            // crc1 == crc2, so combining in either pairing must agree.
            let combine1 = crc32_combine(crc1, crc2, data.len() as _);
            let combine2 = crc32_combine(crc1, crc1, data.len() as _);
            assert_eq!(combine1, combine2);

            // Fast CRC32 combine.
            let op = crc32_combine_gen(data.len() as _);
            let combine1 = crc32_combine_op(crc1, crc2, op);
            let combine2 = crc32_combine_op(crc2, crc1, op);
            assert_eq!(combine1, combine2);

            let combine1 = crc32_combine(crc1, crc2, data.len() as _);
            let combine2 = crc32_combine_op(crc2, crc1, op);
            assert_eq!(combine1, combine2);

            true
        }
    }
}
|
|
@ -0,0 +1,342 @@
|
|||
use core::arch::x86_64::__m128i;
|
||||
use core::{
|
||||
arch::x86_64::{
|
||||
_mm_and_si128, _mm_clmulepi64_si128, _mm_extract_epi32, _mm_load_si128, _mm_loadu_si128,
|
||||
_mm_or_si128, _mm_shuffle_epi8, _mm_slli_si128, _mm_srli_si128, _mm_storeu_si128,
|
||||
_mm_xor_si128,
|
||||
},
|
||||
mem::MaybeUninit,
|
||||
};
|
||||
|
||||
use crate::{crc32::slice_to_uninit, CRC32_INITIAL_VALUE};
|
||||
|
||||
/// Wrapper forcing 16-byte alignment on its contents (SSE load alignment).
/// NOTE(review): not referenced in the visible portion of this file —
/// presumably used further down; confirm before removing.
#[derive(Debug)]
#[repr(C, align(16))]
struct Align16<T>(T);

/// Build a `__m128i` constant from four little-endian u32 lanes at compile
/// time (there is no const constructor for `__m128i` in core::arch).
#[cfg(target_arch = "x86_64")]
const fn reg(input: [u32; 4]) -> __m128i {
    // safety: any valid [u32; 4] represents a valid __m128i
    unsafe { core::mem::transmute(input) }
}

/// CRC-32 folding state for the pclmulqdq (carry-less multiply) algorithm:
/// four 128-bit lanes of folded polynomial state.
#[derive(Debug, Clone, Copy)]
#[cfg(target_arch = "x86_64")]
pub(crate) struct Accumulator {
    fold: [__m128i; 4],
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
impl Accumulator {
|
||||
const XMM_FOLD4: __m128i = reg([0xc6e41596u32, 0x00000001u32, 0x54442bd4u32, 0x00000001u32]);
|
||||
|
||||
pub const fn new() -> Self {
|
||||
let xmm_crc0 = reg([0x9db42487, 0, 0, 0]);
|
||||
let xmm_zero = reg([0, 0, 0, 0]);
|
||||
|
||||
Self {
|
||||
fold: [xmm_crc0, xmm_zero, xmm_zero, xmm_zero],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fold(&mut self, src: &[u8], start: u32) {
|
||||
unsafe { self.fold_help::<false>(&mut [], src, start) }
|
||||
}
|
||||
|
||||
pub fn fold_copy(&mut self, dst: &mut [MaybeUninit<u8>], src: &[u8]) {
|
||||
unsafe { self.fold_help::<true>(dst, src, 0) }
|
||||
}
|
||||
|
||||
#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")]
|
||||
pub unsafe fn finish(self) -> u32 {
|
||||
const CRC_MASK1: __m128i =
|
||||
reg([0xFFFFFFFFu32, 0xFFFFFFFFu32, 0x00000000u32, 0x00000000u32]);
|
||||
|
||||
const CRC_MASK2: __m128i =
|
||||
reg([0x00000000u32, 0xFFFFFFFFu32, 0xFFFFFFFFu32, 0xFFFFFFFFu32]);
|
||||
|
||||
const RK1_RK2: __m128i = reg([
|
||||
0xccaa009e, 0x00000000, /* rk1 */
|
||||
0x751997d0, 0x00000001, /* rk2 */
|
||||
]);
|
||||
|
||||
const RK5_RK6: __m128i = reg([
|
||||
0xccaa009e, 0x00000000, /* rk5 */
|
||||
0x63cd6124, 0x00000001, /* rk6 */
|
||||
]);
|
||||
|
||||
const RK7_RK8: __m128i = reg([
|
||||
0xf7011640, 0x00000001, /* rk7 */
|
||||
0xdb710640, 0x00000001, /* rk8 */
|
||||
]);
|
||||
|
||||
let [mut xmm_crc0, mut xmm_crc1, mut xmm_crc2, mut xmm_crc3] = self.fold;
|
||||
|
||||
/*
|
||||
* k1
|
||||
*/
|
||||
let mut crc_fold = RK1_RK2;
|
||||
|
||||
let x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10);
|
||||
xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01);
|
||||
xmm_crc1 = _mm_xor_si128(xmm_crc1, x_tmp0);
|
||||
xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_crc0);
|
||||
|
||||
let x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10);
|
||||
xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01);
|
||||
xmm_crc2 = _mm_xor_si128(xmm_crc2, x_tmp1);
|
||||
xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_crc1);
|
||||
|
||||
let x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10);
|
||||
xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, x_tmp2);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2);
|
||||
|
||||
/*
|
||||
* k5
|
||||
*/
|
||||
crc_fold = RK5_RK6;
|
||||
|
||||
xmm_crc0 = xmm_crc3;
|
||||
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0);
|
||||
xmm_crc0 = _mm_srli_si128(xmm_crc0, 8);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0);
|
||||
|
||||
xmm_crc0 = xmm_crc3;
|
||||
xmm_crc3 = _mm_slli_si128(xmm_crc3, 4);
|
||||
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0);
|
||||
xmm_crc3 = _mm_and_si128(xmm_crc3, CRC_MASK2);
|
||||
|
||||
/*
|
||||
* k7
|
||||
*/
|
||||
xmm_crc1 = xmm_crc3;
|
||||
xmm_crc2 = xmm_crc3;
|
||||
crc_fold = RK7_RK8;
|
||||
|
||||
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2);
|
||||
xmm_crc3 = _mm_and_si128(xmm_crc3, CRC_MASK1);
|
||||
|
||||
xmm_crc2 = xmm_crc3;
|
||||
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2);
|
||||
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1);
|
||||
|
||||
!(_mm_extract_epi32(xmm_crc3, 2) as u32)
|
||||
}
|
||||
|
||||
/// Shifts the four 128-bit fold accumulators down by `N` slots.
///
/// Accumulators that would shift in from past the end of `self.fold` are
/// instead produced by folding the wrapped-around accumulator forward with
/// `Self::step` (a carry-less-multiply fold — presumably by the 4-vector
/// fold constant, given the `XMM_FOLD4` name; confirm against the constant's
/// definition).
fn fold_step<const N: usize>(&mut self) {
    self.fold = core::array::from_fn(|i| match self.fold.get(i + N) {
        // Slot `i + N` still exists: plain shift.
        Some(v) => *v,
        // Past the end: fold the accumulator that wraps into this slot.
        None => unsafe { Self::step(self.fold[(i + N) - 4]) },
    });
}
|
||||
|
||||
#[inline(always)]
/// One CRC folding step: carry-less-multiplies the low and high 64-bit
/// halves of `input` by the two halves of `Self::XMM_FOLD4` (immediates
/// 0x01 and 0x10 select lo×hi and hi×lo respectively) and XORs the two
/// products together.
///
/// Safety: caller must ensure the `pclmulqdq` and `sse2` target features
/// are available.
unsafe fn step(input: __m128i) -> __m128i {
    _mm_xor_si128(
        _mm_clmulepi64_si128(input, Self::XMM_FOLD4, 0x01),
        _mm_clmulepi64_si128(input, Self::XMM_FOLD4, 0x10),
    )
}
|
||||
|
||||
/// Folds a partial (1..=15 byte) chunk `xmm_crc_part` into the running
/// fold state.
///
/// The four accumulators are treated as one contiguous 64-byte lane: each
/// is shifted left by `len` bytes via `pshufb`, with the bytes shifted out
/// of `fold[i]` shifted into `fold[i+1]`, and the partial chunk shifted
/// into the tail of `fold[3]`. The bytes shifted out of `fold[0]` are
/// folded back in with `Self::step`.
///
/// Safety: caller must ensure `1 <= len <= 15` (the table is indexed with
/// `len - 1`) and that the required SIMD target features are available.
unsafe fn partial_fold(&mut self, xmm_crc_part: __m128i, len: usize) {
    // Entry `len - 1` is the pshufb control that shifts a vector left by
    // `len` bytes; XORing it with 0x80 in every byte (below) turns it into
    // the complementary right-shift control, because pshufb zeroes any
    // lane whose control byte has the top bit set.
    const PSHUFB_SHF_TABLE: [__m128i; 15] = [
        reg([0x84838281, 0x88878685, 0x8c8b8a89, 0x008f8e8d]), /* shl 15 (16 - 1)/shr1 */
        reg([0x85848382, 0x89888786, 0x8d8c8b8a, 0x01008f8e]), /* shl 14 (16 - 3)/shr2 */
        reg([0x86858483, 0x8a898887, 0x8e8d8c8b, 0x0201008f]), /* shl 13 (16 - 4)/shr3 */
        reg([0x87868584, 0x8b8a8988, 0x8f8e8d8c, 0x03020100]), /* shl 12 (16 - 4)/shr4 */
        reg([0x88878685, 0x8c8b8a89, 0x008f8e8d, 0x04030201]), /* shl 11 (16 - 5)/shr5 */
        reg([0x89888786, 0x8d8c8b8a, 0x01008f8e, 0x05040302]), /* shl 10 (16 - 6)/shr6 */
        reg([0x8a898887, 0x8e8d8c8b, 0x0201008f, 0x06050403]), /* shl 9 (16 - 7)/shr7 */
        reg([0x8b8a8988, 0x8f8e8d8c, 0x03020100, 0x07060504]), /* shl 8 (16 - 8)/shr8 */
        reg([0x8c8b8a89, 0x008f8e8d, 0x04030201, 0x08070605]), /* shl 7 (16 - 9)/shr9 */
        reg([0x8d8c8b8a, 0x01008f8e, 0x05040302, 0x09080706]), /* shl 6 (16 -10)/shr10*/
        reg([0x8e8d8c8b, 0x0201008f, 0x06050403, 0x0a090807]), /* shl 5 (16 -11)/shr11*/
        reg([0x8f8e8d8c, 0x03020100, 0x07060504, 0x0b0a0908]), /* shl 4 (16 -12)/shr12*/
        reg([0x008f8e8d, 0x04030201, 0x08070605, 0x0c0b0a09]), /* shl 3 (16 -13)/shr13*/
        reg([0x01008f8e, 0x05040302, 0x09080706, 0x0d0c0b0a]), /* shl 2 (16 -14)/shr14*/
        reg([0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b]), /* shl 1 (16 -15)/shr15*/
    ];

    let xmm_shl = PSHUFB_SHF_TABLE[len - 1];
    // Setting bit 7 of every control byte converts the shl control into
    // the matching shr control (pshufb zeroes those lanes).
    let xmm_shr = _mm_xor_si128(xmm_shl, reg([0x80808080u32; 4]));

    // Bytes pushed out of fold[0] must be folded back into the state.
    let xmm_a0 = Self::step(_mm_shuffle_epi8(self.fold[0], xmm_shl));

    // Ripple the shift through fold[0..=3]: keep the right-shifted part,
    // OR in the bytes carried over from the next accumulator.
    self.fold[0] = _mm_shuffle_epi8(self.fold[0], xmm_shr);
    let xmm_tmp1 = _mm_shuffle_epi8(self.fold[1], xmm_shl);
    self.fold[0] = _mm_or_si128(self.fold[0], xmm_tmp1);

    self.fold[1] = _mm_shuffle_epi8(self.fold[1], xmm_shr);
    let xmm_tmp2 = _mm_shuffle_epi8(self.fold[2], xmm_shl);
    self.fold[1] = _mm_or_si128(self.fold[1], xmm_tmp2);

    self.fold[2] = _mm_shuffle_epi8(self.fold[2], xmm_shr);
    let xmm_tmp3 = _mm_shuffle_epi8(self.fold[3], xmm_shl);
    self.fold[2] = _mm_or_si128(self.fold[2], xmm_tmp3);

    // The partial input chunk fills the tail of fold[3].
    self.fold[3] = _mm_shuffle_epi8(self.fold[3], xmm_shr);
    let xmm_crc_part = _mm_shuffle_epi8(xmm_crc_part, xmm_shl);
    self.fold[3] = _mm_or_si128(self.fold[3], xmm_crc_part);

    // zlib-ng uses casts and a floating-point xor instruction here. There is a theory that
    // this breaks dependency chains on some CPUs and gives better throughput. Other sources
    // claim that casting between integer and float has a cost and should be avoided. We can't
    // measure the difference, and choose the shorter code.
    self.fold[3] = _mm_xor_si128(self.fold[3], xmm_a0)
}
|
||||
|
||||
#[allow(clippy::needless_range_loop)]
/// Consumes `N * 16` bytes from the front of `src`, folding them into the
/// accumulators, and (when `COPY`) mirrors those bytes into `dst`.
///
/// When not copying and a non-initial CRC is pending in `init_crc`, it is
/// XORed into the first input vector and `init_crc` is reset, so the
/// seed is carried exactly once.
///
/// Returns the number of bytes written to `dst` (`N * 16` when `COPY`,
/// otherwise 0) so the caller can advance its `dst` slice.
///
/// NOTE(review): the loads use `_mm_load_si128`, which requires 16-byte
/// alignment — this presumably relies on the caller having consumed the
/// unaligned prefix first (see `fold_help`); confirm at call sites.
fn progress<const N: usize, const COPY: bool>(
    &mut self,
    dst: &mut [MaybeUninit<u8>],
    src: &mut &[u8],
    init_crc: &mut u32,
) -> usize {
    let mut it = src.chunks_exact(16);
    // Load N aligned 16-byte vectors from the front of src.
    let mut input: [_; N] = core::array::from_fn(|_| unsafe {
        _mm_load_si128(it.next().unwrap().as_ptr() as *const __m128i)
    });

    *src = &src[N * 16..];

    if COPY {
        // crc32_copy path: mirror the consumed input into dst.
        for (s, d) in input[..N].iter().zip(dst.chunks_exact_mut(16)) {
            unsafe { _mm_storeu_si128(d.as_mut_ptr() as *mut __m128i, *s) };
        }
    } else if *init_crc != CRC32_INITIAL_VALUE {
        // Fold the pending initial CRC into the very first vector, once.
        let xmm_initial = reg([*init_crc, 0, 0, 0]);
        input[0] = unsafe { _mm_xor_si128(input[0], xmm_initial) };
        *init_crc = CRC32_INITIAL_VALUE;
    }

    // Shift the accumulators down by N, then XOR the new input into the
    // freed top slots.
    self.fold_step::<N>();

    for i in 0..N {
        self.fold[i + (4 - N)] = unsafe { _mm_xor_si128(self.fold[i + (4 - N)], input[i]) };
    }

    if COPY {
        N * 16
    } else {
        0
    }
}
|
||||
|
||||
#[target_feature(enable = "pclmulqdq", enable = "sse2", enable = "sse4.1")]
/// Core of the CLMUL CRC-32: folds all of `src` into the accumulators,
/// optionally (`COPY`) copying `src` into `dst` along the way.
///
/// Phases:
/// 1. sub-16-byte inputs are handled via a stack buffer;
/// 2. the unaligned prefix of `src` is consumed with `partial_fold` so the
///    main loop can use aligned loads;
/// 3. the bulk is folded 64/48/32/16 bytes at a time via `progress`;
/// 4. any remaining tail (< 16 bytes) is folded with `partial_fold`.
///
/// Safety: caller must guarantee the listed target features are available.
unsafe fn fold_help<const COPY: bool>(
    &mut self,
    mut dst: &mut [MaybeUninit<u8>],
    mut src: &[u8],
    mut init_crc: u32,
) {
    let mut xmm_crc_part = reg([0; 4]);

    let mut partial_buf = Align16([0u8; 16]);

    // Technically the CRC functions don't even call this for input < 64, but a bare minimum of 31
    // bytes of input is needed for the aligning load that occurs. If there's an initial CRC, to
    // carry it forward through the folded CRC there must be 16 - src % 16 + 16 bytes available, which
    // by definition can be up to 15 bytes + one full vector load. */
    assert!(src.len() >= 31 || init_crc == CRC32_INITIAL_VALUE);

    if COPY {
        assert_eq!(dst.len(), src.len(), "dst and src must be the same length")
    }

    if src.len() < 16 {
        // Too short for a single vector: stage through the aligned stack
        // buffer; the tail handling at the bottom does the fold.
        if COPY {
            if src.is_empty() {
                return;
            }

            partial_buf.0[..src.len()].copy_from_slice(src);
            xmm_crc_part = _mm_load_si128(partial_buf.0.as_mut_ptr() as *mut __m128i);
            dst[..src.len()].copy_from_slice(slice_to_uninit(&partial_buf.0[..src.len()]));
        }
    } else {
        // `before` is the unaligned prefix preceding the first 16-byte
        // aligned boundary of src.
        let (before, _, _) = unsafe { src.align_to::<__m128i>() };

        if !before.is_empty() {
            xmm_crc_part = _mm_loadu_si128(src.as_ptr() as *const __m128i);
            if COPY {
                _mm_storeu_si128(dst.as_mut_ptr() as *mut __m128i, xmm_crc_part);
                dst = &mut dst[before.len()..];
            } else {
                let is_initial = init_crc == CRC32_INITIAL_VALUE;

                if !is_initial {
                    // Carry the seed CRC into the first loaded vector.
                    let xmm_initial = reg([init_crc, 0, 0, 0]);
                    xmm_crc_part = _mm_xor_si128(xmm_crc_part, xmm_initial);
                    init_crc = CRC32_INITIAL_VALUE;
                }

                if before.len() < 4 && !is_initial {
                    // The seed spans past the short prefix: fold one full
                    // extra vector now so the seed bytes are not lost.
                    let xmm_t0 = xmm_crc_part;
                    xmm_crc_part = _mm_loadu_si128((src.as_ptr() as *const __m128i).add(1));

                    self.fold_step::<1>();

                    self.fold[3] = _mm_xor_si128(self.fold[3], xmm_t0);
                    src = &src[16..];
                }
            }

            self.partial_fold(xmm_crc_part, before.len());

            src = &src[before.len()..];
        }

        // if is_x86_feature_detected!("vpclmulqdq") {
        //     if src.len() >= 256 {
        //         if COPY {
        //             // size_t n = fold_16_vpclmulqdq_copy(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, dst, src, len);
        //             // dst += n;
        //         } else {
        //             // size_t n = fold_16_vpclmulqdq(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, src, len, xmm_initial, first);
        //             // first = false;
        //         }
        //         // len -= n;
        //         // src += n;
        //     }
        // }

        // Bulk loop: 4 vectors (64 bytes) per iteration.
        while src.len() >= 64 {
            let n = self.progress::<4, COPY>(dst, &mut src, &mut init_crc);
            dst = &mut dst[n..];
        }

        // At most one of these fires; afterwards src.len() < 16.
        if src.len() >= 48 {
            let n = self.progress::<3, COPY>(dst, &mut src, &mut init_crc);
            dst = &mut dst[n..];
        } else if src.len() >= 32 {
            let n = self.progress::<2, COPY>(dst, &mut src, &mut init_crc);
            dst = &mut dst[n..];
        } else if src.len() >= 16 {
            let n = self.progress::<1, COPY>(dst, &mut src, &mut init_crc);
            dst = &mut dst[n..];
        }
    }

    if !src.is_empty() {
        // Fold the sub-16-byte tail. The copy into xmm_crc_part reads only
        // src.len() bytes; the rest of the register keeps its prior value,
        // which partial_fold's shuffle discards.
        core::ptr::copy_nonoverlapping(
            src.as_ptr(),
            &mut xmm_crc_part as *mut _ as *mut u8,
            src.len(),
        );
        if COPY {
            _mm_storeu_si128(partial_buf.0.as_mut_ptr() as *mut __m128i, xmm_crc_part);
            core::ptr::copy_nonoverlapping(
                partial_buf.0.as_ptr() as *const MaybeUninit<u8>,
                dst.as_mut_ptr(),
                src.len(),
            );
        }

        self.partial_fold(xmm_crc_part, src.len());
    }
}
|
||||
}
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,109 @@
|
|||
#![forbid(unsafe_code)]
|
||||
|
||||
use crate::{
|
||||
deflate::{
|
||||
fill_window, BlockState, DeflateStream, MIN_LOOKAHEAD, STD_MIN_MATCH, WANT_MIN_MATCH,
|
||||
},
|
||||
flush_block, DeflateFlush,
|
||||
};
|
||||
|
||||
/// Compression level 2: emit a match whenever one of at least
/// `WANT_MIN_MATCH` bytes is found at the current position, otherwise emit
/// a literal. No lazy matching — each position is decided immediately.
///
/// Returns `NeedMore` when input is exhausted on `NoFlush`, `FinishDone`
/// on `Finish`, `BlockDone` otherwise (`flush_block!` may also return
/// `FinishStarted`/`NeedMore` early when the output buffer fills up).
pub fn deflate_fast(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockState {
    let mut bflush; /* set if current block must be flushed */
    let mut dist;
    let mut match_len = 0;

    loop {
        // Make sure that we always have enough lookahead, except
        // at the end of the input file. We need STD_MAX_MATCH bytes
        // for the next match, plus WANT_MIN_MATCH bytes to insert the
        // string following the next match.
        if stream.state.lookahead < MIN_LOOKAHEAD {
            fill_window(stream);
            if stream.state.lookahead < MIN_LOOKAHEAD && flush == DeflateFlush::NoFlush {
                return BlockState::NeedMore;
            }
            if stream.state.lookahead == 0 {
                break; /* flush the current block */
            }
        }

        let state = &mut stream.state;

        // Insert the string window[strstart .. strstart+2] in the
        // dictionary, and set hash_head to the head of the hash chain:

        if state.lookahead >= WANT_MIN_MATCH {
            let hash_head = state.quick_insert_string(state.strstart);
            dist = state.strstart as isize - hash_head as isize;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match length < WANT_MIN_MATCH
             */
            if dist <= state.max_dist() as isize && dist > 0 && hash_head != 0 {
                // To simplify the code, we prevent matches with the string
                // of window index 0 (in particular we have to avoid a match
                // of the string with itself at the start of the input file).
                (match_len, state.match_start) =
                    crate::deflate::longest_match::longest_match(state, hash_head);
            }
        }

        if match_len >= WANT_MIN_MATCH {
            // check_match(s, s->strstart, s->match_start, match_len);

            // bflush = zng_tr_tally_dist(s, s->strstart - s->match_start, match_len - STD_MIN_MATCH);
            bflush = state.tally_dist(
                state.strstart - state.match_start,
                match_len - STD_MIN_MATCH,
            );

            state.lookahead -= match_len;

            /* Insert new strings in the hash table only if the match length
             * is not too large. This saves time but degrades compression.
             */
            if match_len <= state.max_insert_length() && state.lookahead >= WANT_MIN_MATCH {
                match_len -= 1; /* string at strstart already in table */
                state.strstart += 1;

                state.insert_string(state.strstart, match_len);
                state.strstart += match_len;
            } else {
                state.strstart += match_len;
                state.quick_insert_string(state.strstart + 2 - STD_MIN_MATCH);

                /* If lookahead < STD_MIN_MATCH, ins_h is garbage, but it does not
                 * matter since it will be recomputed at next deflate call.
                 */
            }
            match_len = 0;
        } else {
            /* No match, output a literal byte */
            let lc = state.window.filled()[state.strstart];
            bflush = state.tally_lit(lc);
            state.lookahead -= 1;
            state.strstart += 1;
        }

        if bflush {
            flush_block!(stream, false);
        }
    }

    // Remember how many bytes at the tail still need hashing on the next
    // call (at most STD_MIN_MATCH - 1).
    stream.state.insert = if stream.state.strstart < (STD_MIN_MATCH - 1) {
        stream.state.strstart
    } else {
        STD_MIN_MATCH - 1
    };

    if flush == DeflateFlush::Finish {
        flush_block!(stream, true);
        return BlockState::FinishDone;
    }

    if !stream.state.sym_buf.is_empty() {
        flush_block!(stream, false);
    }

    BlockState::BlockDone
}
|
|
@ -0,0 +1,45 @@
|
|||
#![forbid(unsafe_code)]
|
||||
|
||||
use crate::{
|
||||
deflate::{fill_window, BlockState, DeflateStream},
|
||||
flush_block, DeflateFlush,
|
||||
};
|
||||
|
||||
pub fn deflate_huff(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockState {
|
||||
loop {
|
||||
/* Make sure that we have a literal to write. */
|
||||
if stream.state.lookahead == 0 {
|
||||
fill_window(stream);
|
||||
|
||||
if stream.state.lookahead == 0 {
|
||||
match flush {
|
||||
DeflateFlush::NoFlush => return BlockState::NeedMore,
|
||||
_ => break, /* flush the current block */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Output a literal byte */
|
||||
let state = &mut stream.state;
|
||||
let lc = state.window.filled()[state.strstart];
|
||||
let bflush = state.tally_lit(lc);
|
||||
state.lookahead -= 1;
|
||||
state.strstart += 1;
|
||||
if bflush {
|
||||
flush_block!(stream, false);
|
||||
}
|
||||
}
|
||||
|
||||
stream.state.insert = 0;
|
||||
|
||||
if flush == DeflateFlush::Finish {
|
||||
flush_block!(stream, true);
|
||||
return BlockState::FinishDone;
|
||||
}
|
||||
|
||||
if !stream.state.sym_buf.is_empty() {
|
||||
flush_block!(stream, false);
|
||||
}
|
||||
|
||||
BlockState::BlockDone
|
||||
}
|
|
@ -0,0 +1,339 @@
|
|||
#![forbid(unsafe_code)]
|
||||
|
||||
use crate::{
|
||||
deflate::{
|
||||
fill_window, BlockState, DeflateStream, State, MIN_LOOKAHEAD, STD_MIN_MATCH, WANT_MIN_MATCH,
|
||||
},
|
||||
flush_block, DeflateFlush,
|
||||
};
|
||||
|
||||
/// Compression levels 3–6: a "medium" strategy (from zlib-ng) that, for
/// levels >= 5, also evaluates the match at the position immediately after
/// the current match (`next_match`) and lets `fizzle_matches` shorten the
/// current match when extending the next one is more profitable.
///
/// Returns `NeedMore` when input is exhausted on `NoFlush`, `FinishDone`
/// on `Finish`, `BlockDone` otherwise (`flush_block!` may also return
/// early when the output buffer fills up).
pub fn deflate_medium(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockState {
    let mut state = &mut stream.state;

    // For levels below 5, don't check the next position for a better match
    let early_exit = state.level < 5;

    let mut current_match = Match {
        match_start: 0,
        match_length: 0,
        strstart: 0,
        orgstart: 0,
    };
    let mut next_match = Match {
        match_start: 0,
        match_length: 0,
        strstart: 0,
        orgstart: 0,
    };

    loop {
        let mut hash_head;

        /* Make sure that we always have enough lookahead, except
         * at the end of the input file. We need STD_MAX_MATCH bytes
         * for the next match, plus WANT_MIN_MATCH bytes to insert the
         * string following the next match.
         */
        if stream.state.lookahead < MIN_LOOKAHEAD {
            fill_window(stream);

            if stream.state.lookahead < MIN_LOOKAHEAD && flush == DeflateFlush::NoFlush {
                return BlockState::NeedMore;
            }

            if stream.state.lookahead == 0 {
                break; /* flush the current block */
            }

            // fill_window may have slid the window, so a previously found
            // next_match is no longer trustworthy.
            next_match.match_length = 0;
        }

        state = &mut stream.state;

        // Insert the string window[strstart .. strstart+2] in the
        // dictionary, and set hash_head to the head of the hash chain:

        /* If we already have a future match from a previous round, just use that */
        if !early_exit && next_match.match_length > 0 {
            current_match = next_match;
            next_match.match_length = 0;
        } else {
            hash_head = 0;
            if state.lookahead >= WANT_MIN_MATCH {
                hash_head = state.quick_insert_string(state.strstart);
            }

            current_match.strstart = state.strstart as u16;
            current_match.orgstart = current_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < WANT_MIN_MATCH
             */

            let dist = state.strstart as i64 - hash_head as i64;
            if dist <= state.max_dist() as i64 && dist > 0 && hash_head != 0 {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                let (match_length, match_start) =
                    crate::deflate::longest_match::longest_match(state, hash_head);
                state.match_start = match_start;
                current_match.match_length = match_length as u16;
                current_match.match_start = match_start as u16;
                if (current_match.match_length as usize) < WANT_MIN_MATCH {
                    current_match.match_length = 1;
                }
                if current_match.match_start >= current_match.strstart {
                    /* this can happen due to some restarts */
                    current_match.match_length = 1;
                }
            } else {
                /* Set up the match to be a 1 byte literal */
                current_match.match_start = 0;
                current_match.match_length = 1;
            }
        }

        insert_match(state, current_match);

        /* now, look ahead one */
        if !early_exit
            && state.lookahead > MIN_LOOKAHEAD
            && ((current_match.strstart + current_match.match_length) as usize)
                < (state.window_size - MIN_LOOKAHEAD)
        {
            // Temporarily move the cursor to the position right after the
            // current match and search there.
            state.strstart = (current_match.strstart + current_match.match_length) as usize;
            hash_head = state.quick_insert_string(state.strstart);

            next_match.strstart = state.strstart as u16;
            next_match.orgstart = next_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < WANT_MIN_MATCH
             */

            let dist = state.strstart as i64 - hash_head as i64;
            if dist <= state.max_dist() as i64 && dist > 0 && hash_head != 0 {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
                let (match_length, match_start) =
                    crate::deflate::longest_match::longest_match(state, hash_head);
                state.match_start = match_start;
                next_match.match_length = match_length as u16;
                next_match.match_start = match_start as u16;

                if next_match.match_start >= next_match.strstart {
                    /* this can happen due to some restarts */
                    next_match.match_length = 1;
                }
                if (next_match.match_length as usize) < WANT_MIN_MATCH {
                    next_match.match_length = 1;
                } else {
                    // A real follow-up match exists: maybe trade bytes from
                    // the current match into the next one.
                    fizzle_matches(
                        state.window.filled(),
                        state.max_dist(),
                        &mut current_match,
                        &mut next_match,
                    );
                }
            } else {
                /* Set up the match to be a 1 byte literal */
                next_match.match_start = 0;
                next_match.match_length = 1;
            }

            // Restore the cursor to the current match position.
            state.strstart = current_match.strstart as usize;
        } else {
            next_match.match_length = 0;
        }

        /* now emit the current match */
        let bflush = emit_match(state, current_match);

        /* move the "cursor" forward */
        state.strstart += current_match.match_length as usize;

        if bflush {
            flush_block!(stream, false);
        }
    }

    stream.state.insert = Ord::min(stream.state.strstart, STD_MIN_MATCH - 1);

    if flush == DeflateFlush::Finish {
        flush_block!(stream, true);
        return BlockState::FinishDone;
    }

    if !stream.state.sym_buf.is_empty() {
        flush_block!(stream, false);
    }

    BlockState::BlockDone
}
|
||||
|
||||
#[repr(C)]
#[derive(Debug, Clone, Copy)]
/// A candidate LZ77 match used by the "medium" deflate strategy.
///
/// All positions are `u16` window offsets.
struct Match {
    /// Window position the match points back to.
    match_start: u16,
    /// Length of the match; a value below WANT_MIN_MATCH (typically 1)
    /// marks a literal stand-in rather than a real match.
    match_length: u16,
    /// Current position of this match's cursor (may be moved by
    /// insert_match / fizzle_matches).
    strstart: u16,
    /// Original strstart when the match was found; used to avoid
    /// re-inserting hash entries for positions already in the table.
    orgstart: u16,
}
|
||||
|
||||
/// Emits the match `m` into the symbol buffer: as a distance/length pair
/// when it is long enough, otherwise as literal byte(s).
///
/// Returns true when the block should be flushed (any tally call asked
/// for a flush).
fn emit_match(state: &mut State, mut m: Match) -> bool {
    let mut bflush = false;

    /* matches that are not long enough we need to emit as literals */
    if (m.match_length as usize) < WANT_MIN_MATCH {
        // NOTE(review): the literal read uses state.strstart, which is not
        // advanced inside this loop (only m.strstart is). The zlib-ng C
        // original reads s->window[match.strstart] here. This is only
        // equivalent if match_length <= 1 on this path — which appears to
        // hold since deflate_medium/fizzle_matches clamp short matches to
        // length 1 — but worth confirming.
        while m.match_length > 0 {
            let lc = state.window.filled()[state.strstart];
            bflush |= state.tally_lit(lc);
            state.lookahead -= 1;
            m.strstart += 1;
            m.match_length -= 1;
        }
        return bflush;
    }

    // check_match(s, m.strstart, m.match_start, m.match_length);

    bflush |= state.tally_dist(
        (m.strstart - m.match_start) as usize,
        m.match_length as usize - STD_MIN_MATCH,
    );

    state.lookahead -= m.match_length as usize;

    bflush
}
|
||||
|
||||
/// Inserts the hash-table entries covered by match `m`, skipping positions
/// before `m.orgstart` that are already in the table.
///
/// Mirrors zlib-ng's insert_match; `m` is taken by value because only the
/// local bookkeeping copy is mutated.
fn insert_match(state: &mut State, mut m: Match) {
    // Not enough lookahead to safely hash past the match.
    if state.lookahead <= (m.match_length as usize + WANT_MIN_MATCH) {
        return;
    }

    /* matches that are not long enough we need to emit as literals */
    if (m.match_length as usize) < WANT_MIN_MATCH {
        m.strstart += 1;
        m.match_length -= 1;
        if m.match_length > 0 && m.strstart >= m.orgstart {
            if m.strstart + m.match_length > m.orgstart {
                state.insert_string(m.strstart as usize, m.match_length as usize);
            } else {
                state.insert_string(m.strstart as usize, (m.orgstart - m.strstart + 1) as usize);
            }
            m.strstart += m.match_length;
            m.match_length = 0;
        }
        return;
    }

    /* Insert new strings in the hash table only if the match length
     * is not too large. This saves time but degrades compression.
     */
    if (m.match_length as usize) <= 16 * state.max_insert_length()
        && state.lookahead >= WANT_MIN_MATCH
    {
        m.match_length -= 1; /* string at strstart already in table */
        m.strstart += 1;

        if m.strstart >= m.orgstart {
            if m.strstart + m.match_length > m.orgstart {
                state.insert_string(m.strstart as usize, m.match_length as usize);
            } else {
                state.insert_string(m.strstart as usize, (m.orgstart - m.strstart + 1) as usize);
            }
        } else if m.orgstart < m.strstart + m.match_length {
            // Only the span from orgstart onward still needs inserting.
            state.insert_string(
                m.orgstart as usize,
                (m.strstart + m.match_length - m.orgstart) as usize,
            );
        }
        m.strstart += m.match_length;
        m.match_length = 0;
    } else {
        // Match too long to be worth hashing fully: only refresh the hash
        // at the position just before the end of the match.
        m.strstart += m.match_length;
        m.match_length = 0;

        if (m.strstart as usize) >= (STD_MIN_MATCH - 2) {
            state.quick_insert_string(m.strstart as usize + 2 - STD_MIN_MATCH);
        }

        /* If lookahead < WANT_MIN_MATCH, ins_h is garbage, but it does not
         * matter since it will be recomputed at next deflate call.
         */
    }
}
|
||||
|
||||
/// Attempts to extend `next` leftwards byte-by-byte at the expense of
/// `current` (zlib-ng's "fizzle"): if the bytes preceding the next match
/// also match, shifting them from the current match into the next one can
/// yield a longer combined encoding.
///
/// Only commits the change when `current` has shrunk to a literal
/// (`match_length <= 1`) and `next` did not end up at the unencodable
/// length 2; otherwise both matches are left untouched.
fn fizzle_matches(window: &[u8], max_dist: usize, current: &mut Match, next: &mut Match) {
    /* step zero: sanity checks */

    if current.match_length <= 1 {
        return;
    }

    // Bail out if stepping back by (match_length - 1) would underflow the
    // next match's positions.
    if current.match_length > 1 + next.match_start {
        return;
    }

    if current.match_length > 1 + next.strstart {
        return;
    }

    let m = &window[(-(current.match_length as isize) + 1 + next.match_start as isize) as usize..];
    let orig = &window[(-(current.match_length as isize) + 1 + next.strstart as isize) as usize..];

    /* quick exit check.. if this fails then don't bother with anything else */
    if m[0] != orig[0] {
        return;
    }

    /* step one: try to move the "next" match to the left as much as possible */
    // Leftmost position the next match may reach without exceeding the
    // maximum back-reference distance.
    let limit = next.strstart.saturating_sub(max_dist as u16);

    // Work on copies; only committed at the end if profitable.
    let mut c = *current;
    let mut n = *next;

    // Walk backwards from just before each position, comparing bytes.
    let m = &window[..n.match_start as usize];
    let orig = &window[..n.strstart as usize];

    let mut m = m.iter().rev();
    let mut orig = orig.iter().rev();

    let mut changed = 0;

    while m.next() == orig.next() {
        if c.match_length < 1 {
            break;
        }
        if n.strstart <= limit {
            break;
        }
        // 256 exceeds the maximum encodable match length; stop growing.
        if n.match_length >= 256 {
            break;
        }
        if n.match_start <= 1 {
            break;
        }

        // Move one byte from the tail of `current` to the head of `next`.
        n.strstart -= 1;
        n.match_start -= 1;
        n.match_length += 1;
        c.match_length -= 1;
        changed += 1;
    }

    if changed == 0 {
        return;
    }

    // Commit only when current degenerated to a literal and next did not
    // land on length 2 (not representable as a match).
    if c.match_length <= 1 && n.match_length != 2 {
        n.orgstart += 1;
        *current = c;
        *next = n;
    }
}
|
|
@ -0,0 +1,82 @@
|
|||
use crate::{
|
||||
deflate::{BlockState, DeflateStream, Strategy},
|
||||
DeflateFlush,
|
||||
};
|
||||
|
||||
use self::{huff::deflate_huff, rle::deflate_rle, stored::deflate_stored};
|
||||
|
||||
mod fast;
|
||||
mod huff;
|
||||
mod medium;
|
||||
mod quick;
|
||||
mod rle;
|
||||
mod slow;
|
||||
mod stored;
|
||||
|
||||
#[macro_export]
/// Flushes the current block via `flush_block_only` and — because this
/// must early-return from the *enclosing* compression function — returns
/// `FinishStarted`/`NeedMore` when the output buffer is exhausted.
///
/// Must be invoked inside a function returning `BlockState`, with
/// `$stream` a `&mut DeflateStream`.
macro_rules! flush_block {
    ($stream:expr, $is_last_block:expr) => {
        $crate::deflate::flush_block_only($stream, $is_last_block);

        if $stream.avail_out == 0 {
            return match $is_last_block {
                true => BlockState::FinishStarted,
                false => BlockState::NeedMore,
            };
        }
    };
}
|
||||
|
||||
/// Dispatches one deflate pass to the algorithm selected by the stream's
/// strategy and compression level.
///
/// Level 0 always stores uncompressed regardless of strategy; the default
/// family of strategies is routed through `CONFIGURATION_TABLE`, indexed
/// by level.
pub fn run(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockState {
    match stream.state.strategy {
        _ if stream.state.level == 0 => deflate_stored(stream, flush),
        Strategy::HuffmanOnly => deflate_huff(stream, flush),
        Strategy::Rle => deflate_rle(stream, flush),
        Strategy::Default | Strategy::Filtered | Strategy::Fixed => {
            (CONFIGURATION_TABLE[stream.state.level as usize].func)(stream, flush)
        }
    }
}
|
||||
|
||||
/// Signature shared by all per-level compression entry points.
type CompressFunc = fn(&mut DeflateStream, flush: DeflateFlush) -> BlockState;

/// Tuning parameters for one compression level (see CONFIGURATION_TABLE).
pub struct Config {
    pub good_length: u16, /* reduce lazy search above this match length */
    pub max_lazy: u16,    /* do not perform lazy search above this match length */
    pub nice_length: u16, /* quit search above this match length */
    pub max_chain: u16,   /* maximum hash-chain length to search */
    pub func: CompressFunc, /* algorithm used for this level */
}
|
||||
|
||||
impl Config {
    /// Const constructor so the table below can be built at compile time;
    /// field meanings are documented on `Config`.
    const fn new(
        good_length: u16,
        max_lazy: u16,
        nice_length: u16,
        max_chain: u16,
        func: CompressFunc,
    ) -> Self {
        Self {
            good_length,
            max_lazy,
            nice_length,
            max_chain,
            func,
        }
    }
}
|
||||
|
||||
/// Per-level tuning, indexed by compression level 0..=9. Higher levels
/// search longer hash chains and switch to lazier algorithms.
pub const CONFIGURATION_TABLE: [Config; 10] = {
    [
        Config::new(0, 0, 0, 0, stored::deflate_stored), // 0 /* store only */
        Config::new(0, 0, 0, 0, quick::deflate_quick),   // 1
        Config::new(4, 4, 8, 4, fast::deflate_fast),     // 2 /* max speed, no lazy matches */
        Config::new(4, 6, 16, 6, medium::deflate_medium), // 3
        Config::new(4, 12, 32, 24, medium::deflate_medium), // 4 /* lazy matches */
        Config::new(8, 16, 32, 32, medium::deflate_medium), // 5
        Config::new(8, 16, 128, 128, medium::deflate_medium), // 6
        Config::new(8, 32, 128, 256, slow::deflate_slow), // 7
        Config::new(32, 128, 258, 1024, slow::deflate_slow), // 8
        Config::new(32, 258, 258, 4096, slow::deflate_slow), // 9 /* max compression */
    ]
};
|
|
@ -0,0 +1,145 @@
|
|||
#![forbid(unsafe_code)]
|
||||
|
||||
use crate::{
|
||||
deflate::{
|
||||
fill_window, flush_pending, BlockState, BlockType, DeflateStream, State, StaticTreeDesc,
|
||||
MIN_LOOKAHEAD, STD_MAX_MATCH, STD_MIN_MATCH, WANT_MIN_MATCH,
|
||||
},
|
||||
DeflateFlush,
|
||||
};
|
||||
|
||||
/// Compression level 1 ("quick", from zlib-ng): emits symbols directly
/// with the static Huffman trees as it scans, skipping the symbol buffer
/// and dynamic-tree construction entirely.
///
/// The two local macros open/close static-tree blocks; both may
/// early-return from this function when the output buffer fills up.
pub fn deflate_quick(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockState {
    let mut state = &mut stream.state;

    // Closes the currently open static block (if any), flushes pending
    // output, and early-returns when the output buffer is exhausted.
    macro_rules! quick_end_block {
        ($last:expr) => {
            if state.block_open > 0 {
                state
                    .bit_writer
                    .emit_end_block_and_align(&StaticTreeDesc::L.static_tree, $last);
                state.block_open = 0;
                state.block_start = state.strstart as isize;
                flush_pending(stream);
                // Re-borrow after flush_pending released the borrow; the
                // assignment is unused on the final macro use.
                #[allow(unused_assignments)]
                {
                    state = &mut stream.state;
                }
                if stream.avail_out == 0 {
                    return match $last {
                        true => BlockState::FinishStarted,
                        false => BlockState::NeedMore,
                    };
                }
            }
        };
    }

    // Emits the static-trees block header; block_open becomes 2 for the
    // final block, 1 otherwise.
    macro_rules! quick_start_block {
        ($last:expr) => {
            state.bit_writer.emit_tree(BlockType::StaticTrees, $last);
            state.block_open = 1 + $last as u8;
            state.block_start = state.strstart as isize;
        };
    }

    let last = matches!(flush, DeflateFlush::Finish);

    if last && state.block_open != 2 {
        /* Emit end of previous block */
        quick_end_block!(false);
        /* Emit start of last block */
        quick_start_block!(last);
    } else if state.block_open == 0 && state.lookahead > 0 {
        /* Start new block only when we have lookahead data, so that if no
        input data is given an empty block will not be written */
        quick_start_block!(last);
    }

    loop {
        // Flush before the pending buffer could overflow by one more
        // full bit-buffer write.
        if state.bit_writer.pending.pending + State::BIT_BUF_SIZE.div_ceil(8) as usize
            >= state.pending_buf_size()
        {
            flush_pending(stream);
            state = &mut stream.state;
            if stream.avail_out == 0 {
                return if last
                    && stream.avail_in == 0
                    && state.bit_writer.bits_used == 0
                    && state.block_open == 0
                {
                    BlockState::FinishStarted
                } else {
                    BlockState::NeedMore
                };
            }
        }

        if state.lookahead < MIN_LOOKAHEAD {
            fill_window(stream);
            state = &mut stream.state;

            if state.lookahead < MIN_LOOKAHEAD && matches!(flush, DeflateFlush::NoFlush) {
                return BlockState::NeedMore;
            }
            if state.lookahead == 0 {
                break;
            }

            if state.block_open == 0 {
                // Start new block when we have lookahead data,
                // so that if no input data is given an empty block will not be written
                quick_start_block!(last);
            }
        }

        if state.lookahead >= WANT_MIN_MATCH {
            let hash_head = state.quick_insert_string(state.strstart);
            let dist = state.strstart as isize - hash_head as isize;

            if dist <= state.max_dist() as isize && dist > 0 {
                let str_start = &state.window.filled()[state.strstart..];
                let match_start = &state.window.filled()[hash_head as usize..];

                // Cheap 2-byte prefix check before the full comparison.
                if str_start[0] == match_start[0] && str_start[1] == match_start[1] {
                    let mut match_len = crate::deflate::compare256::compare256_slice(
                        &str_start[2..],
                        &match_start[2..],
                    ) + 2;

                    if match_len >= WANT_MIN_MATCH {
                        match_len = Ord::min(match_len, state.lookahead);
                        match_len = Ord::min(match_len, STD_MAX_MATCH);

                        // TODO do this with a debug_assert?
                        // check_match(s, state.strstart, hash_head, match_len);

                        state.bit_writer.emit_dist(
                            StaticTreeDesc::L.static_tree,
                            StaticTreeDesc::D.static_tree,
                            (match_len - STD_MIN_MATCH) as u8,
                            dist as usize,
                        );
                        state.lookahead -= match_len;
                        state.strstart += match_len;
                        continue;
                    }
                }
            }
        }

        // No usable match: emit one literal.
        let lc = state.window.filled()[state.strstart];
        state.bit_writer.emit_lit(StaticTreeDesc::L.static_tree, lc);
        state.strstart += 1;
        state.lookahead -= 1;
    }

    state.insert = Ord::min(state.strstart, STD_MIN_MATCH - 1);

    quick_end_block!(last);

    if last {
        BlockState::FinishDone
    } else {
        BlockState::BlockDone
    }
}
|
|
@ -0,0 +1,83 @@
|
|||
#![forbid(unsafe_code)]
|
||||
|
||||
use crate::{
|
||||
deflate::{
|
||||
compare256::compare256_rle_slice, fill_window, BlockState, DeflateStream, MIN_LOOKAHEAD,
|
||||
STD_MAX_MATCH, STD_MIN_MATCH,
|
||||
},
|
||||
flush_block, DeflateFlush,
|
||||
};
|
||||
|
||||
pub fn deflate_rle(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockState {
|
||||
let mut match_len = 0;
|
||||
let mut bflush;
|
||||
|
||||
loop {
|
||||
// Make sure that we always have enough lookahead, except
|
||||
// at the end of the input file. We need STD_MAX_MATCH bytes
|
||||
// for the next match, plus WANT_MIN_MATCH bytes to insert the
|
||||
// string following the next match.
|
||||
if stream.state.lookahead < MIN_LOOKAHEAD {
|
||||
fill_window(stream);
|
||||
if stream.state.lookahead < MIN_LOOKAHEAD && flush == DeflateFlush::NoFlush {
|
||||
return BlockState::NeedMore;
|
||||
}
|
||||
if stream.state.lookahead == 0 {
|
||||
break; /* flush the current block */
|
||||
}
|
||||
}
|
||||
|
||||
/* See how many times the previous byte repeats */
|
||||
let state = &mut stream.state;
|
||||
if state.lookahead >= STD_MIN_MATCH && state.strstart > 0 {
|
||||
let scan = &state.window.filled()[state.strstart - 1..][..3 + 256];
|
||||
|
||||
{
|
||||
if scan[0] == scan[1] && scan[1] == scan[2] {
|
||||
match_len = compare256_rle_slice(scan[0], &scan[3..]) + 2;
|
||||
match_len = Ord::min(match_len, state.lookahead);
|
||||
match_len = Ord::min(match_len, STD_MAX_MATCH);
|
||||
}
|
||||
}
|
||||
|
||||
assert!(
|
||||
state.strstart - 1 + match_len <= state.window_size - 1,
|
||||
"wild scan"
|
||||
);
|
||||
}
|
||||
|
||||
/* Emit match if have run of STD_MIN_MATCH or longer, else emit literal */
|
||||
if match_len >= STD_MIN_MATCH {
|
||||
// check_match(s, s->strstart, s->strstart - 1, match_len);
|
||||
|
||||
bflush = state.tally_dist(1, match_len - STD_MIN_MATCH);
|
||||
|
||||
state.lookahead -= match_len;
|
||||
state.strstart += match_len;
|
||||
match_len = 0;
|
||||
} else {
|
||||
/* No match, output a literal byte */
|
||||
let lc = state.window.filled()[state.strstart];
|
||||
bflush = state.tally_lit(lc);
|
||||
state.lookahead -= 1;
|
||||
state.strstart += 1;
|
||||
}
|
||||
|
||||
if bflush {
|
||||
flush_block!(stream, false);
|
||||
}
|
||||
}
|
||||
|
||||
stream.state.insert = 0;
|
||||
|
||||
if flush == DeflateFlush::Finish {
|
||||
flush_block!(stream, true);
|
||||
return BlockState::FinishDone;
|
||||
}
|
||||
|
||||
if !stream.state.sym_buf.is_empty() {
|
||||
flush_block!(stream, false);
|
||||
}
|
||||
|
||||
BlockState::BlockDone
|
||||
}
|
|
@ -0,0 +1,160 @@
|
|||
#![forbid(unsafe_code)]
|
||||
|
||||
use crate::{
|
||||
deflate::{
|
||||
fill_window, flush_block_only, BlockState, DeflateStream, Strategy, MIN_LOOKAHEAD,
|
||||
STD_MIN_MATCH, WANT_MIN_MATCH,
|
||||
},
|
||||
flush_block, DeflateFlush,
|
||||
};
|
||||
|
||||
pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockState {
|
||||
let mut hash_head; /* head of hash chain */
|
||||
let mut bflush; /* set if current block must be flushed */
|
||||
let mut dist;
|
||||
let mut match_len;
|
||||
|
||||
let use_longest_match_slow = stream.state.max_chain_length > 1024;
|
||||
let valid_distance_range = 1..=stream.state.max_dist() as isize;
|
||||
|
||||
let mut match_available = stream.state.match_available;
|
||||
|
||||
/* Process the input block. */
|
||||
loop {
|
||||
/* Make sure that we always have enough lookahead, except
|
||||
* at the end of the input file. We need STD_MAX_MATCH bytes
|
||||
* for the next match, plus WANT_MIN_MATCH bytes to insert the
|
||||
* string following the next match.
|
||||
*/
|
||||
if stream.state.lookahead < MIN_LOOKAHEAD {
|
||||
fill_window(stream);
|
||||
if stream.state.lookahead < MIN_LOOKAHEAD && flush == DeflateFlush::NoFlush {
|
||||
return BlockState::NeedMore;
|
||||
}
|
||||
|
||||
if stream.state.lookahead == 0 {
|
||||
break; /* flush the current block */
|
||||
}
|
||||
}
|
||||
|
||||
let state = &mut stream.state;
|
||||
|
||||
/* Insert the string window[strstart .. strstart+2] in the
|
||||
* dictionary, and set hash_head to the head of the hash chain:
|
||||
*/
|
||||
hash_head = if state.lookahead >= WANT_MIN_MATCH {
|
||||
state.quick_insert_string(state.strstart)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
// Find the longest match, discarding those <= prev_length.
|
||||
state.prev_match = state.match_start as u16;
|
||||
match_len = STD_MIN_MATCH - 1;
|
||||
dist = state.strstart as isize - hash_head as isize;
|
||||
|
||||
if valid_distance_range.contains(&dist)
|
||||
&& state.prev_length < state.max_lazy_match
|
||||
&& hash_head != 0
|
||||
{
|
||||
// To simplify the code, we prevent matches with the string
|
||||
// of window index 0 (in particular we have to avoid a match
|
||||
// of the string with itself at the start of the input file).
|
||||
(match_len, state.match_start) = if use_longest_match_slow {
|
||||
crate::deflate::longest_match::longest_match_slow(state, hash_head)
|
||||
} else {
|
||||
crate::deflate::longest_match::longest_match(state, hash_head)
|
||||
};
|
||||
|
||||
if match_len <= 5 && (state.strategy == Strategy::Filtered) {
|
||||
/* If prev_match is also WANT_MIN_MATCH, match_start is garbage
|
||||
* but we will ignore the current match anyway.
|
||||
*/
|
||||
match_len = STD_MIN_MATCH - 1;
|
||||
}
|
||||
}
|
||||
|
||||
// If there was a match at the previous step and the current
|
||||
// match is not better, output the previous match:
|
||||
if state.prev_length >= STD_MIN_MATCH && match_len <= state.prev_length {
|
||||
let max_insert = state.strstart + state.lookahead - STD_MIN_MATCH;
|
||||
/* Do not insert strings in hash table beyond this. */
|
||||
|
||||
// check_match(s, state.strstart-1, state.prev_match, state.prev_length);
|
||||
|
||||
bflush = state.tally_dist(
|
||||
state.strstart - 1 - state.prev_match as usize,
|
||||
state.prev_length - STD_MIN_MATCH,
|
||||
);
|
||||
|
||||
/* Insert in hash table all strings up to the end of the match.
|
||||
* strstart-1 and strstart are already inserted. If there is not
|
||||
* enough lookahead, the last two strings are not inserted in
|
||||
* the hash table.
|
||||
*/
|
||||
state.prev_length -= 1;
|
||||
state.lookahead -= state.prev_length;
|
||||
|
||||
let mov_fwd = state.prev_length - 1;
|
||||
if max_insert > state.strstart {
|
||||
let insert_cnt = Ord::min(mov_fwd, max_insert - state.strstart);
|
||||
state.insert_string(state.strstart + 1, insert_cnt);
|
||||
}
|
||||
state.prev_length = 0;
|
||||
state.match_available = false;
|
||||
match_available = false;
|
||||
state.strstart += mov_fwd + 1;
|
||||
|
||||
if bflush {
|
||||
flush_block!(stream, false);
|
||||
}
|
||||
} else if match_available {
|
||||
// If there was no match at the previous position, output a
|
||||
// single literal. If there was a match but the current match
|
||||
// is longer, truncate the previous match to a single literal.
|
||||
let lc = state.window.filled()[state.strstart - 1];
|
||||
bflush = state.tally_lit(lc);
|
||||
if bflush {
|
||||
flush_block_only(stream, false);
|
||||
}
|
||||
|
||||
stream.state.prev_length = match_len;
|
||||
stream.state.strstart += 1;
|
||||
stream.state.lookahead -= 1;
|
||||
if stream.avail_out == 0 {
|
||||
return BlockState::NeedMore;
|
||||
}
|
||||
} else {
|
||||
// There is no previous match to compare with, wait for
|
||||
// the next step to decide.
|
||||
state.prev_length = match_len;
|
||||
state.match_available = true;
|
||||
match_available = true;
|
||||
state.strstart += 1;
|
||||
state.lookahead -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
assert_ne!(flush, DeflateFlush::NoFlush, "no flush?");
|
||||
|
||||
let state = &mut stream.state;
|
||||
|
||||
if state.match_available {
|
||||
let lc = state.window.filled()[state.strstart - 1];
|
||||
let _ = state.tally_lit(lc);
|
||||
state.match_available = false;
|
||||
}
|
||||
|
||||
state.insert = Ord::min(state.strstart, STD_MIN_MATCH - 1);
|
||||
|
||||
if flush == DeflateFlush::Finish {
|
||||
flush_block!(stream, true);
|
||||
return BlockState::FinishDone;
|
||||
}
|
||||
|
||||
if !stream.state.sym_buf.is_empty() {
|
||||
flush_block!(stream, false);
|
||||
}
|
||||
|
||||
BlockState::BlockDone
|
||||
}
|
|
@ -0,0 +1,273 @@
|
|||
use crate::{
|
||||
deflate::{
|
||||
flush_pending, read_buf_window, zng_tr_stored_block, BlockState, DeflateStream, MAX_STORED,
|
||||
},
|
||||
DeflateFlush,
|
||||
};
|
||||
|
||||
pub fn deflate_stored(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockState {
|
||||
// Smallest worthy block size when not flushing or finishing. By default
|
||||
// this is 32K. This can be as small as 507 bytes for memLevel == 1. For
|
||||
// large input and output buffers, the stored block size will be larger.
|
||||
let min_block = Ord::min(
|
||||
stream.state.bit_writer.pending.capacity() - 5,
|
||||
stream.state.w_size,
|
||||
);
|
||||
|
||||
// Copy as many min_block or larger stored blocks directly to next_out as
|
||||
// possible. If flushing, copy the remaining available input to next_out as
|
||||
// stored blocks, if there is enough space.
|
||||
|
||||
// unsigned len, left, have, last = 0;
|
||||
let mut have;
|
||||
let mut last = false;
|
||||
let mut used = stream.avail_in;
|
||||
loop {
|
||||
// maximum deflate stored block length
|
||||
let mut len = MAX_STORED;
|
||||
|
||||
// number of header bytes
|
||||
have = ((stream.state.bit_writer.bits_used + 42) / 8) as usize;
|
||||
|
||||
// we need room for at least the header
|
||||
if stream.avail_out < have as u32 {
|
||||
break;
|
||||
}
|
||||
|
||||
let left = stream.state.strstart as isize - stream.state.block_start;
|
||||
let left = Ord::max(0, left) as usize;
|
||||
|
||||
have = stream.avail_out as usize - have;
|
||||
|
||||
if len > left + stream.avail_in as usize {
|
||||
// limit len to the input
|
||||
len = left + stream.avail_in as usize;
|
||||
}
|
||||
|
||||
len = Ord::min(len, have);
|
||||
|
||||
// If the stored block would be less than min_block in length, or if
|
||||
// unable to copy all of the available input when flushing, then try
|
||||
// copying to the window and the pending buffer instead. Also don't
|
||||
// write an empty block when flushing -- deflate() does that.
|
||||
if len < min_block
|
||||
&& ((len == 0 && flush != DeflateFlush::Finish)
|
||||
|| flush == DeflateFlush::NoFlush
|
||||
|| len != left + stream.avail_in as usize)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// Make a dummy stored block in pending to get the header bytes,
|
||||
// including any pending bits. This also updates the debugging counts.
|
||||
last = flush == DeflateFlush::Finish && len == left + stream.avail_in as usize;
|
||||
zng_tr_stored_block(stream.state, 0..0, last);
|
||||
|
||||
/* Replace the lengths in the dummy stored block with len. */
|
||||
stream.state.bit_writer.pending.rewind(4);
|
||||
stream
|
||||
.state
|
||||
.bit_writer
|
||||
.pending
|
||||
.extend(&(len as u16).to_le_bytes());
|
||||
stream
|
||||
.state
|
||||
.bit_writer
|
||||
.pending
|
||||
.extend(&(!len as u16).to_le_bytes());
|
||||
|
||||
// Write the stored block header bytes.
|
||||
flush_pending(stream);
|
||||
|
||||
// Update debugging counts for the data about to be copied.
|
||||
stream.state.bit_writer.cmpr_bits_add(len << 3);
|
||||
stream.state.bit_writer.sent_bits_add(len << 3);
|
||||
|
||||
if left > 0 {
|
||||
let left = Ord::min(left, len);
|
||||
let src = &stream.state.window.filled()[stream.state.block_start as usize..];
|
||||
|
||||
unsafe { core::ptr::copy_nonoverlapping(src.as_ptr(), stream.next_out, left) };
|
||||
|
||||
stream.next_out = stream.next_out.wrapping_add(left);
|
||||
stream.avail_out = stream.avail_out.wrapping_sub(left as _);
|
||||
stream.total_out = stream.total_out.wrapping_add(left as _);
|
||||
stream.state.block_start += left as isize;
|
||||
len -= left;
|
||||
}
|
||||
|
||||
// Copy uncompressed bytes directly from next_in to next_out, updating the check value.
|
||||
if len > 0 {
|
||||
read_buf_direct_copy(stream, len);
|
||||
}
|
||||
|
||||
if last {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Update the sliding window with the last s->w_size bytes of the copied
|
||||
// data, or append all of the copied data to the existing window if less
|
||||
// than s->w_size bytes were copied. Also update the number of bytes to
|
||||
// insert in the hash tables, in the event that deflateParams() switches to
|
||||
// a non-zero compression level.
|
||||
used -= stream.avail_in; /* number of input bytes directly copied */
|
||||
|
||||
if used > 0 {
|
||||
let state = &mut stream.state;
|
||||
// If any input was used, then no unused input remains in the window, therefore s->block_start == s->strstart.
|
||||
if used as usize >= state.w_size {
|
||||
/* supplant the previous history */
|
||||
state.matches = 2; /* clear hash */
|
||||
|
||||
let src = stream.next_in.wrapping_sub(state.w_size);
|
||||
|
||||
unsafe { state.window.copy_and_initialize(0..state.w_size, src) };
|
||||
|
||||
state.strstart = state.w_size;
|
||||
state.insert = state.strstart;
|
||||
} else {
|
||||
if state.window_size - state.strstart <= used as usize {
|
||||
/* Slide the window down. */
|
||||
state.strstart -= state.w_size;
|
||||
|
||||
state
|
||||
.window
|
||||
.filled_mut()
|
||||
.copy_within(state.w_size..state.w_size + state.strstart, 0);
|
||||
|
||||
if state.matches < 2 {
|
||||
state.matches += 1; /* add a pending slide_hash() */
|
||||
}
|
||||
state.insert = Ord::min(state.insert, state.strstart);
|
||||
}
|
||||
|
||||
let src = stream.next_in.wrapping_sub(used as usize);
|
||||
let dst = state.strstart..state.strstart + used as usize;
|
||||
unsafe { state.window.copy_and_initialize(dst, src) };
|
||||
|
||||
state.strstart += used as usize;
|
||||
state.insert += Ord::min(used as usize, state.w_size - state.insert);
|
||||
}
|
||||
state.block_start = state.strstart as isize;
|
||||
}
|
||||
|
||||
if last {
|
||||
return BlockState::FinishDone;
|
||||
}
|
||||
|
||||
// If flushing and all input has been consumed, then done.
|
||||
if flush != DeflateFlush::NoFlush
|
||||
&& flush != DeflateFlush::Finish
|
||||
&& stream.avail_in == 0
|
||||
&& stream.state.strstart as isize == stream.state.block_start
|
||||
{
|
||||
return BlockState::BlockDone;
|
||||
}
|
||||
|
||||
// Fill the window with any remaining input
|
||||
let mut have = stream.state.window_size - stream.state.strstart;
|
||||
if stream.avail_in as usize > have && stream.state.block_start >= stream.state.w_size as isize {
|
||||
// slide the window down
|
||||
let state = &mut stream.state;
|
||||
state.block_start -= state.w_size as isize;
|
||||
state.strstart -= state.w_size;
|
||||
state
|
||||
.window
|
||||
.filled_mut()
|
||||
.copy_within(state.w_size..state.w_size + state.strstart, 0);
|
||||
|
||||
if state.matches < 2 {
|
||||
// add a pending slide_hash
|
||||
state.matches += 1;
|
||||
}
|
||||
|
||||
have += state.w_size; // more space now
|
||||
state.insert = Ord::min(state.insert, state.strstart);
|
||||
}
|
||||
|
||||
let have = Ord::min(have, stream.avail_in as usize);
|
||||
if have > 0 {
|
||||
read_buf_window(stream, stream.state.strstart, have);
|
||||
|
||||
let state = &mut stream.state;
|
||||
state.strstart += have;
|
||||
state.insert += Ord::min(have, state.w_size - state.insert);
|
||||
}
|
||||
|
||||
// There was not enough avail_out to write a complete worthy or flushed
|
||||
// stored block to next_out. Write a stored block to pending instead, if we
|
||||
// have enough input for a worthy block, or if flushing and there is enough
|
||||
// room for the remaining input as a stored block in the pending buffer.
|
||||
|
||||
// number of header bytes
|
||||
let state = &mut stream.state;
|
||||
let have = ((state.bit_writer.bits_used + 42) >> 3) as usize;
|
||||
|
||||
// maximum stored block length that will fit in pending:
|
||||
let have = Ord::min(state.bit_writer.pending.capacity() - have, MAX_STORED);
|
||||
let min_block = Ord::min(have, state.w_size);
|
||||
let left = state.strstart as isize - state.block_start;
|
||||
|
||||
if left >= min_block as isize
|
||||
|| ((left > 0 || flush == DeflateFlush::Finish)
|
||||
&& flush != DeflateFlush::NoFlush
|
||||
&& stream.avail_in == 0
|
||||
&& left <= have as isize)
|
||||
{
|
||||
let len = Ord::min(left as usize, have); // TODO wrapping?
|
||||
last = flush == DeflateFlush::Finish && stream.avail_in == 0 && len == (left as usize);
|
||||
|
||||
let range = state.block_start as usize..state.block_start as usize + len;
|
||||
zng_tr_stored_block(state, range, last);
|
||||
|
||||
state.block_start += len as isize;
|
||||
flush_pending(stream);
|
||||
}
|
||||
|
||||
// We've done all we can with the available input and output.
|
||||
if last {
|
||||
BlockState::FinishStarted
|
||||
} else {
|
||||
BlockState::NeedMore
|
||||
}
|
||||
}
|
||||
|
||||
fn read_buf_direct_copy(stream: &mut DeflateStream, size: usize) -> usize {
|
||||
let len = Ord::min(stream.avail_in as usize, size);
|
||||
let output = stream.next_out;
|
||||
|
||||
if len == 0 {
|
||||
return 0;
|
||||
}
|
||||
|
||||
stream.avail_in -= len as u32;
|
||||
|
||||
if stream.state.wrap == 2 {
|
||||
// we likely cannot fuse the crc32 and the copy here because the input can be changed by
|
||||
// a concurrent thread. Therefore it cannot be converted into a slice!
|
||||
unsafe { core::ptr::copy_nonoverlapping(stream.next_in, output, len) }
|
||||
|
||||
let data = unsafe { core::slice::from_raw_parts(output, len) };
|
||||
stream.state.crc_fold.fold(data, 0);
|
||||
} else if stream.state.wrap == 1 {
|
||||
// we cannot fuse the adler and the copy in our case, because adler32 takes a slice.
|
||||
// Another process is allowed to concurrently modify stream.next_in, so we cannot turn it
|
||||
// into a rust slice (violates its safety requirements)
|
||||
unsafe { core::ptr::copy_nonoverlapping(stream.next_in, output, len) }
|
||||
|
||||
let data = unsafe { core::slice::from_raw_parts(output, len) };
|
||||
stream.adler = crate::adler32::adler32(stream.adler as u32, data) as _;
|
||||
} else {
|
||||
unsafe { core::ptr::copy_nonoverlapping(stream.next_in, output, len) }
|
||||
}
|
||||
|
||||
stream.next_in = stream.next_in.wrapping_add(len);
|
||||
stream.total_in += len as crate::c_api::z_size;
|
||||
|
||||
stream.next_out = stream.next_out.wrapping_add(len as _);
|
||||
stream.avail_out = stream.avail_out.wrapping_sub(len as _);
|
||||
stream.total_out = stream.total_out.wrapping_add(len as _);
|
||||
|
||||
len
|
||||
}
|
|
@ -0,0 +1,221 @@
|
|||
#[warn(unsafe_op_in_unsafe_fn)]
|
||||
#[cfg(test)]
|
||||
const MAX_COMPARE_SIZE: usize = 256;
|
||||
|
||||
pub fn compare256_slice(src0: &[u8], src1: &[u8]) -> usize {
|
||||
let src0 = first_chunk::<_, 256>(src0).unwrap();
|
||||
let src1 = first_chunk::<_, 256>(src1).unwrap();
|
||||
|
||||
compare256(src0, src1)
|
||||
}
|
||||
|
||||
fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::cpu_features::is_enabled_avx2() {
|
||||
return unsafe { avx2::compare256(src0, src1) };
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
if crate::cpu_features::is_enabled_neon() {
|
||||
return unsafe { neon::compare256(src0, src1) };
|
||||
}
|
||||
|
||||
rust::compare256(src0, src1)
|
||||
}
|
||||
|
||||
pub fn compare256_rle_slice(byte: u8, src: &[u8]) -> usize {
|
||||
rust::compare256_rle(byte, src)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn first_chunk<T, const N: usize>(slice: &[T]) -> Option<&[T; N]> {
|
||||
if slice.len() < N {
|
||||
None
|
||||
} else {
|
||||
// SAFETY: We explicitly check for the correct number of elements,
|
||||
// and do not let the reference outlive the slice.
|
||||
Some(unsafe { &*(slice.as_ptr() as *const [T; N]) })
|
||||
}
|
||||
}
|
||||
|
||||
mod rust {
|
||||
|
||||
pub fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
|
||||
// only unrolls 4 iterations; zlib-ng unrolls 8
|
||||
src0.iter().zip(src1).take_while(|(x, y)| x == y).count()
|
||||
}
|
||||
|
||||
// run-length encoding
|
||||
pub fn compare256_rle(byte: u8, src: &[u8]) -> usize {
|
||||
assert!(src.len() >= 256, "too short {}", src.len());
|
||||
|
||||
let mut sv = byte as u64;
|
||||
sv |= sv << 8;
|
||||
sv |= sv << 16;
|
||||
sv |= sv << 32;
|
||||
|
||||
let mut len = 0;
|
||||
|
||||
// this optimizes well because we statically limit the slice to 256 bytes.
|
||||
// the loop gets unrolled 4 times automatically.
|
||||
for chunk in src[..256].chunks_exact(8) {
|
||||
let mv = u64::from_le_bytes(chunk.try_into().unwrap());
|
||||
|
||||
let diff = sv ^ mv;
|
||||
|
||||
if diff > 0 {
|
||||
let match_byte = diff.trailing_zeros() / 8;
|
||||
return len + match_byte as usize;
|
||||
}
|
||||
|
||||
len += 8
|
||||
}
|
||||
|
||||
256
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compare256() {
|
||||
let str1 = [b'a'; super::MAX_COMPARE_SIZE];
|
||||
let mut str2 = [b'a'; super::MAX_COMPARE_SIZE];
|
||||
|
||||
for i in 0..str1.len() {
|
||||
str2[i] = 0;
|
||||
|
||||
let match_len = compare256(&str1, &str2);
|
||||
assert_eq!(match_len, i);
|
||||
|
||||
str2[i] = b'a';
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compare256_rle() {
|
||||
let mut string = [b'a'; super::MAX_COMPARE_SIZE];
|
||||
|
||||
for i in 0..string.len() {
|
||||
string[i] = 0;
|
||||
|
||||
let match_len = compare256_rle(b'a', &string);
|
||||
assert_eq!(match_len, i);
|
||||
|
||||
string[i] = b'a';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod neon {
|
||||
use core::arch::aarch64::{
|
||||
uint8x16_t, veorq_u8, vgetq_lane_u64, vld1q_u8, vreinterpretq_u64_u8,
|
||||
};
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// Behavior is undefined if the `neon` target feature is not enabled
|
||||
#[target_feature(enable = "neon")]
|
||||
pub unsafe fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
|
||||
let src0: &[[u8; 16]; 16] = unsafe { core::mem::transmute(src0) };
|
||||
let src1: &[[u8; 16]; 16] = unsafe { core::mem::transmute(src1) };
|
||||
|
||||
let mut len = 0;
|
||||
|
||||
for (a, b) in src0.iter().zip(src1) {
|
||||
unsafe {
|
||||
let a: uint8x16_t = vld1q_u8(a.as_ptr());
|
||||
let b: uint8x16_t = vld1q_u8(b.as_ptr());
|
||||
|
||||
let cmp = veorq_u8(a, b);
|
||||
|
||||
let lane = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 0);
|
||||
if lane != 0 {
|
||||
let match_byte = lane.trailing_zeros() / 8;
|
||||
return len + match_byte as usize;
|
||||
}
|
||||
|
||||
len += 8;
|
||||
|
||||
let lane = vgetq_lane_u64(vreinterpretq_u64_u8(cmp), 1);
|
||||
if lane != 0 {
|
||||
let match_byte = lane.trailing_zeros() / 8;
|
||||
return len + match_byte as usize;
|
||||
}
|
||||
|
||||
len += 8;
|
||||
}
|
||||
}
|
||||
|
||||
256
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compare256() {
|
||||
if crate::cpu_features::is_enabled_neon() {
|
||||
let str1 = [b'a'; super::MAX_COMPARE_SIZE];
|
||||
let mut str2 = [b'a'; super::MAX_COMPARE_SIZE];
|
||||
|
||||
for i in 0..str1.len() {
|
||||
str2[i] = 0;
|
||||
|
||||
let match_len = unsafe { compare256(&str1, &str2) };
|
||||
assert_eq!(match_len, i);
|
||||
|
||||
str2[i] = b'a';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod avx2 {
|
||||
use core::arch::x86_64::{
|
||||
__m256i, _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8,
|
||||
};
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// Behavior is undefined if the `avx` target feature is not enabled
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub unsafe fn compare256(src0: &[u8; 256], src1: &[u8; 256]) -> usize {
|
||||
let src0: &[[u8; 32]; 8] = unsafe { core::mem::transmute(src0) };
|
||||
let src1: &[[u8; 32]; 8] = unsafe { core::mem::transmute(src1) };
|
||||
|
||||
let mut len = 0;
|
||||
|
||||
unsafe {
|
||||
for (chunk0, chunk1) in src0.iter().zip(src1) {
|
||||
let ymm_src0 = _mm256_loadu_si256(chunk0.as_ptr() as *const __m256i);
|
||||
let ymm_src1 = _mm256_loadu_si256(chunk1.as_ptr() as *const __m256i);
|
||||
|
||||
let ymm_cmp = _mm256_cmpeq_epi8(ymm_src0, ymm_src1); /* non-identical bytes = 00, identical bytes = FF */
|
||||
let mask = _mm256_movemask_epi8(ymm_cmp) as u32;
|
||||
|
||||
if mask != 0xFFFFFFFF {
|
||||
let match_byte = (!mask).trailing_zeros(); /* Invert bits so identical = 0 */
|
||||
return len + match_byte as usize;
|
||||
}
|
||||
|
||||
len += 32;
|
||||
}
|
||||
}
|
||||
|
||||
256
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compare256() {
|
||||
if crate::cpu_features::is_enabled_avx2() {
|
||||
let str1 = [b'a'; super::MAX_COMPARE_SIZE];
|
||||
let mut str2 = [b'a'; super::MAX_COMPARE_SIZE];
|
||||
|
||||
for i in 0..str1.len() {
|
||||
str2[i] = 0;
|
||||
|
||||
let match_len = unsafe { compare256(&str1, &str2) };
|
||||
assert_eq!(match_len, i);
|
||||
|
||||
str2[i] = b'a';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,295 @@
|
|||
#![warn(unsafe_op_in_unsafe_fn)]
|
||||
use crate::deflate::{State, HASH_SIZE, STD_MIN_MATCH};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum HashCalcVariant {
|
||||
Standard,
|
||||
Crc32,
|
||||
Roll,
|
||||
}
|
||||
|
||||
impl HashCalcVariant {
|
||||
#[cfg(test)]
|
||||
pub fn for_compression_level(level: usize) -> Self {
|
||||
let max_chain_length = crate::deflate::algorithm::CONFIGURATION_TABLE[level].max_chain;
|
||||
Self::for_max_chain_length(max_chain_length as usize)
|
||||
}
|
||||
|
||||
/// Use rolling hash for deflate_slow algorithm with level 9. It allows us to
|
||||
/// properly lookup different hash chains to speed up longest_match search.
|
||||
pub fn for_max_chain_length(max_chain_length: usize) -> Self {
|
||||
if max_chain_length > 1024 {
|
||||
HashCalcVariant::Roll
|
||||
} else if Crc32HashCalc::is_supported() {
|
||||
HashCalcVariant::Crc32
|
||||
} else {
|
||||
HashCalcVariant::Standard
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StandardHashCalc;
|
||||
|
||||
impl StandardHashCalc {
|
||||
const HASH_CALC_OFFSET: usize = 0;
|
||||
|
||||
const HASH_CALC_MASK: u32 = (HASH_SIZE - 1) as u32;
|
||||
|
||||
fn hash_calc(_: u32, val: u32) -> u32 {
|
||||
const HASH_SLIDE: u32 = 16;
|
||||
val.wrapping_mul(2654435761) >> HASH_SLIDE
|
||||
}
|
||||
|
||||
pub fn update_hash(h: u32, val: u32) -> u32 {
|
||||
Self::hash_calc(h, val) & Self::HASH_CALC_MASK
|
||||
}
|
||||
|
||||
pub fn quick_insert_string(state: &mut State, string: usize) -> u16 {
|
||||
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
|
||||
let val = u32::from_le_bytes(slice[..4].try_into().unwrap());
|
||||
|
||||
let hm = Self::update_hash(0, val) as usize;
|
||||
|
||||
let head = state.head[hm];
|
||||
if head != string as u16 {
|
||||
state.prev[string & state.w_mask] = head;
|
||||
state.head[hm] = string as u16;
|
||||
}
|
||||
|
||||
head
|
||||
}
|
||||
|
||||
pub fn insert_string(state: &mut State, string: usize, count: usize) {
|
||||
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
|
||||
|
||||
// .take(count) generates worse assembly
|
||||
for (i, w) in slice[..count + 3].windows(4).enumerate() {
|
||||
let idx = string as u16 + i as u16;
|
||||
|
||||
let val = u32::from_le_bytes(w.try_into().unwrap());
|
||||
|
||||
let hm = Self::update_hash(0, val) as usize;
|
||||
|
||||
let head = state.head[hm];
|
||||
if head != idx {
|
||||
state.prev[idx as usize & state.w_mask] = head;
|
||||
state.head[hm] = idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RollHashCalc;
|
||||
|
||||
impl RollHashCalc {
|
||||
const HASH_CALC_OFFSET: usize = STD_MIN_MATCH - 1;
|
||||
|
||||
const HASH_CALC_MASK: u32 = (1 << 15) - 1;
|
||||
|
||||
fn hash_calc(h: u32, val: u32) -> u32 {
|
||||
const HASH_SLIDE: u32 = 5;
|
||||
(h << HASH_SLIDE) ^ val
|
||||
}
|
||||
|
||||
pub fn update_hash(h: u32, val: u32) -> u32 {
|
||||
Self::hash_calc(h, val) & Self::HASH_CALC_MASK
|
||||
}
|
||||
|
||||
pub fn quick_insert_string(state: &mut State, string: usize) -> u16 {
|
||||
let val = state.window.filled()[string + Self::HASH_CALC_OFFSET] as u32;
|
||||
|
||||
state.ins_h = Self::hash_calc(state.ins_h as u32, val) as usize;
|
||||
state.ins_h &= Self::HASH_CALC_MASK as usize;
|
||||
|
||||
let hm = state.ins_h;
|
||||
|
||||
let head = state.head[hm];
|
||||
if head != string as u16 {
|
||||
state.prev[string & state.w_mask] = head;
|
||||
state.head[hm] = string as u16;
|
||||
}
|
||||
|
||||
head
|
||||
}
|
||||
|
||||
pub fn insert_string(state: &mut State, string: usize, count: usize) {
|
||||
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..][..count];
|
||||
|
||||
for (i, val) in slice.iter().copied().enumerate() {
|
||||
let idx = string as u16 + i as u16;
|
||||
|
||||
state.ins_h = Self::hash_calc(state.ins_h as u32, val as u32) as usize;
|
||||
state.ins_h &= Self::HASH_CALC_MASK as usize;
|
||||
let hm = state.ins_h;
|
||||
|
||||
let head = state.head[hm];
|
||||
if head != idx {
|
||||
state.prev[idx as usize & state.w_mask] = head;
|
||||
state.head[hm] = idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Crc32HashCalc;
|
||||
|
||||
impl Crc32HashCalc {
|
||||
fn is_supported() -> bool {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
return crate::cpu_features::is_enabled_sse42();
|
||||
|
||||
// NOTE: more recent versions of zlib-ng no longer use the crc instructions on aarch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
return crate::cpu_features::is_enabled_crc();
|
||||
|
||||
#[allow(unreachable_code)]
|
||||
false
|
||||
}
|
||||
|
||||
const HASH_CALC_OFFSET: usize = 0;
|
||||
|
||||
const HASH_CALC_MASK: u32 = (HASH_SIZE - 1) as u32;
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
|
||||
unsafe { core::arch::x86::_mm_crc32_u32(h, val) }
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
|
||||
unsafe { core::arch::x86_64::_mm_crc32_u32(h, val) }
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[target_feature(enable = "neon")]
|
||||
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
|
||||
unsafe { crate::crc32::acle::__crc32w(h, val) }
|
||||
}
|
||||
|
||||
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
|
||||
unsafe fn hash_calc(_h: u32, _val: u32) -> u32 {
|
||||
assert!(!Self::is_supported());
|
||||
unimplemented!("there is no hardware support on this platform")
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
|
||||
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
|
||||
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
|
||||
pub unsafe fn update_hash(h: u32, val: u32) -> u32 {
|
||||
(unsafe { Self::hash_calc(h, val) }) & Self::HASH_CALC_MASK
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
|
||||
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
|
||||
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
|
||||
pub unsafe fn quick_insert_string(state: &mut State, string: usize) -> u16 {
|
||||
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
|
||||
let val = u32::from_le_bytes(slice[..4].try_into().unwrap());
|
||||
|
||||
let hm = unsafe { Self::update_hash(0, val) } as usize;
|
||||
|
||||
let head = state.head[hm];
|
||||
if head != string as u16 {
|
||||
state.prev[string & state.w_mask] = head;
|
||||
state.head[hm] = string as u16;
|
||||
}
|
||||
|
||||
head
|
||||
}
|
||||
|
||||
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
|
||||
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
|
||||
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
|
||||
pub unsafe fn insert_string(state: &mut State, string: usize, count: usize) {
|
||||
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
|
||||
|
||||
// .take(count) generates worse assembly
|
||||
for (i, w) in slice[..count + 3].windows(4).enumerate() {
|
||||
let idx = string as u16 + i as u16;
|
||||
|
||||
let val = u32::from_le_bytes(w.try_into().unwrap());
|
||||
|
||||
let hm = unsafe { Self::update_hash(0, val) } as usize;
|
||||
|
||||
let head = state.head[hm];
|
||||
if head != idx {
|
||||
state.prev[idx as usize & state.w_mask] = head;
|
||||
state.head[hm] = idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(
|
||||
not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")),
|
||||
ignore = "no crc32 hardware support on this platform"
|
||||
)]
|
||||
fn crc32_hash_calc() {
|
||||
if !Crc32HashCalc::is_supported() {
|
||||
return;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 170926112), 500028708);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 537538592), 3694129053);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 538970672), 373925026);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 538976266), 4149335727);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 538976288), 1767342659);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 4090502627);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1744703325);
|
||||
} else {
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2067507791);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 2086141925);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 716394180);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1396070634);
|
||||
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 637105634);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn roll_hash_calc() {
|
||||
assert_eq!(RollHashCalc::hash_calc(2565, 93), 82173);
|
||||
assert_eq!(RollHashCalc::hash_calc(16637, 10), 532394);
|
||||
assert_eq!(RollHashCalc::hash_calc(8106, 100), 259364);
|
||||
assert_eq!(RollHashCalc::hash_calc(29988, 101), 959717);
|
||||
assert_eq!(RollHashCalc::hash_calc(9445, 98), 302274);
|
||||
assert_eq!(RollHashCalc::hash_calc(7362, 117), 235573);
|
||||
assert_eq!(RollHashCalc::hash_calc(6197, 103), 198343);
|
||||
assert_eq!(RollHashCalc::hash_calc(1735, 32), 55488);
|
||||
assert_eq!(RollHashCalc::hash_calc(22720, 61), 727101);
|
||||
assert_eq!(RollHashCalc::hash_calc(6205, 32), 198528);
|
||||
assert_eq!(RollHashCalc::hash_calc(3826, 117), 122421);
|
||||
assert_eq!(RollHashCalc::hash_calc(24117, 101), 771781);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn standard_hash_calc() {
|
||||
assert_eq!(StandardHashCalc::hash_calc(0, 807411760), 65468);
|
||||
assert_eq!(StandardHashCalc::hash_calc(0, 540024864), 42837);
|
||||
assert_eq!(StandardHashCalc::hash_calc(0, 538980384), 33760);
|
||||
assert_eq!(StandardHashCalc::hash_calc(0, 775430176), 8925);
|
||||
assert_eq!(StandardHashCalc::hash_calc(0, 941629472), 42053);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,359 @@
|
|||
use crate::deflate::{State, MIN_LOOKAHEAD, STD_MAX_MATCH, STD_MIN_MATCH};
|
||||
|
||||
type Pos = u16;
|
||||
|
||||
const EARLY_EXIT_TRIGGER_LEVEL: i8 = 5;
|
||||
|
||||
const UNALIGNED_OK: bool = cfg!(any(
|
||||
target_arch = "x86",
|
||||
target_arch = "x86_64",
|
||||
target_arch = "arm",
|
||||
target_arch = "aarch64",
|
||||
target_arch = "powerpc64",
|
||||
));
|
||||
|
||||
const UNALIGNED64_OK: bool = cfg!(any(
|
||||
target_arch = "x86_64",
|
||||
target_arch = "aarch64",
|
||||
target_arch = "powerpc64",
|
||||
));
|
||||
|
||||
pub fn longest_match(state: &crate::deflate::State, cur_match: u16) -> (usize, usize) {
|
||||
longest_match_help::<false>(state, cur_match)
|
||||
}
|
||||
|
||||
pub fn longest_match_slow(state: &crate::deflate::State, cur_match: u16) -> (usize, usize) {
|
||||
longest_match_help::<true>(state, cur_match)
|
||||
}
|
||||
|
||||
fn longest_match_help<const SLOW: bool>(
|
||||
state: &crate::deflate::State,
|
||||
mut cur_match: u16,
|
||||
) -> (usize, usize) {
|
||||
let mut match_start = state.match_start;
|
||||
|
||||
let strstart = state.strstart;
|
||||
let wmask = state.w_mask;
|
||||
let window = state.window.filled();
|
||||
let scan = &window[strstart..];
|
||||
let mut limit: Pos;
|
||||
let limit_base: Pos;
|
||||
let early_exit: bool;
|
||||
|
||||
let mut chain_length: usize;
|
||||
let mut best_len: usize;
|
||||
|
||||
let lookahead = state.lookahead;
|
||||
let mut match_offset = 0;
|
||||
|
||||
let mut scan_start = [0u8; 8];
|
||||
let mut scan_end = [0u8; 8];
|
||||
|
||||
macro_rules! goto_next_in_chain {
|
||||
() => {
|
||||
chain_length -= 1;
|
||||
if chain_length > 0 {
|
||||
cur_match = state.prev[cur_match as usize & wmask];
|
||||
|
||||
if cur_match > limit {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return (best_len, match_start);
|
||||
};
|
||||
}
|
||||
|
||||
// The code is optimized for STD_MAX_MATCH-2 multiple of 16.
|
||||
assert_eq!(STD_MAX_MATCH, 258, "Code too clever");
|
||||
|
||||
best_len = if state.prev_length > 0 {
|
||||
state.prev_length
|
||||
} else {
|
||||
STD_MIN_MATCH - 1
|
||||
};
|
||||
|
||||
// Calculate read offset which should only extend an extra byte to find the next best match length.
|
||||
let mut offset = best_len - 1;
|
||||
if best_len >= core::mem::size_of::<u32>() && UNALIGNED_OK {
|
||||
offset -= 2;
|
||||
if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
|
||||
offset -= 4;
|
||||
}
|
||||
}
|
||||
|
||||
if UNALIGNED64_OK {
|
||||
scan_start.copy_from_slice(&scan[..core::mem::size_of::<u64>()]);
|
||||
scan_end.copy_from_slice(&scan[offset..][..core::mem::size_of::<u64>()]);
|
||||
} else if UNALIGNED_OK {
|
||||
scan_start[..4].copy_from_slice(&scan[..core::mem::size_of::<u32>()]);
|
||||
scan_end[..4].copy_from_slice(&scan[offset..][..core::mem::size_of::<u32>()]);
|
||||
} else {
|
||||
scan_start[..2].copy_from_slice(&scan[..core::mem::size_of::<u16>()]);
|
||||
scan_end[..2].copy_from_slice(&scan[offset..][..core::mem::size_of::<u16>()]);
|
||||
}
|
||||
|
||||
let mut mbase_start = window.as_ptr();
|
||||
let mut mbase_end = window[offset..].as_ptr();
|
||||
|
||||
// Don't waste too much time by following a chain if we already have a good match
|
||||
chain_length = state.max_chain_length;
|
||||
if best_len >= state.good_match {
|
||||
chain_length >>= 2;
|
||||
}
|
||||
let nice_match = state.nice_match;
|
||||
|
||||
// Stop when cur_match becomes <= limit. To simplify the code,
|
||||
// we prevent matches with the string of window index 0
|
||||
limit = strstart.saturating_sub(state.max_dist()) as Pos;
|
||||
|
||||
// look for a better string offset
|
||||
if SLOW {
|
||||
limit_base = limit;
|
||||
|
||||
if best_len >= STD_MIN_MATCH {
|
||||
/* We're continuing search (lazy evaluation). */
|
||||
let mut pos: Pos;
|
||||
|
||||
// Find a most distant chain starting from scan with index=1 (index=0 corresponds
|
||||
// to cur_match). We cannot use s->prev[strstart+1,...] immediately, because
|
||||
// these strings are not yet inserted into the hash table.
|
||||
let Some([_cur_match, scan1, scan2, scanrest @ ..]) = scan.get(..best_len + 1) else {
|
||||
panic!("invalid scan");
|
||||
};
|
||||
|
||||
let mut hash = 0;
|
||||
hash = state.update_hash(hash, *scan1 as u32);
|
||||
hash = state.update_hash(hash, *scan2 as u32);
|
||||
|
||||
for (i, b) in scanrest.iter().enumerate() {
|
||||
hash = state.update_hash(hash, *b as u32);
|
||||
|
||||
/* If we're starting with best_len >= 3, we can use offset search. */
|
||||
pos = state.head[hash as usize];
|
||||
if pos < cur_match {
|
||||
match_offset = (i + 1) as Pos;
|
||||
cur_match = pos;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update offset-dependent variables */
|
||||
limit = limit_base + match_offset;
|
||||
if cur_match <= limit {
|
||||
return break_matching(state, best_len, match_start);
|
||||
}
|
||||
|
||||
mbase_start = mbase_start.wrapping_sub(match_offset as usize);
|
||||
mbase_end = mbase_end.wrapping_sub(match_offset as usize);
|
||||
}
|
||||
|
||||
early_exit = false;
|
||||
} else {
|
||||
// must initialize this variable
|
||||
limit_base = 0;
|
||||
early_exit = state.level < EARLY_EXIT_TRIGGER_LEVEL;
|
||||
}
|
||||
|
||||
assert!(
|
||||
strstart <= state.window_size - MIN_LOOKAHEAD,
|
||||
"need lookahead"
|
||||
);
|
||||
|
||||
loop {
|
||||
if cur_match as usize >= strstart {
|
||||
break;
|
||||
}
|
||||
|
||||
// Skip to next match if the match length cannot increase or if the match length is
|
||||
// less than 2. Note that the checks below for insufficient lookahead only occur
|
||||
// occasionally for performance reasons.
|
||||
// Therefore uninitialized memory will be accessed and conditional jumps will be made
|
||||
// that depend on those values. However the length of the match is limited to the
|
||||
// lookahead, so the output of deflate is not affected by the uninitialized values.
|
||||
|
||||
// # Safety
|
||||
//
|
||||
// The two pointers must be valid for reads of N bytes.
|
||||
#[inline(always)]
|
||||
unsafe fn memcmp_n_ptr<const N: usize>(src0: *const u8, src1: *const u8) -> bool {
|
||||
let src0_cmp = core::ptr::read(src0 as *const [u8; N]);
|
||||
let src1_cmp = core::ptr::read(src1 as *const [u8; N]);
|
||||
|
||||
src0_cmp == src1_cmp
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn is_match<const N: usize>(
|
||||
cur_match: u16,
|
||||
mbase_start: *const u8,
|
||||
mbase_end: *const u8,
|
||||
scan_start: *const u8,
|
||||
scan_end: *const u8,
|
||||
) -> bool {
|
||||
let be = mbase_end.wrapping_add(cur_match as usize);
|
||||
let bs = mbase_start.wrapping_add(cur_match as usize);
|
||||
|
||||
memcmp_n_ptr::<N>(be, scan_end) && memcmp_n_ptr::<N>(bs, scan_start)
|
||||
}
|
||||
|
||||
// first, do a quick check on the start and end bytes. Go to the next item in the chain if
|
||||
// these bytes don't match.
|
||||
unsafe {
|
||||
let scan_start = scan_start.as_ptr();
|
||||
let scan_end = scan_end.as_ptr();
|
||||
|
||||
if UNALIGNED_OK {
|
||||
if best_len < core::mem::size_of::<u32>() {
|
||||
loop {
|
||||
if is_match::<2>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
|
||||
break;
|
||||
}
|
||||
|
||||
goto_next_in_chain!();
|
||||
}
|
||||
} else if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
|
||||
loop {
|
||||
if is_match::<8>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
|
||||
break;
|
||||
}
|
||||
|
||||
goto_next_in_chain!();
|
||||
}
|
||||
} else {
|
||||
loop {
|
||||
if is_match::<4>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
|
||||
break;
|
||||
}
|
||||
|
||||
goto_next_in_chain!();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
loop {
|
||||
if memcmp_n_ptr::<2>(mbase_end.wrapping_add(cur_match as usize), scan_end)
|
||||
&& memcmp_n_ptr::<2>(
|
||||
mbase_start.wrapping_add(cur_match as usize),
|
||||
scan.as_ptr(),
|
||||
)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
goto_next_in_chain!();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we know that there is at least some match. Now count how many bytes really match
|
||||
let len = {
|
||||
// TODO this just looks so incredibly unsafe!
|
||||
let src1: &[u8; 256] =
|
||||
unsafe { &*mbase_start.wrapping_add(cur_match as usize + 2).cast() };
|
||||
|
||||
crate::deflate::compare256::compare256_slice(&scan[2..], src1) + 2
|
||||
};
|
||||
|
||||
assert!(
|
||||
scan.as_ptr() as usize + len <= window.as_ptr() as usize + (state.window_size - 1),
|
||||
"wild scan"
|
||||
);
|
||||
|
||||
if len > best_len {
|
||||
match_start = (cur_match - match_offset) as usize;
|
||||
|
||||
/* Do not look for matches beyond the end of the input. */
|
||||
if len > lookahead {
|
||||
return (lookahead, match_start);
|
||||
}
|
||||
best_len = len;
|
||||
if best_len >= nice_match {
|
||||
return (best_len, match_start);
|
||||
}
|
||||
|
||||
offset = best_len - 1;
|
||||
if best_len >= core::mem::size_of::<u32>() && UNALIGNED_OK {
|
||||
offset -= 2;
|
||||
if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
|
||||
offset -= 4;
|
||||
}
|
||||
}
|
||||
|
||||
if UNALIGNED64_OK {
|
||||
scan_end.copy_from_slice(&scan[offset..][..core::mem::size_of::<u64>()]);
|
||||
} else if UNALIGNED_OK {
|
||||
scan_end[..4].copy_from_slice(&scan[offset..][..core::mem::size_of::<u32>()]);
|
||||
} else {
|
||||
scan_end[..2].copy_from_slice(&scan[offset..][..core::mem::size_of::<u16>()]);
|
||||
}
|
||||
|
||||
// Look for a better string offset
|
||||
if SLOW && len > STD_MIN_MATCH && match_start + len < strstart {
|
||||
let mut pos: Pos;
|
||||
// uint32_t i, hash;
|
||||
// unsigned char *scan_endstr;
|
||||
|
||||
/* Go back to offset 0 */
|
||||
cur_match -= match_offset;
|
||||
match_offset = 0;
|
||||
let mut next_pos = cur_match;
|
||||
|
||||
for i in 0..=len - STD_MIN_MATCH {
|
||||
pos = state.prev[(cur_match as usize + i) & wmask];
|
||||
if pos < next_pos {
|
||||
/* Hash chain is more distant, use it */
|
||||
if pos <= limit_base + i as Pos {
|
||||
return break_matching(state, best_len, match_start);
|
||||
}
|
||||
next_pos = pos;
|
||||
match_offset = i as Pos;
|
||||
}
|
||||
}
|
||||
/* Switch cur_match to next_pos chain */
|
||||
cur_match = next_pos;
|
||||
|
||||
/* Try hash head at len-(STD_MIN_MATCH-1) position to see if we could get
|
||||
* a better cur_match at the end of string. Using (STD_MIN_MATCH-1) lets
|
||||
* us include one more byte into hash - the byte which will be checked
|
||||
* in main loop now, and which allows to grow match by 1.
|
||||
*/
|
||||
let [scan0, scan1, scan2, ..] = scan[len - (STD_MIN_MATCH + 1)..] else {
|
||||
panic!("index out of bounds");
|
||||
};
|
||||
|
||||
let mut hash = 0;
|
||||
hash = state.update_hash(hash, scan0 as u32);
|
||||
hash = state.update_hash(hash, scan1 as u32);
|
||||
hash = state.update_hash(hash, scan2 as u32);
|
||||
|
||||
pos = state.head[hash as usize];
|
||||
if pos < cur_match {
|
||||
match_offset = (len - (STD_MIN_MATCH + 1)) as Pos;
|
||||
if pos <= limit_base + match_offset {
|
||||
return break_matching(state, best_len, match_start);
|
||||
}
|
||||
cur_match = pos;
|
||||
}
|
||||
|
||||
/* Update offset-dependent variables */
|
||||
limit = limit_base + match_offset;
|
||||
mbase_start = window.as_ptr().wrapping_sub(match_offset as usize);
|
||||
mbase_end = mbase_start.wrapping_add(offset);
|
||||
continue;
|
||||
}
|
||||
|
||||
mbase_end = mbase_start.wrapping_add(offset);
|
||||
} else if !SLOW && early_exit {
|
||||
// The probability of finding a match later if we here is pretty low, so for
|
||||
// performance it's best to outright stop here for the lower compression levels
|
||||
break;
|
||||
}
|
||||
|
||||
goto_next_in_chain!();
|
||||
}
|
||||
|
||||
(best_len, match_start)
|
||||
}
|
||||
|
||||
fn break_matching(state: &State, best_len: usize, match_start: usize) -> (usize, usize) {
|
||||
(Ord::min(best_len, state.lookahead), match_start)
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
use core::marker::PhantomData;
|
||||
|
||||
use crate::allocate::Allocator;
|
||||
|
||||
pub struct Pending<'a> {
|
||||
buf: *mut u8,
|
||||
out: *mut u8,
|
||||
pub(crate) pending: usize,
|
||||
end: *mut u8,
|
||||
_marker: PhantomData<&'a mut [u8]>,
|
||||
}
|
||||
|
||||
impl<'a> Pending<'a> {
|
||||
pub fn reset_keep(&mut self) {
|
||||
// keep the buffer as it is
|
||||
self.pending = 0;
|
||||
}
|
||||
|
||||
pub fn pending(&self) -> &[u8] {
|
||||
unsafe { core::slice::from_raw_parts(self.out, self.pending) }
|
||||
}
|
||||
|
||||
pub(crate) fn remaining(&self) -> usize {
|
||||
self.end as usize - self.out as usize
|
||||
}
|
||||
|
||||
pub(crate) fn capacity(&self) -> usize {
|
||||
self.end as usize - self.buf as usize
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[track_caller]
|
||||
pub fn advance(&mut self, n: usize) {
|
||||
assert!(n <= self.remaining(), "advancing past the end");
|
||||
debug_assert!(self.pending >= n);
|
||||
|
||||
self.out = self.out.wrapping_add(n);
|
||||
self.pending -= n;
|
||||
|
||||
if self.pending == 0 {
|
||||
self.out = self.buf;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[track_caller]
|
||||
pub fn rewind(&mut self, n: usize) {
|
||||
assert!(n <= self.pending, "rewinding past then start");
|
||||
|
||||
self.pending -= n;
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[track_caller]
|
||||
pub fn extend(&mut self, buf: &[u8]) {
|
||||
assert!(
|
||||
self.remaining() >= buf.len(),
|
||||
"buf.len() must fit in remaining()"
|
||||
);
|
||||
|
||||
unsafe {
|
||||
core::ptr::copy_nonoverlapping(buf.as_ptr(), self.out.add(self.pending), buf.len());
|
||||
}
|
||||
|
||||
self.pending += buf.len();
|
||||
}
|
||||
|
||||
pub(crate) fn new_in(alloc: &Allocator<'a>, len: usize) -> Option<Self> {
|
||||
let range = alloc.allocate_slice::<u8>(len)?.as_mut_ptr_range();
|
||||
|
||||
Some(Self {
|
||||
buf: range.start as *mut u8,
|
||||
out: range.start as *mut u8,
|
||||
end: range.end as *mut u8,
|
||||
pending: 0,
|
||||
_marker: PhantomData,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn clone_in(&self, alloc: &Allocator<'a>) -> Option<Self> {
|
||||
let len = self.end as usize - self.buf as usize;
|
||||
let mut clone = Self::new_in(alloc, len)?;
|
||||
|
||||
unsafe { core::ptr::copy_nonoverlapping(self.buf, clone.buf, len) };
|
||||
clone.out = unsafe { clone.buf.add(self.out as usize - self.buf as usize) };
|
||||
clone.pending = self.pending;
|
||||
|
||||
Some(clone)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn drop_in(&self, alloc: &Allocator) {
|
||||
let len = self.end as usize - self.buf as usize;
|
||||
alloc.deallocate(self.buf, len);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,164 @@
|
|||
pub fn slide_hash(state: &mut crate::deflate::State) {
|
||||
let wsize = state.w_size as u16;
|
||||
|
||||
slide_hash_chain(state.head, wsize);
|
||||
slide_hash_chain(state.prev, wsize);
|
||||
}
|
||||
|
||||
fn slide_hash_chain(table: &mut [u16], wsize: u16) {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
if crate::cpu_features::is_enabled_avx2() {
|
||||
return avx2::slide_hash_chain(table, wsize);
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
if crate::cpu_features::is_enabled_neon() {
|
||||
return neon::slide_hash_chain(table, wsize);
|
||||
}
|
||||
|
||||
rust::slide_hash_chain(table, wsize);
|
||||
}
|
||||
|
||||
mod rust {
|
||||
pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
|
||||
for m in table.iter_mut() {
|
||||
*m = m.saturating_sub(wsize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod neon {
|
||||
use core::arch::aarch64::{
|
||||
uint16x8_t, uint16x8x4_t, vdupq_n_u16, vld1q_u16_x4, vqsubq_u16, vst1q_u16_x4,
|
||||
};
|
||||
|
||||
pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
|
||||
assert!(crate::cpu_features::is_enabled_neon());
|
||||
unsafe { slide_hash_chain_internal(table, wsize) }
|
||||
}
|
||||
|
||||
#[target_feature(enable = "neon")]
|
||||
unsafe fn slide_hash_chain_internal(table: &mut [u16], wsize: u16) {
|
||||
debug_assert_eq!(table.len() % 32, 0);
|
||||
|
||||
let v = unsafe { vdupq_n_u16(wsize) };
|
||||
|
||||
for chunk in table.chunks_exact_mut(32) {
|
||||
unsafe {
|
||||
let p0 = vld1q_u16_x4(chunk.as_ptr());
|
||||
let p0 = vqsubq_u16_x4_x1(p0, v);
|
||||
vst1q_u16_x4(chunk.as_mut_ptr(), p0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[target_feature(enable = "neon")]
|
||||
unsafe fn vqsubq_u16_x4_x1(a: uint16x8x4_t, b: uint16x8_t) -> uint16x8x4_t {
|
||||
uint16x8x4_t(
|
||||
vqsubq_u16(a.0, b),
|
||||
vqsubq_u16(a.1, b),
|
||||
vqsubq_u16(a.2, b),
|
||||
vqsubq_u16(a.3, b),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod avx2 {
|
||||
use core::arch::x86_64::{
|
||||
__m256i, _mm256_loadu_si256, _mm256_set1_epi16, _mm256_storeu_si256, _mm256_subs_epu16,
|
||||
};
|
||||
|
||||
pub fn slide_hash_chain(table: &mut [u16], wsize: u16) {
|
||||
assert!(crate::cpu_features::is_enabled_avx2());
|
||||
unsafe { slide_hash_chain_internal(table, wsize) }
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn slide_hash_chain_internal(table: &mut [u16], wsize: u16) {
|
||||
debug_assert_eq!(table.len() % 16, 0);
|
||||
|
||||
let ymm_wsize = unsafe { _mm256_set1_epi16(wsize as i16) };
|
||||
|
||||
for chunk in table.chunks_exact_mut(16) {
|
||||
let chunk = chunk.as_mut_ptr() as *mut __m256i;
|
||||
|
||||
unsafe {
|
||||
let value = _mm256_loadu_si256(chunk);
|
||||
let result = _mm256_subs_epu16(value, ymm_wsize);
|
||||
_mm256_storeu_si256(chunk, result);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
const WSIZE: u16 = 32768;
|
||||
|
||||
const INPUT: [u16; 64] = [
|
||||
0, 0, 28790, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43884, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 64412, 0, 0, 0, 0, 0, 21043, 0, 0, 0, 0, 0, 23707, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 64026, 0, 0, 20182,
|
||||
];
|
||||
|
||||
const OUTPUT: [u16; 64] = [
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 31644, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 31258, 0, 0, 0,
|
||||
];
|
||||
|
||||
#[test]
|
||||
fn test_slide_hash_rust() {
|
||||
let mut input = INPUT;
|
||||
|
||||
rust::slide_hash_chain(&mut input, WSIZE);
|
||||
|
||||
assert_eq!(input, OUTPUT);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn test_slide_hash_avx2() {
|
||||
if crate::cpu_features::is_enabled_avx2() {
|
||||
let mut input = INPUT;
|
||||
|
||||
avx2::slide_hash_chain(&mut input, WSIZE);
|
||||
|
||||
assert_eq!(input, OUTPUT);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
fn test_slide_hash_neon() {
|
||||
if crate::cpu_features::is_enabled_neon() {
|
||||
let mut input = INPUT;
|
||||
|
||||
neon::slide_hash_chain(&mut input, WSIZE);
|
||||
|
||||
assert_eq!(input, OUTPUT);
|
||||
}
|
||||
}
|
||||
|
||||
quickcheck::quickcheck! {
|
||||
fn slide_is_rust_slide(v: Vec<u16>, wsize: u16) -> bool {
|
||||
// pad to a multiple of 32
|
||||
let difference = v.len().next_multiple_of(32) - v.len();
|
||||
let mut v = v;
|
||||
v.extend(core::iter::repeat(u16::MAX).take(difference));
|
||||
|
||||
|
||||
let mut a = v.clone();
|
||||
let mut b = v;
|
||||
|
||||
rust::slide_hash_chain(&mut a, wsize);
|
||||
slide_hash_chain(&mut b, wsize);
|
||||
|
||||
a == b
|
||||
}
|
||||
}
|
||||
}
|
Двоичные данные
third_party/rust/zlib-rs/src/deflate/test-data/inflate_buf_error.dat
поставляемый
Normal file
Двоичные данные
third_party/rust/zlib-rs/src/deflate/test-data/inflate_buf_error.dat
поставляемый
Normal file
Двоичный файл не отображается.
|
@ -0,0 +1,598 @@
|
|||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=10 M=100 Y=50 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>10.000002</xmpG:cyan>
|
||||
<xmpG:magenta>100.000000</xmpG:magenta>
|
||||
<xmpG:yellow>50.000000</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=95 Y=20 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>94.999999</xmpG:magenta>
|
||||
<xmpG:yellow>19.999999</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=25 M=25 Y=40 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>25.000000</xmpG:cyan>
|
||||
<xmpG:magenta>25.000000</xmpG:magenta>
|
||||
<xmpG:yellow>39.999998</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=40 M=45 Y=50 K=5</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>39.999998</xmpG:cyan>
|
||||
<xmpG:magenta>44.999999</xmpG:magenta>
|
||||
<xmpG:yellow>50.000000</xmpG:yellow>
|
||||
<xmpG:black>5.000001</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=50 M=50 Y=60 K=25</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>50.000000</xmpG:cyan>
|
||||
<xmpG:magenta>50.000000</xmpG:magenta>
|
||||
<xmpG:yellow>60.000002</xmpG:yellow>
|
||||
<xmpG:black>25.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=55 M=60 Y=65 K=40</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>55.000001</xmpG:cyan>
|
||||
<xmpG:magenta>60.000002</xmpG:magenta>
|
||||
<xmpG:yellow>64.999998</xmpG:yellow>
|
||||
<xmpG:black>39.999998</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=25 M=40 Y=65 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>25.000000</xmpG:cyan>
|
||||
<xmpG:magenta>39.999998</xmpG:magenta>
|
||||
<xmpG:yellow>64.999998</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=30 M=50 Y=75 K=10</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>30.000001</xmpG:cyan>
|
||||
<xmpG:magenta>50.000000</xmpG:magenta>
|
||||
<xmpG:yellow>75.000000</xmpG:yellow>
|
||||
<xmpG:black>10.000002</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=35 M=60 Y=80 K=25</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>35.000002</xmpG:cyan>
|
||||
<xmpG:magenta>60.000002</xmpG:magenta>
|
||||
<xmpG:yellow>80.000001</xmpG:yellow>
|
||||
<xmpG:black>25.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=40 M=65 Y=90 K=35</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>39.999998</xmpG:cyan>
|
||||
<xmpG:magenta>64.999998</xmpG:magenta>
|
||||
<xmpG:yellow>90.000004</xmpG:yellow>
|
||||
<xmpG:black>35.000002</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=40 M=70 Y=100 K=50</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>39.999998</xmpG:cyan>
|
||||
<xmpG:magenta>69.999999</xmpG:magenta>
|
||||
<xmpG:yellow>100.000000</xmpG:yellow>
|
||||
<xmpG:black>50.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=50 M=70 Y=80 K=70</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>50.000000</xmpG:cyan>
|
||||
<xmpG:magenta>69.999999</xmpG:magenta>
|
||||
<xmpG:yellow>80.000001</xmpG:yellow>
|
||||
<xmpG:black>69.999999</xmpG:black>
|
||||
</rdf:li>
|
||||
</rdf:Seq>
|
||||
</xmpG:Colorants>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:groupName>Grays</xmpG:groupName>
|
||||
<xmpG:groupType>1</xmpG:groupType>
|
||||
<xmpG:Colorants>
|
||||
<rdf:Seq>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=100</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>100.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=90</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>89.999402</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=80</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>79.998797</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=70</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>69.999701</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=60</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>59.999102</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=50</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>50.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=40</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>39.999402</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=30</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>29.998803</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=20</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>19.999701</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=10</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>9.999102</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=0 Y=0 K=5</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>0.000000</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>4.998803</xmpG:black>
|
||||
</rdf:li>
|
||||
</rdf:Seq>
|
||||
</xmpG:Colorants>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:groupName>Brights</xmpG:groupName>
|
||||
<xmpG:groupType>1</xmpG:groupType>
|
||||
<xmpG:Colorants>
|
||||
<rdf:Seq>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=100 Y=100 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>100.000000</xmpG:magenta>
|
||||
<xmpG:yellow>100.000000</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=75 Y=100 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>75.000000</xmpG:magenta>
|
||||
<xmpG:yellow>100.000000</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=0 M=10 Y=95 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>0.000000</xmpG:cyan>
|
||||
<xmpG:magenta>10.000002</xmpG:magenta>
|
||||
<xmpG:yellow>94.999999</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=85 M=10 Y=100 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>84.999996</xmpG:cyan>
|
||||
<xmpG:magenta>10.000002</xmpG:magenta>
|
||||
<xmpG:yellow>100.000000</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=100 M=90 Y=0 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>100.000000</xmpG:cyan>
|
||||
<xmpG:magenta>90.000004</xmpG:magenta>
|
||||
<xmpG:yellow>0.000000</xmpG:yellow>
|
||||
<xmpG:black>0.000000</xmpG:black>
|
||||
</rdf:li>
|
||||
<rdf:li rdf:parseType="Resource">
|
||||
<xmpG:swatchName>C=60 M=90 Y=0 K=0</xmpG:swatchName>
|
||||
<xmpG:mode>CMYK</xmpG:mode>
|
||||
<xmpG:type>PROCESS</xmpG:type>
|
||||
<xmpG:cyan>60.000002</xmpG:cyan>
|
||||
<xmpG:magenta>90.000004</xmpG:magenta>
|
||||
<xmpG:yellow>0.003099</xmpG:yellow>
|
||||
<xmpG:black>0.003099</xmpG:black>
|
||||
</rdf:li>
|
||||
</rdf:Seq>
|
||||
</xmpG:Colorants>
|
||||
</rdf:li>
|
||||
</rdf:Seq>
|
||||
</xmpTPg:SwatchGroups>
|
||||
</rdf:Description>
|
||||
<rdf:Description rdf:about=""
|
||||
xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
|
||||
<pdf:Producer>Adobe PDF library 9.00</pdf:Producer>
|
||||
</rdf:Description>
|
||||
</rdf:RDF>
|
||||
</x:xmpmeta>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<?xpacket end="w"?>
|
||||
endstream
endobj
145 0 obj<</Metadata 144 0 R>>
endobj
1 0 obj<</Contents 3 0 R/Type/Page/Parent 102 0 R/Rotate 0/MediaBox[0 0 612.28302 790.866028]/CropBox[0 0 612.28302 790.866028]/Resources 2 0 R>>
endobj
2 0 obj<</ColorSpace<</Cs8 117 0 R>>/Font<</F2 122 0 R/F3 121 0 R/F6 118 0 R/F7 125 0 R/F8 56 0 R/F9 70 0 R/F10 71 0 R/F11 61 0 R/F12 65 0 R/F13 72 0 R>>/ProcSet[/PDF/Text]/ExtGState<</GS1 113 0 R>>>>
endobj
3 0 obj<</Length 8934/Filter/FlateDecode>>stream
|
||||
hÞ”[MsÜF’½ëWðn<>m |