Bug 1579862 - Introducing HuffmanTableImplementationSaturated;r=arai

We introduce a new implementation of Huffman Tables that trades
space for a fast lookup. For the time being, this implementation
is reserved for tables with a max bitlength of 8 or less.

Depends on D45643

Differential Revision: https://phabricator.services.mozilla.com/D45644

--HG--
extra : moz-landing-system : lando
This commit is contained in:
David Teller 2019-09-13 05:39:10 +00:00
Родитель 0284d41154
Коммит b3f28d9337
2 изменённых файлов: 375 добавлений и 17 удалений

Просмотреть файл

@ -58,6 +58,15 @@ static_assert(
// more bits than necessary, just to be on the safe side.
const uint8_t MAX_PREFIX_BIT_LENGTH = 32;
// Maximal bit length acceptable in a `HuffmanTableImplementationSaturated`.
//
// As a `HuffmanTableImplementationSaturated` requires O(2 ^ max bit length)
// space, we cannot afford to use one for every table. Whenever the max bit
// length in the table is <= MAX_BIT_LENGTH_IN_SATURATED_TABLE,
// we use a `HuffmanTableImplementationSaturated`. Otherwise, we fall back to
// a slower but less memory-hungry solution.
const uint8_t MAX_BIT_LENGTH_IN_SATURATED_TABLE = 8;
// The length of the bit buffer, in bits.
const uint8_t BIT_BUFFER_SIZE = 64;
@ -1631,6 +1640,11 @@ FlatHuffmanKey::FlatHuffmanKey(const HuffmanKey* key)
// ---- Implementation of Huffman Tables
// Wrap an iterator over a `HuffmanTableImplementationSaturated<T>`.
// The wrapped iterator is moved into `implementation`.
template <typename T>
HuffmanTableImplementationGeneric<T>::Iterator::Iterator(
typename HuffmanTableImplementationSaturated<T>::Iterator&& iterator)
: implementation(std::move(iterator)) {}
template <typename T>
HuffmanTableImplementationGeneric<T>::Iterator::Iterator(
typename HuffmanTableImplementationMap<T>::Iterator&& iterator)
@ -1639,6 +1653,9 @@ HuffmanTableImplementationGeneric<T>::Iterator::Iterator(
template <typename T>
void HuffmanTableImplementationGeneric<T>::Iterator::operator++() {
implementation.match(
[](typename HuffmanTableImplementationSaturated<T>::Iterator& iterator) {
iterator.operator++();
},
[](typename HuffmanTableImplementationMap<T>::Iterator& iterator) {
iterator.operator++();
});
@ -1648,6 +1665,12 @@ template <typename T>
bool HuffmanTableImplementationGeneric<T>::Iterator::operator==(
const HuffmanTableImplementationGeneric<T>::Iterator& other) const {
return implementation.match(
[other](const typename HuffmanTableImplementationSaturated<T>::Iterator&
iterator) {
return iterator ==
other.implementation.template as<
typename HuffmanTableImplementationSaturated<T>::Iterator>();
},
[other](
const typename HuffmanTableImplementationMap<T>::Iterator& iterator) {
return iterator ==
@ -1660,6 +1683,12 @@ template <typename T>
bool HuffmanTableImplementationGeneric<T>::Iterator::operator!=(
const HuffmanTableImplementationGeneric<T>::Iterator& other) const {
return implementation.match(
[other](const typename HuffmanTableImplementationSaturated<T>::Iterator&
iterator) {
return iterator !=
other.implementation.template as<
typename HuffmanTableImplementationSaturated<T>::Iterator>();
},
[other](
const typename HuffmanTableImplementationMap<T>::Iterator& iterator) {
return iterator !=
@ -1671,6 +1700,8 @@ bool HuffmanTableImplementationGeneric<T>::Iterator::operator!=(
template <typename T>
const T* HuffmanTableImplementationGeneric<T>::Iterator::operator*() const {
return implementation.match(
[](const typename HuffmanTableImplementationSaturated<T>::Iterator&
iterator) { return iterator.operator*(); },
[](const typename HuffmanTableImplementationMap<T>::Iterator& iterator) {
return iterator.operator*();
});
@ -1685,6 +1716,9 @@ HuffmanTableImplementationGeneric<T>::HuffmanTableImplementationGeneric(
template <typename T>
void HuffmanTableImplementationGeneric<T>::selfCheck() {
this->implementation.match(
[](HuffmanTableImplementationSaturated<T>& implementation) {
implementation.selfCheck();
},
[](HuffmanTableImplementationMap<T>& implementation) {
implementation.selfCheck();
},
@ -1698,6 +1732,10 @@ template <typename T>
typename HuffmanTableImplementationGeneric<T>::Iterator
HuffmanTableImplementationGeneric<T>::begin() const {
return this->implementation.match(
[](const HuffmanTableImplementationSaturated<T>& implementation)
-> HuffmanTableImplementationGeneric<T>::Iterator {
return implementation.begin();
},
[](const HuffmanTableImplementationMap<T>& implementation)
-> HuffmanTableImplementationGeneric<T>::Iterator {
return implementation.begin();
@ -1712,6 +1750,10 @@ template <typename T>
typename HuffmanTableImplementationGeneric<T>::Iterator
HuffmanTableImplementationGeneric<T>::end() const {
return this->implementation.match(
[](const HuffmanTableImplementationSaturated<T>& implementation)
-> HuffmanTableImplementationGeneric<T>::Iterator {
return implementation.end();
},
[](const HuffmanTableImplementationMap<T>& implementation)
-> HuffmanTableImplementationGeneric<T>::Iterator {
return implementation.end();
@ -1725,14 +1767,16 @@ HuffmanTableImplementationGeneric<T>::end() const {
template <typename T>
JS::Result<Ok> HuffmanTableImplementationGeneric<T>::initWithSingleValue(
JSContext* cx, T&& value) {
// Placeholder implementation.
// Future versions will pick between several implementations of
// Huffman Tables depending on `numberOfSymbols`/`maxBitLength`.
this->implementation = {
mozilla::VariantType<HuffmanTableImplementationMap<T>>{}, cx};
MOZ_TRY(this->implementation.template as<HuffmanTableImplementationMap<T>>()
.initWithSingleValue(cx, std::move(value)));
// Only one value: use HuffmanTableImplementationSaturated
MOZ_ASSERT(this->implementation.template is<
HuffmanTableUnreachable>()); // Make sure that we're initializing.
this->implementation = {
mozilla::VariantType<HuffmanTableImplementationSaturated<T>>{}, cx};
MOZ_TRY(
this->implementation.template as<HuffmanTableImplementationSaturated<T>>()
.initWithSingleValue(cx, std::move(value)));
return Ok();
}
@ -1741,14 +1785,18 @@ JS::Result<Ok> HuffmanTableImplementationGeneric<T>::init(
JSContext* cx, size_t numberOfSymbols, uint8_t maxBitLength) {
MOZ_ASSERT(this->implementation.template is<
HuffmanTableUnreachable>()); // Make sure that we're initializing.
// Placeholder implementation.
// Future versions will pick between several implementations of
// Huffman Tables depending on `numberOfSymbols`/`maxBitLength`.
this->implementation = {
mozilla::VariantType<HuffmanTableImplementationMap<T>>{}, cx};
MOZ_TRY(
this->implementation.template as<HuffmanTableImplementationMap<T>>().init(
cx, numberOfSymbols, maxBitLength));
if (maxBitLength > MAX_BIT_LENGTH_IN_SATURATED_TABLE) {
this->implementation = {
mozilla::VariantType<HuffmanTableImplementationMap<T>>{}, cx};
MOZ_TRY(this->implementation.template as<HuffmanTableImplementationMap<T>>()
.init(cx, numberOfSymbols, maxBitLength));
} else {
this->implementation = {
mozilla::VariantType<HuffmanTableImplementationSaturated<T>>{}, cx};
MOZ_TRY(this->implementation
.template as<HuffmanTableImplementationSaturated<T>>()
.init(cx, numberOfSymbols, maxBitLength));
}
return Ok();
}
@ -1756,6 +1804,12 @@ template <typename T>
JS::Result<Ok> HuffmanTableImplementationGeneric<T>::addSymbol(
uint32_t bits, uint8_t bitLength, T&& value) {
return this->implementation.match(
[bits, bitLength, value = std::move(value)](
HuffmanTableImplementationSaturated<T>&
implementation) mutable /* discard implicit const */
-> JS::Result<Ok> {
return implementation.addSymbol(bits, bitLength, std::move(value));
},
[bits, bitLength, value = std::move(value)](
HuffmanTableImplementationMap<T>&
implementation) mutable /* discard implicit const */
@ -1772,6 +1826,8 @@ template <typename T>
HuffmanEntry<const T*> HuffmanTableImplementationGeneric<T>::lookup(
HuffmanLookup lookup) const {
return this->implementation.match(
[lookup](const HuffmanTableImplementationSaturated<T>& implementation)
-> HuffmanEntry<const T*> { return implementation.lookup(lookup); },
[lookup](const HuffmanTableImplementationMap<T>& implementation)
-> HuffmanEntry<const T*> { return implementation.lookup(lookup); },
[](const HuffmanTableUnreachable&) -> HuffmanEntry<const T*> {
@ -1917,6 +1973,153 @@ HuffmanEntry<const T*> HuffmanTableImplementationMap<T>::lookup(
return HuffmanEntry<const T*>(0, 0, nullptr);
}
template <typename T>
HuffmanTableImplementationSaturated<T>::Iterator::Iterator(
const HuffmanEntry<T>* position)
: position(position) {}
// Step to the next entry.
template <typename T>
void HuffmanTableImplementationSaturated<T>::Iterator::operator++() {
  ++position;
}
// Return a pointer to the value stored in the entry currently designated.
template <typename T>
const T* HuffmanTableImplementationSaturated<T>::Iterator::operator*() const {
  const HuffmanEntry<T>& current = *position;
  return &current.value;
}
// Two iterators are equal when they designate the same entry.
template <typename T>
bool HuffmanTableImplementationSaturated<T>::Iterator::operator==(
    const Iterator& other) const {
  return other.position == position;
}
// Two iterators differ when they designate distinct entries.
template <typename T>
bool HuffmanTableImplementationSaturated<T>::Iterator::operator!=(
    const Iterator& other) const {
  return !(position == other.position);
}
// Initialize the table so that it contains exactly one symbol, `value`.
//
// The symbol is stored with a code of 0 bits (bits = 0, bit length = 0) and
// the lookup vector `saturated` receives a single slot pointing at it.
// On allocation failure, returns `cx->alreadyReportedError()` and leaves
// `maxBitLength` untouched.
template <typename T>
JS::Result<Ok> HuffmanTableImplementationSaturated<T>::initWithSingleValue(
    JSContext* cx, T&& value) {
  MOZ_ASSERT(values.empty());  // Make sure that we're initializing.

  // `&&` short-circuits: `saturated` is only touched once the entry itself
  // has been stored successfully.
  const bool stored = values.emplaceBack(0, 0, std::move(value)) &&
                      saturated.emplaceBack(0);
  if (!stored) {
    return cx->alreadyReportedError();
  }

  this->maxBitLength = 0;
  return Ok();
}
// Prepare the table to receive `numberOfSymbols` symbols whose codes are at
// most `maxBitLength` bits long.
//
// Reserves room for the symbols in `values` and fills `saturated` with
// `1 << maxBitLength` placeholder slots. Symbols are then registered with
// `addSymbol`. On allocation failure, returns `cx->alreadyReportedError()`.
template <typename T>
JS::Result<Ok> HuffmanTableImplementationSaturated<T>::init(
    JSContext* cx, size_t numberOfSymbols, uint8_t maxBitLength) {
  MOZ_ASSERT(maxBitLength <= MAX_BIT_LENGTH_IN_SATURATED_TABLE);
  MOZ_ASSERT(values.empty());  // Make sure that we're initializing.

  this->maxBitLength = maxBitLength;

  const size_t saturatedLength = 1 << maxBitLength;
  if (!values.initCapacity(numberOfSymbols) ||
      !saturated.initCapacity(saturatedLength)) {
    return cx->alreadyReportedError();
  }

  // Bring `saturated` to its final length right away, as `addSymbol` fills
  // it in random order. Every slot starts as -1 (which is always invalid)
  // so that implementation errors can be detected.
  for (size_t remaining = saturatedLength; remaining != 0; --remaining) {
    // Capacity reserved just above.
    saturated.infallibleAppend(size_t(-1));
  }

  return Ok();
}
#ifdef DEBUG
// Debug-only consistency check: every slot of `saturated` must have been
// assigned by `addSymbol`/`initWithSingleValue`, and at least one symbol
// must use the full `maxBitLength`.
template <typename T>
void HuffmanTableImplementationSaturated<T>::selfCheck() {
  // Double-check that we've initialized properly.
  MOZ_ASSERT(this->maxBitLength <= MAX_CODE_BIT_LENGTH);

  bool foundMaxBitLength = false;
  // Deliberately an index-based loop rather than a range-for: having the
  // slot number at hand makes a failing assertion easier to debug.
  const size_t slotCount = saturated.length();
  for (size_t slot = 0; slot != slotCount; ++slot) {
    // Check that the slot has been properly initialized.
    const size_t index = saturated[slot];
    MOZ_ASSERT(index != size_t(-1));

    const auto& entry = values[index];
    foundMaxBitLength |= (entry.key.bitLength == maxBitLength);
  }
  MOZ_ASSERT(foundMaxBitLength);
}
#endif // DEBUG
// Register the mapping `(bits, bitLength) => value`.
//
// The entry is appended to `values`, then every slot of `saturated` whose
// `maxBitLength`-bit index starts with the code `bits` is pointed at it.
// Symbols must be added by non-decreasing `bitLength`, after `init()`.
template <typename T>
JS::Result<Ok> HuffmanTableImplementationSaturated<T>::addSymbol(
    uint32_t bits, uint8_t bitLength, T&& value) {
  MOZ_ASSERT(bitLength != 0,
             "Adding a symbol with a bitLength of 0 doesn't make sense.");
  MOZ_ASSERT(values.empty() || values.back().key.bitLength <= bitLength,
             "Symbols must be ranked by increasing bits length");
  MOZ_ASSERT_IF(bitLength != 32 /* >> 32 is UB */, bits >> bitLength == 0);
  MOZ_ASSERT(bitLength <= maxBitLength);

  // Position that the new entry is about to take in `values`.
  const size_t index = values.length();
  if (!values.emplaceBack(bits, bitLength, std::move(value))) {
    MOZ_CRASH();  // Memory was reserved in `init()`.
  }

  // Notation: below, unless stated otherwise, all values are exactly
  // `maxBitLength` bits long.
  //
  // A lookup extracts `maxBitLength` bits from the key, giving `0bB...B`.
  // This symbol matches if and only if `0bB...B` can be split into
  // `0bC...CX...X` where
  // - `0bC...C` is `bitLength` bits long;
  // - `0bC...C == bits`.
  //
  // To make the lookup a plain array access, we precompute every `0bB...B`
  // satisfying this, i.e. the whole segment `[0bC...C0...0, 0bC...C1...1]`.
  const uint8_t padding = maxBitLength - bitLength;
  const size_t first = bits << padding;  // `0bC...C0...0` above.

  // Number of slots in the segment: (`0bX...X` all ones) + 1.
  // A shift by the full width of `size_t` would be UB, hence the special
  // case for `padding == 0`.
  size_t count = 1;
  if (padding != 0) {
    count += size_t(-1) >> (8 * sizeof(size_t) - padding);
  }

  const size_t end = first + count;
  for (size_t slot = first; slot < end; ++slot) {
    saturated[slot] = index;
  }

  return Ok();
}
// Look up the symbol whose code is a prefix of the leading bits of `key`.
//
// Returns an entry carrying the code (`bits`, `bitLength`) of the symbol
// found and a pointer to its value. If no symbol matches (the slot was never
// filled by `addSymbol`, or the table has not been initialized), returns an
// entry with a bit length of 0 and a value of `nullptr`.
template <typename T>
HuffmanEntry<const T*> HuffmanTableImplementationSaturated<T>::lookup(
    HuffmanLookup key) const {
  // Take the `maxBitLength` highest weight bits of `key`.
  // In the documentation of `addSymbol`, this is
  // `0bB...B`.
  const uint32_t bits = key.leadingBits(maxBitLength);

  // Expected invariants: `saturated.length() == 1 << maxBitLength`
  // and `bits < 1 << maxBitLength`. We nevertheless check the bound, so that
  // a table that was never initialized cannot cause an out-of-bounds read.
  if (bits >= saturated.length()) {
    return HuffmanEntry<const T*>(0, 0, nullptr);
  }
  const size_t index = saturated[bits];

  // Slots that no symbol has claimed still hold the `size_t(-1)` placeholder
  // written by `init()`. `selfCheck()` only catches this in DEBUG builds, so
  // treat any out-of-range index as "not in the table" rather than reading
  // past the end of `values`.
  if (index >= values.length()) {
    return HuffmanEntry<const T*>(0, 0, nullptr);
  }

  const auto& entry = values[index];
  return HuffmanEntry<const T*>(entry.key.bits, entry.key.bitLength,
                                &entry.value);
}
// -----
// The number of possible interfaces in each sum, indexed by

Просмотреть файл

@ -325,6 +325,158 @@ class HuffmanTableImplementationMap {
friend class HuffmanPreludeReader;
};
// An implementation of Huffman Tables as a vector designed to allow
// constant-time lookups at the expense of high space complexity.
//
// # Time complexity
//
// Lookups take constant time, which essentially consists in two
// simple vector lookups.
//
// # Space complexity
//
// After initialization, a `HuffmanTableImplementationSaturated`
// requires O(2 ^ max bit length in the table) space:
//
// - A vector `values` containing one entry per symbol.
// - A vector `saturated` containing exactly 2 ^ (max bit length in the
// table) entries, which we use to map any combination of `maxBitLength`
// bits onto the only `HuffmanEntry` that may be reached by a prefix
// of these `maxBitLength` bits. See below for more details.
//
// # Algorithm
//
// Consider the following Huffman table
//
// Symbol | Binary Code | Int value of Code | Bit Length
// ------ | ------------ | ----------------- | ----------
// A | 00111 | 7 | 5
// B | 00110 | 6 | 5
// C | 0010 | 2 | 4
// D | 011 | 3 | 3
// E | 010 | 2 | 3
// F | 000 | 0 | 3
// G | 11 | 3 | 2
// H | 10 | 2 | 2
//
// By definition of a Huffman Table, the Binary Codes represent
// paths in a Huffman Tree. Consequently, padding these codes
// to the end would not change the result.
//
// Symbol | Binary Code | Int value of Code | Bit Length
// ------ | ------------ | ----------------- | ----------
// A | 00111 | 7 | 5
// B | 00110 | 6 | 5
// C | 0010? | [4...5] | 4
// D | 011?? | [12...15] | 3
// E | 010?? | [8..11] | 3
// F | 000?? | [0..3] | 3
// G | 11??? | [24...31] | 2
// H | 10??? | [16...23] | 2
//
// Column "Int value of Code" now contains all possible values
// that may be expressed in 5 bits. By using these values
// as array indices, we may therefore represent the
// Huffman table as an array:
//
// Index | Symbol | Bit Length
// -------- | ---------- | -------------
// [0..3] | F | 3
// [4..5] | C | 4
// 6 | B | 5
// 7 | A | 5
// [8..11] | E | 3
// [12..15] | D | 3
// [16..23] | H | 2
// [24..31] | G | 2
//
// By using the next 5 bits in the bit buffer, we may, in
// a single lookup, determine the symbol and the bit length.
//
// In the current implementation, to save some space, we have
// two distinct arrays, one (`values`) with a single entry per
// symbol (its code, its bit length and its value), and one
// (`saturated`) with indices into that array.
template <typename T>
class HuffmanTableImplementationSaturated {
public:
// Create an empty, not-yet-initialized table. Both vectors allocate
// through `cx`. `maxBitLength` starts at `uint8_t(-1)` until one of the
// `init*` methods assigns the actual value.
explicit HuffmanTableImplementationSaturated(JSContext* cx)
: values(cx), saturated(cx), maxBitLength(-1) {}
// Tables may be moved but not copied (see the deleted constructors below).
HuffmanTableImplementationSaturated(
HuffmanTableImplementationSaturated&& other) = default;
// Initialize a Huffman table containing a single value.
JS::Result<Ok> initWithSingleValue(JSContext* cx, T&& value);
// Initialize a Huffman table containing `numberOfSymbols`.
// Symbols must be added with `addSymbol`.
//
// `largestBitLength` is the maximal bit length of a code in the table; it
// must be <= MAX_BIT_LENGTH_IN_SATURATED_TABLE.
JS::Result<Ok> init(JSContext* cx, size_t numberOfSymbols,
uint8_t largestBitLength);
#ifdef DEBUG
// Assert that the table has been entirely filled.
void selfCheck();
#endif // DEBUG
// Add a `(bits, bits_length) => value` mapping.
//
// Symbols must be added by non-decreasing `bits_length`.
JS::Result<Ok> addSymbol(uint32_t bits, uint8_t bits_length, T&& value);
HuffmanTableImplementationSaturated() = delete;
HuffmanTableImplementationSaturated(HuffmanTableImplementationSaturated&) =
delete;
// Lookup a value in the table.
//
// Return an entry with a value of `nullptr` if the value is not in the table.
//
// The lookup may advance `key` by `[0, key.bitLength]` bits. Typically, in a
// table with a single instance, or if the value is not in the table, it
// will advance by 0 bits. The caller is responsible for advancing its
// bitstream by `result.key.bitLength` bits.
HuffmanEntry<const T*> lookup(HuffmanLookup key) const;
// The number of values in the table.
size_t length() const { return values.length(); }
// Iterating in the order of insertion.
struct Iterator {
explicit Iterator(const HuffmanEntry<T>* position);
void operator++();
// Pointer to the value of the entry currently designated.
const T* operator*() const;
bool operator==(const Iterator& other) const;
bool operator!=(const Iterator& other) const;
private:
// The entry of `values` currently designated (or one past the last entry).
const HuffmanEntry<T>* position;
};
Iterator begin() const { return Iterator(values.begin()); }
Iterator end() const { return Iterator(values.end()); }
private:
// The entries in this Huffman Table, sorted in the order of insertion.
//
// Invariant (once `init*` has been called):
// - Length is the number of values inserted in the table.
// - for all i, `values[i].key.bitLength <= maxBitLength`.
Vector<HuffmanEntry<T>> values;
// The entries in this Huffman table, prepared for lookup.
// Each `size_t` element is an index into `values`.
// FIXME: We could make this a `uint8_t` to save space.
//
// Invariant (once `init*` has been called):
// - Length is `1 << maxBitLength`.
// - for all i, `saturated[i] < values.length()`
Vector<size_t> saturated;
// The maximal bitlength of a value in this table.
//
// Invariant (once `init*` has been called):
// - `maxBitLength <= MAX_CODE_BIT_LENGTH`
uint8_t maxBitLength;
friend class HuffmanPreludeReader;
};
// An empty Huffman table. Attempting to get a value from this table is a syntax
// error. This is the default value for `HuffmanTableValue` and represents all
// states that may not be reached.
@ -360,6 +512,7 @@ struct HuffmanTableImplementationGeneric {
size_t length() const;
struct Iterator {
Iterator(typename HuffmanTableImplementationSaturated<T>::Iterator&&);
Iterator(typename HuffmanTableImplementationMap<T>::Iterator&&);
void operator++();
const T* operator*() const;
@ -367,7 +520,8 @@ struct HuffmanTableImplementationGeneric {
bool operator!=(const Iterator& other) const;
private:
mozilla::Variant<typename HuffmanTableImplementationMap<T>::Iterator>
mozilla::Variant<typename HuffmanTableImplementationSaturated<T>::Iterator,
typename HuffmanTableImplementationMap<T>::Iterator>
implementation;
};
@ -386,7 +540,8 @@ struct HuffmanTableImplementationGeneric {
HuffmanEntry<const T*> lookup(HuffmanLookup key) const;
private:
mozilla::Variant<HuffmanTableImplementationMap<T>, HuffmanTableUnreachable>
mozilla::Variant<HuffmanTableImplementationSaturated<T>,
HuffmanTableImplementationMap<T>, HuffmanTableUnreachable>
implementation;
};