зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1355671
- Import brotli encoder and command line tool code. r=gps
--HG-- extra : rebase_source : 1f8536a985a33fc8bbce0f8df93c3d4ca282e05c
This commit is contained in:
Родитель
abdb6cbf08
Коммит
202ee4cc41
|
@ -0,0 +1,14 @@
|
|||
#brotli/enc

# Shared build settings (compiler, flags, pattern rules) come from the parent
# directory.
include ../shared.mk

# Encoder objects that do not need the static dictionary data.
OBJS_NODICT = backward_references.o block_splitter.o brotli_bit_stream.o compress_fragment.o compress_fragment_two_pass.o encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o metablock.o static_dict.o streams.o utf8_util.o

# Full encoder object list, dictionary included.
OBJS = $(OBJS_NODICT) dictionary.o

# These targets name actions, not files; declaring them phony keeps them
# working even if a file called "all", "clean" or "nodict" appears here.
.PHONY : nodict all clean

nodict : $(OBJS_NODICT)

all : $(OBJS)

# The recipe is kept on the rule line so it does not depend on tab indentation.
clean : ; rm -f $(OBJS) $(SO)
|
||||
|
|
@ -0,0 +1,858 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Function to find backward reference copies.
|
||||
|
||||
#include "./backward_references.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./literal_cost.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Longest match length for which zopflification still uses distinct
// distances; longer matches are handled as a single candidate.
static const uint16_t kMaxZopfliLen = 325;
|
||||
|
||||
// Histogram based cost model for zopflification.
|
||||
class ZopfliCostModel {
|
||||
public:
|
||||
ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {}
|
||||
|
||||
void SetFromCommands(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const Command* commands,
|
||||
size_t num_commands,
|
||||
size_t last_insert_len) {
|
||||
std::vector<uint32_t> histogram_literal(256, 0);
|
||||
std::vector<uint32_t> histogram_cmd(kNumCommandPrefixes, 0);
|
||||
std::vector<uint32_t> histogram_dist(kNumDistancePrefixes, 0);
|
||||
|
||||
size_t pos = position - last_insert_len;
|
||||
for (size_t i = 0; i < num_commands; i++) {
|
||||
size_t inslength = commands[i].insert_len_;
|
||||
size_t copylength = commands[i].copy_len();
|
||||
size_t distcode = commands[i].dist_prefix_;
|
||||
size_t cmdcode = commands[i].cmd_prefix_;
|
||||
|
||||
histogram_cmd[cmdcode]++;
|
||||
if (cmdcode >= 128) histogram_dist[distcode]++;
|
||||
|
||||
for (size_t j = 0; j < inslength; j++) {
|
||||
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
|
||||
}
|
||||
|
||||
pos += inslength + copylength;
|
||||
}
|
||||
|
||||
std::vector<float> cost_literal;
|
||||
Set(histogram_literal, &cost_literal);
|
||||
Set(histogram_cmd, &cost_cmd_);
|
||||
Set(histogram_dist, &cost_dist_);
|
||||
|
||||
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
|
||||
min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
|
||||
}
|
||||
|
||||
literal_costs_.resize(num_bytes + 1);
|
||||
literal_costs_[0] = 0.0;
|
||||
for (size_t i = 0; i < num_bytes; ++i) {
|
||||
literal_costs_[i + 1] = literal_costs_[i] +
|
||||
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
|
||||
}
|
||||
}
|
||||
|
||||
void SetFromLiteralCosts(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask) {
|
||||
literal_costs_.resize(num_bytes + 2);
|
||||
EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
|
||||
ringbuffer, &literal_costs_[1]);
|
||||
literal_costs_[0] = 0.0;
|
||||
for (size_t i = 0; i < num_bytes; ++i) {
|
||||
literal_costs_[i + 1] += literal_costs_[i];
|
||||
}
|
||||
cost_cmd_.resize(kNumCommandPrefixes);
|
||||
cost_dist_.resize(kNumDistancePrefixes);
|
||||
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
|
||||
cost_cmd_[i] = static_cast<float>(FastLog2(11 + i));
|
||||
}
|
||||
for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
|
||||
cost_dist_[i] = static_cast<float>(FastLog2(20 + i));
|
||||
}
|
||||
min_cost_cmd_ = static_cast<float>(FastLog2(11));
|
||||
}
|
||||
|
||||
float GetCommandCost(
|
||||
size_t dist_code, size_t length_code, size_t insert_length) const {
|
||||
uint16_t inscode = GetInsertLengthCode(insert_length);
|
||||
uint16_t copycode = GetCopyLengthCode(length_code);
|
||||
uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code == 0);
|
||||
uint16_t dist_symbol;
|
||||
uint32_t distextra;
|
||||
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
|
||||
uint32_t distnumextra = distextra >> 24;
|
||||
|
||||
float result = static_cast<float>(
|
||||
GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra);
|
||||
result += cost_cmd_[cmdcode];
|
||||
if (cmdcode >= 128) result += cost_dist_[dist_symbol];
|
||||
return result;
|
||||
}
|
||||
|
||||
float GetLiteralCosts(size_t from, size_t to) const {
|
||||
return literal_costs_[to] - literal_costs_[from];
|
||||
}
|
||||
|
||||
float GetMinCostCmd(void) const {
|
||||
return min_cost_cmd_;
|
||||
}
|
||||
|
||||
private:
|
||||
void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) {
|
||||
cost->resize(histogram.size());
|
||||
size_t sum = 0;
|
||||
for (size_t i = 0; i < histogram.size(); i++) {
|
||||
sum += histogram[i];
|
||||
}
|
||||
float log2sum = static_cast<float>(FastLog2(sum));
|
||||
for (size_t i = 0; i < histogram.size(); i++) {
|
||||
if (histogram[i] == 0) {
|
||||
(*cost)[i] = log2sum + 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Shannon bits for this symbol.
|
||||
(*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i]));
|
||||
|
||||
// Cannot be coded with less than 1 bit
|
||||
if ((*cost)[i] < 1) (*cost)[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<float> cost_cmd_; // The insert and copy length symbols.
|
||||
std::vector<float> cost_dist_;
|
||||
// Cumulative costs of literals per position in the stream.
|
||||
std::vector<float> literal_costs_;
|
||||
float min_cost_cmd_;
|
||||
};
|
||||
|
||||
inline size_t ComputeDistanceCode(size_t distance,
|
||||
size_t max_distance,
|
||||
int quality,
|
||||
const int* dist_cache) {
|
||||
if (distance <= max_distance) {
|
||||
if (distance == static_cast<size_t>(dist_cache[0])) {
|
||||
return 0;
|
||||
} else if (distance == static_cast<size_t>(dist_cache[1])) {
|
||||
return 1;
|
||||
} else if (distance == static_cast<size_t>(dist_cache[2])) {
|
||||
return 2;
|
||||
} else if (distance == static_cast<size_t>(dist_cache[3])) {
|
||||
return 3;
|
||||
} else if (quality > 3 && distance >= 6) {
|
||||
for (size_t k = 4; k < kNumDistanceShortCodes; ++k) {
|
||||
size_t idx = kDistanceCacheIndex[k];
|
||||
size_t candidate =
|
||||
static_cast<size_t>(dist_cache[idx] + kDistanceCacheOffset[k]);
|
||||
static const size_t kLimits[16] = { 0, 0, 0, 0,
|
||||
6, 6, 11, 11,
|
||||
11, 11, 11, 11,
|
||||
12, 12, 12, 12 };
|
||||
if (distance == candidate && distance >= kLimits[k]) {
|
||||
return k;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return distance + 15;
|
||||
}
|
||||
|
||||
// REQUIRES: len >= 2, start_pos <= pos
|
||||
// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
|
||||
// Maintains the "ZopfliNode array invariant".
|
||||
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
|
||||
size_t len, size_t len_code, size_t dist,
|
||||
size_t short_code, float cost) {
|
||||
ZopfliNode& next = nodes[pos + len];
|
||||
next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
|
||||
next.distance = static_cast<uint32_t>(dist | (short_code << 25));
|
||||
next.insert_length = static_cast<uint32_t>(pos - start_pos);
|
||||
next.cost = cost;
|
||||
}
|
||||
|
||||
// Keeps the 2^bits candidate start positions with the smallest cost
// difference, ordered by ascending costdiff.
//
// Storage is a circular buffer: entry number k (0 = smallest costdiff) lives
// at index (k - idx_) & mask_.  Once the buffer is full, each Push() first
// overwrites the entry with the largest costdiff and then bubbles the new
// entry into place.
class StartPosQueue {
 public:
  struct PosData {
    size_t pos;             // Candidate command start position.
    int distance_cache[4];  // Last four distances valid at |pos|.
    float costdiff;         // Sort key.
  };

  // Creates an empty queue with room for 2^bits entries.
  explicit StartPosQueue(int bits)
      : mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}

  // Forgets all entries; the storage itself is kept.
  void Clear(void) { idx_ = 0; }

  // Inserts |posdata| while keeping the entries sorted by costdiff.
  void Push(const StartPosQueue::PosData& posdata) {
    size_t slot = ~idx_ & mask_;
    ++idx_;
    q_[slot] = posdata;
    // One bubble pass is enough because all other entries are already
    // sorted: at most size() - 1 neighbour comparisons / swaps.
    for (size_t remaining = size(); remaining > 1; --remaining, ++slot) {
      PosData& current = q_[slot & mask_];
      PosData& following = q_[(slot + 1) & mask_];
      if (current.costdiff > following.costdiff) {
        std::swap(current, following);
      }
    }
  }

  // Number of stored entries, capped at the capacity.
  size_t size(void) const {
    const size_t capacity = mask_ + 1;
    return idx_ < capacity ? idx_ : capacity;
  }

  // Returns the entry with the k-th smallest costdiff (k = 0 is the best).
  const StartPosQueue::PosData& GetStartPosData(size_t k) const {
    return q_[(k - idx_) & mask_];
  }

 private:
  const size_t mask_;       // Capacity - 1; capacity is a power of two.
  std::vector<PosData> q_;  // Circular storage.
  size_t idx_;              // Total number of pushes since the last Clear().
};
|
||||
|
||||
// Returns the minimum possible copy length that can improve the cost of any
|
||||
// future position.
|
||||
static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
|
||||
const ZopfliNode* nodes,
|
||||
const ZopfliCostModel& model,
|
||||
const size_t num_bytes,
|
||||
const size_t pos) {
|
||||
// Compute the minimum possible cost of reaching any future position.
|
||||
const size_t start0 = queue.GetStartPosData(0).pos;
|
||||
float min_cost = (nodes[start0].cost +
|
||||
model.GetLiteralCosts(start0, pos) +
|
||||
model.GetMinCostCmd());
|
||||
size_t len = 2;
|
||||
size_t next_len_bucket = 4;
|
||||
size_t next_len_offset = 10;
|
||||
while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
|
||||
// We already reached (pos + len) with no more cost than the minimum
|
||||
// possible cost of reaching anything from this pos, so there is no point in
|
||||
// looking for lengths <= len.
|
||||
++len;
|
||||
if (len == next_len_offset) {
|
||||
// We reached the next copy length code bucket, so we add one more
|
||||
// extra bit to the minimum cost.
|
||||
min_cost += static_cast<float>(1.0);
|
||||
next_len_offset += next_len_bucket;
|
||||
next_len_bucket *= 2;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
// Fills in dist_cache[0..3] with the last four distances (as defined by
|
||||
// Section 4. of the Spec) that would be used at (block_start + pos) if we
|
||||
// used the shortest path of commands from block_start, computed from
|
||||
// nodes[0..pos]. The last four distances at block_start are in
|
||||
// starting_dist_cach[0..3].
|
||||
// REQUIRES: nodes[pos].cost < kInfinity
|
||||
// REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant".
|
||||
static void ComputeDistanceCache(const size_t block_start,
|
||||
const size_t pos,
|
||||
const size_t max_backward,
|
||||
const int* starting_dist_cache,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache) {
|
||||
int idx = 0;
|
||||
size_t p = pos;
|
||||
// Because of prerequisite, does at most (pos + 1) / 2 iterations.
|
||||
while (idx < 4 && p > 0) {
|
||||
const size_t clen = nodes[p].copy_length();
|
||||
const size_t ilen = nodes[p].insert_length;
|
||||
const size_t dist = nodes[p].copy_distance();
|
||||
// Since block_start + p is the end position of the command, the copy part
|
||||
// starts from block_start + p - clen. Distances that are greater than this
|
||||
// or greater than max_backward are static dictionary references, and do
|
||||
// not update the last distances. Also distance code 0 (last distance)
|
||||
// does not update the last distances.
|
||||
if (dist + clen <= block_start + p && dist <= max_backward &&
|
||||
nodes[p].distance_code() > 0) {
|
||||
dist_cache[idx++] = static_cast<int>(dist);
|
||||
}
|
||||
// Because of prerequisite, p >= clen + ilen >= 2.
|
||||
p -= clen + ilen;
|
||||
}
|
||||
for (; idx < 4; ++idx) {
|
||||
dist_cache[idx] = *starting_dist_cache++;
|
||||
}
|
||||
}
|
||||
|
||||
static void UpdateNodes(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t pos,
|
||||
const uint8_t* ringbuffer,
|
||||
const size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int* starting_dist_cache,
|
||||
const size_t num_matches,
|
||||
const BackwardMatch* matches,
|
||||
const ZopfliCostModel* model,
|
||||
StartPosQueue* queue,
|
||||
ZopfliNode* nodes) {
|
||||
size_t cur_ix = block_start + pos;
|
||||
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
|
||||
size_t max_distance = std::min(cur_ix, max_backward_limit);
|
||||
|
||||
if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) {
|
||||
StartPosQueue::PosData posdata;
|
||||
posdata.pos = pos;
|
||||
posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos);
|
||||
ComputeDistanceCache(block_start, pos, max_backward_limit,
|
||||
starting_dist_cache, nodes, posdata.distance_cache);
|
||||
queue->Push(posdata);
|
||||
}
|
||||
|
||||
const size_t min_len = ComputeMinimumCopyLength(
|
||||
*queue, nodes, *model, num_bytes, pos);
|
||||
|
||||
// Go over the command starting positions in order of increasing cost
|
||||
// difference.
|
||||
for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
|
||||
const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
|
||||
const size_t start = posdata.pos;
|
||||
const float start_costdiff = posdata.costdiff;
|
||||
|
||||
// Look for last distance matches using the distance cache from this
|
||||
// starting position.
|
||||
size_t best_len = min_len - 1;
|
||||
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
|
||||
const size_t idx = kDistanceCacheIndex[j];
|
||||
const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] +
|
||||
kDistanceCacheOffset[j]);
|
||||
size_t prev_ix = cur_ix - backward;
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
}
|
||||
if (PREDICT_FALSE(backward > max_distance)) {
|
||||
continue;
|
||||
}
|
||||
prev_ix &= ringbuffer_mask;
|
||||
|
||||
if (cur_ix_masked + best_len > ringbuffer_mask ||
|
||||
prev_ix + best_len > ringbuffer_mask ||
|
||||
ringbuffer[cur_ix_masked + best_len] !=
|
||||
ringbuffer[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
|
||||
&ringbuffer[cur_ix_masked],
|
||||
num_bytes - pos);
|
||||
for (size_t l = best_len + 1; l <= len; ++l) {
|
||||
const size_t inslen = pos - start;
|
||||
float cmd_cost = model->GetCommandCost(j, l, inslen);
|
||||
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
|
||||
if (cost < nodes[pos + l].cost) {
|
||||
UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost);
|
||||
}
|
||||
best_len = l;
|
||||
}
|
||||
}
|
||||
|
||||
// At higher iterations look only for new last distance matches, since
|
||||
// looking only for new command start positions with the same distances
|
||||
// does not help much.
|
||||
if (k >= 2) continue;
|
||||
|
||||
// Loop through all possible copy lengths at this position.
|
||||
size_t len = min_len;
|
||||
for (size_t j = 0; j < num_matches; ++j) {
|
||||
BackwardMatch match = matches[j];
|
||||
size_t dist = match.distance;
|
||||
bool is_dictionary_match = dist > max_distance;
|
||||
// We already tried all possible last distance matches, so we can use
|
||||
// normal distance code here.
|
||||
size_t dist_code = dist + 15;
|
||||
// Try all copy lengths up until the maximum copy length corresponding
|
||||
// to this distance. If the distance refers to the static dictionary, or
|
||||
// the maximum length is long enough, try only one maximum length.
|
||||
size_t max_len = match.length();
|
||||
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
|
||||
len = max_len;
|
||||
}
|
||||
for (; len <= max_len; ++len) {
|
||||
size_t len_code = is_dictionary_match ? match.length_code() : len;
|
||||
const size_t inslen = pos - start;
|
||||
float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen);
|
||||
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
|
||||
if (cost < nodes[pos + len].cost) {
|
||||
UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ComputeShortestPathFromNodes(size_t num_bytes,
|
||||
const ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path) {
|
||||
std::vector<uint32_t> backwards(num_bytes / 2 + 1);
|
||||
size_t index = num_bytes;
|
||||
while (nodes[index].cost == kInfinity) --index;
|
||||
size_t num_commands = 0;
|
||||
while (index != 0) {
|
||||
size_t len = nodes[index].command_length();
|
||||
backwards[num_commands++] = static_cast<uint32_t>(len);
|
||||
index -= len;
|
||||
}
|
||||
path->resize(num_commands);
|
||||
for (size_t i = num_commands, j = 0; i > 0; --i, ++j) {
|
||||
(*path)[j] = backwards[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
void ZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t max_backward_limit,
|
||||
const std::vector<uint32_t>& path,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_literals) {
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < path.size(); i++) {
|
||||
const ZopfliNode& next = nodes[pos + path[i]];
|
||||
size_t copy_length = next.copy_length();
|
||||
size_t insert_length = next.insert_length;
|
||||
pos += insert_length;
|
||||
if (i == 0) {
|
||||
insert_length += *last_insert_len;
|
||||
*last_insert_len = 0;
|
||||
}
|
||||
size_t distance = next.copy_distance();
|
||||
size_t len_code = next.length_code();
|
||||
size_t max_distance = std::min(block_start + pos, max_backward_limit);
|
||||
bool is_dictionary = (distance > max_distance);
|
||||
size_t dist_code = next.distance_code();
|
||||
|
||||
Command cmd(insert_length, copy_length, len_code, dist_code);
|
||||
commands[i] = cmd;
|
||||
|
||||
if (!is_dictionary && dist_code > 0) {
|
||||
dist_cache[3] = dist_cache[2];
|
||||
dist_cache[2] = dist_cache[1];
|
||||
dist_cache[1] = dist_cache[0];
|
||||
dist_cache[0] = static_cast<int>(distance);
|
||||
}
|
||||
|
||||
*num_literals += insert_length;
|
||||
pos += copy_length;
|
||||
}
|
||||
*last_insert_len += num_bytes - pos;
|
||||
}
|
||||
|
||||
static void ZopfliIterate(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
const ZopfliCostModel& model,
|
||||
const std::vector<uint32_t>& num_matches,
|
||||
const std::vector<BackwardMatch>& matches,
|
||||
ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path) {
|
||||
nodes[0].length = 0;
|
||||
nodes[0].cost = 0;
|
||||
StartPosQueue queue(3);
|
||||
size_t cur_match_pos = 0;
|
||||
for (size_t i = 0; i + 3 < num_bytes; i++) {
|
||||
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
|
||||
max_backward_limit, dist_cache, num_matches[i],
|
||||
&matches[cur_match_pos], &model, &queue, &nodes[0]);
|
||||
cur_match_pos += num_matches[i];
|
||||
// The zopflification can be too slow in case of very long lengths, so in
|
||||
// such case skip it all, it does not cost a lot of compression ratio.
|
||||
if (num_matches[i] == 1 &&
|
||||
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
|
||||
i += matches[cur_match_pos - 1].length() - 1;
|
||||
queue.Clear();
|
||||
}
|
||||
}
|
||||
ComputeShortestPathFromNodes(num_bytes, &nodes[0], path);
|
||||
}
|
||||
|
||||
|
||||
void ZopfliComputeShortestPath(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
Hashers::H10* hasher,
|
||||
ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path) {
|
||||
nodes[0].length = 0;
|
||||
nodes[0].cost = 0;
|
||||
ZopfliCostModel* model = new ZopfliCostModel;
|
||||
model->SetFromLiteralCosts(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask);
|
||||
StartPosQueue queue(3);
|
||||
BackwardMatch matches[Hashers::H10::kMaxNumMatches];
|
||||
for (size_t i = 0; i + 3 < num_bytes; i++) {
|
||||
const size_t max_distance = std::min(position + i, max_backward_limit);
|
||||
size_t num_matches = hasher->FindAllMatches(
|
||||
ringbuffer, ringbuffer_mask, position + i, num_bytes - i, max_distance,
|
||||
matches);
|
||||
if (num_matches > 0 &&
|
||||
matches[num_matches - 1].length() > kMaxZopfliLen) {
|
||||
matches[0] = matches[num_matches - 1];
|
||||
num_matches = 1;
|
||||
}
|
||||
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
|
||||
max_backward_limit, dist_cache, num_matches, matches,
|
||||
model, &queue, nodes);
|
||||
if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) {
|
||||
for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) {
|
||||
++i;
|
||||
if (matches[0].length() - j < 64 &&
|
||||
num_bytes - i >= kMaxTreeCompLength) {
|
||||
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
|
||||
}
|
||||
}
|
||||
queue.Clear();
|
||||
}
|
||||
}
|
||||
delete model;
|
||||
ComputeShortestPathFromNodes(num_bytes, nodes, path);
|
||||
}
|
||||
|
||||
template<typename Hasher>
|
||||
void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t position,
|
||||
bool is_last,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const int quality,
|
||||
const int lgwin,
|
||||
Hasher* hasher,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
// Set maximum distance, see section 9.1. of the spec.
|
||||
const size_t max_backward_limit = (1 << lgwin) - 16;
|
||||
|
||||
// Choose which init method is faster.
|
||||
// memset is about 100 times faster than hasher->InitForData().
|
||||
const size_t kMaxBytesForPartialHashInit = Hasher::kHashMapSize >> 7;
|
||||
if (position == 0 && is_last && num_bytes <= kMaxBytesForPartialHashInit) {
|
||||
hasher->InitForData(ringbuffer, num_bytes);
|
||||
} else {
|
||||
hasher->Init();
|
||||
}
|
||||
if (num_bytes >= 3 && position >= 3) {
|
||||
// Prepare the hashes for three last bytes of the last write.
|
||||
// These could not be calculated before, since they require knowledge
|
||||
// of both the previous and the current block.
|
||||
hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
|
||||
static_cast<uint32_t>(position - 3));
|
||||
hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
|
||||
static_cast<uint32_t>(position - 2));
|
||||
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
|
||||
static_cast<uint32_t>(position - 1));
|
||||
}
|
||||
const Command * const orig_commands = commands;
|
||||
size_t insert_length = *last_insert_len;
|
||||
size_t i = position & ringbuffer_mask;
|
||||
const size_t i_diff = position - i;
|
||||
const size_t i_end = i + num_bytes;
|
||||
|
||||
// For speed up heuristics for random data.
|
||||
const size_t random_heuristics_window_size = quality < 9 ? 64 : 512;
|
||||
size_t apply_random_heuristics = i + random_heuristics_window_size;
|
||||
|
||||
// Minimum score to accept a backward reference.
|
||||
const double kMinScore = 4.0;
|
||||
|
||||
while (i + Hasher::kHashTypeLength - 1 < i_end) {
|
||||
size_t max_length = i_end - i;
|
||||
size_t max_distance = std::min(i + i_diff, max_backward_limit);
|
||||
size_t best_len = 0;
|
||||
size_t best_len_code = 0;
|
||||
size_t best_dist = 0;
|
||||
double best_score = kMinScore;
|
||||
bool match_found = hasher->FindLongestMatch(
|
||||
ringbuffer, ringbuffer_mask,
|
||||
dist_cache, static_cast<uint32_t>(i + i_diff), max_length, max_distance,
|
||||
&best_len, &best_len_code, &best_dist, &best_score);
|
||||
if (match_found) {
|
||||
// Found a match. Let's look for something even better ahead.
|
||||
int delayed_backward_references_in_row = 0;
|
||||
for (;;) {
|
||||
--max_length;
|
||||
size_t best_len_2 =
|
||||
quality < 5 ? std::min(best_len - 1, max_length) : 0;
|
||||
size_t best_len_code_2 = 0;
|
||||
size_t best_dist_2 = 0;
|
||||
double best_score_2 = kMinScore;
|
||||
max_distance = std::min(i + i_diff + 1, max_backward_limit);
|
||||
match_found = hasher->FindLongestMatch(
|
||||
ringbuffer, ringbuffer_mask,
|
||||
dist_cache, static_cast<uint32_t>(i + i_diff + 1),
|
||||
max_length, max_distance,
|
||||
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
|
||||
double cost_diff_lazy = 7.0;
|
||||
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
|
||||
// Ok, let's just write one byte for now and start a match from the
|
||||
// next byte.
|
||||
++i;
|
||||
++insert_length;
|
||||
best_len = best_len_2;
|
||||
best_len_code = best_len_code_2;
|
||||
best_dist = best_dist_2;
|
||||
best_score = best_score_2;
|
||||
if (++delayed_backward_references_in_row < 4) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
apply_random_heuristics =
|
||||
i + 2 * best_len + random_heuristics_window_size;
|
||||
max_distance = std::min(i + i_diff, max_backward_limit);
|
||||
// The first 16 codes are special shortcodes, and the minimum offset is 1.
|
||||
size_t distance_code =
|
||||
ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
|
||||
if (best_dist <= max_distance && distance_code > 0) {
|
||||
dist_cache[3] = dist_cache[2];
|
||||
dist_cache[2] = dist_cache[1];
|
||||
dist_cache[1] = dist_cache[0];
|
||||
dist_cache[0] = static_cast<int>(best_dist);
|
||||
}
|
||||
Command cmd(insert_length, best_len, best_len_code, distance_code);
|
||||
*commands++ = cmd;
|
||||
*num_literals += insert_length;
|
||||
insert_length = 0;
|
||||
// Put the hash keys into the table, if there are enough
|
||||
// bytes left.
|
||||
for (size_t j = 2; j < best_len; ++j) {
|
||||
hasher->Store(&ringbuffer[i + j],
|
||||
static_cast<uint32_t>(i + i_diff + j));
|
||||
}
|
||||
i += best_len;
|
||||
} else {
|
||||
++insert_length;
|
||||
++i;
|
||||
// If we have not seen matches for a long time, we can skip some
|
||||
// match lookups. Unsuccessful match lookups are very very expensive
|
||||
// and this kind of a heuristic speeds up compression quite
|
||||
// a lot.
|
||||
if (i > apply_random_heuristics) {
|
||||
// Going through uncompressible data, jump.
|
||||
if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
|
||||
// It is quite a long time since we saw a copy, so we assume
|
||||
// that this data is not compressible, and store hashes less
|
||||
// often. Hashes of non compressible data are less likely to
|
||||
// turn out to be useful in the future, too, so we store less of
|
||||
// them to not to flood out the hash table of good compressible
|
||||
// data.
|
||||
size_t i_jump = std::min(i + 16, i_end - 4);
|
||||
for (; i < i_jump; i += 4) {
|
||||
hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
|
||||
insert_length += 4;
|
||||
}
|
||||
} else {
|
||||
size_t i_jump = std::min(i + 8, i_end - 3);
|
||||
for (; i < i_jump; i += 2) {
|
||||
hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
|
||||
insert_length += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
insert_length += i_end - i;
|
||||
*last_insert_len = insert_length;
|
||||
*num_commands += static_cast<size_t>(commands - orig_commands);
|
||||
}
|
||||
|
||||
void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t position,
|
||||
bool is_last,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const int quality,
|
||||
const int lgwin,
|
||||
Hashers* hashers,
|
||||
int hash_type,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
bool zopflify = quality > 9;
|
||||
if (zopflify) {
|
||||
Hashers::H10* hasher = hashers->hash_h10;
|
||||
hasher->Init(lgwin, position, num_bytes, is_last);
|
||||
hasher->StitchToPreviousBlock(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask);
|
||||
// Set maximum distance, see section 9.1. of the spec.
|
||||
const size_t max_backward_limit = (1 << lgwin) - 16;
|
||||
if (quality == 10) {
|
||||
std::vector<ZopfliNode> nodes(num_bytes + 1);
|
||||
std::vector<uint32_t> path;
|
||||
ZopfliComputeShortestPath(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask,
|
||||
max_backward_limit, dist_cache, hasher,
|
||||
&nodes[0], &path);
|
||||
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
|
||||
&nodes[0], dist_cache, last_insert_len, commands,
|
||||
num_literals);
|
||||
*num_commands += path.size();
|
||||
return;
|
||||
}
|
||||
std::vector<uint32_t> num_matches(num_bytes);
|
||||
std::vector<BackwardMatch> matches(4 * num_bytes);
|
||||
size_t cur_match_pos = 0;
|
||||
for (size_t i = 0; i + 3 < num_bytes; ++i) {
|
||||
size_t max_distance = std::min(position + i, max_backward_limit);
|
||||
size_t max_length = num_bytes - i;
|
||||
// Ensure that we have enough free slots.
|
||||
if (matches.size() < cur_match_pos + Hashers::H10::kMaxNumMatches) {
|
||||
matches.resize(cur_match_pos + Hashers::H10::kMaxNumMatches);
|
||||
}
|
||||
size_t num_found_matches = hasher->FindAllMatches(
|
||||
ringbuffer, ringbuffer_mask, position + i, max_length, max_distance,
|
||||
&matches[cur_match_pos]);
|
||||
const size_t cur_match_end = cur_match_pos + num_found_matches;
|
||||
for (size_t j = cur_match_pos; j + 1 < cur_match_end; ++j) {
|
||||
assert(matches[j].length() < matches[j + 1].length());
|
||||
assert(matches[j].distance > max_distance ||
|
||||
matches[j].distance <= matches[j + 1].distance);
|
||||
}
|
||||
num_matches[i] = static_cast<uint32_t>(num_found_matches);
|
||||
if (num_found_matches > 0) {
|
||||
const size_t match_len = matches[cur_match_end - 1].length();
|
||||
if (match_len > kMaxZopfliLen) {
|
||||
matches[cur_match_pos++] = matches[cur_match_end - 1];
|
||||
num_matches[i] = 1;
|
||||
for (size_t j = 1; j < match_len; ++j) {
|
||||
++i;
|
||||
if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) {
|
||||
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
|
||||
}
|
||||
num_matches[i] = 0;
|
||||
}
|
||||
} else {
|
||||
cur_match_pos = cur_match_end;
|
||||
}
|
||||
}
|
||||
}
|
||||
size_t orig_num_literals = *num_literals;
|
||||
size_t orig_last_insert_len = *last_insert_len;
|
||||
int orig_dist_cache[4] = {
|
||||
dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
|
||||
};
|
||||
size_t orig_num_commands = *num_commands;
|
||||
static const size_t kIterations = 2;
|
||||
for (size_t i = 0; i < kIterations; i++) {
|
||||
ZopfliCostModel model;
|
||||
if (i == 0) {
|
||||
model.SetFromLiteralCosts(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask);
|
||||
} else {
|
||||
model.SetFromCommands(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask,
|
||||
commands, *num_commands - orig_num_commands,
|
||||
orig_last_insert_len);
|
||||
}
|
||||
*num_commands = orig_num_commands;
|
||||
*num_literals = orig_num_literals;
|
||||
*last_insert_len = orig_last_insert_len;
|
||||
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
std::vector<ZopfliNode> nodes(num_bytes + 1);
|
||||
std::vector<uint32_t> path;
|
||||
ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||
max_backward_limit, dist_cache, model, num_matches, matches,
|
||||
&nodes[0], &path);
|
||||
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
|
||||
&nodes[0], dist_cache, last_insert_len, commands,
|
||||
num_literals);
|
||||
*num_commands += path.size();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
switch (hash_type) {
|
||||
case 2:
|
||||
CreateBackwardReferences<Hashers::H2>(
|
||||
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
|
||||
quality, lgwin, hashers->hash_h2, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
break;
|
||||
case 3:
|
||||
CreateBackwardReferences<Hashers::H3>(
|
||||
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
|
||||
quality, lgwin, hashers->hash_h3, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
break;
|
||||
case 4:
|
||||
CreateBackwardReferences<Hashers::H4>(
|
||||
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
|
||||
quality, lgwin, hashers->hash_h4, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
break;
|
||||
case 5:
|
||||
CreateBackwardReferences<Hashers::H5>(
|
||||
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
|
||||
quality, lgwin, hashers->hash_h5, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
break;
|
||||
case 6:
|
||||
CreateBackwardReferences<Hashers::H6>(
|
||||
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
|
||||
quality, lgwin, hashers->hash_h6, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
break;
|
||||
case 7:
|
||||
CreateBackwardReferences<Hashers::H7>(
|
||||
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
|
||||
quality, lgwin, hashers->hash_h7, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
break;
|
||||
case 8:
|
||||
CreateBackwardReferences<Hashers::H8>(
|
||||
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
|
||||
quality, lgwin, hashers->hash_h8, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
break;
|
||||
case 9:
|
||||
CreateBackwardReferences<Hashers::H9>(
|
||||
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
|
||||
quality, lgwin, hashers->hash_h9, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,116 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Function to find backward reference copies.
|
||||
|
||||
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "./hash.h"
|
||||
#include "./command.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// "commands" points to the next output command to write to, "*num_commands" is
|
||||
// initially the total amount of commands output by previous
|
||||
// CreateBackwardReferences calls, and must be incremented by the amount written
|
||||
// by this call.
|
||||
void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t position,
|
||||
bool is_last,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const int quality,
|
||||
const int lgwin,
|
||||
Hashers* hashers,
|
||||
int hash_type,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals);
|
||||
|
||||
// Cost assigned to a freshly constructed ZopfliNode.
static const float kInfinity = std::numeric_limits<float>::infinity();

// One entry of the Zopfli node array. The copy length and the copy distance
// share their 32-bit words with small auxiliary codes, so read them through
// the accessors below instead of using "length" / "distance" directly.
struct ZopfliNode {
  // Starts with packed length 1, no distance, no inserts and infinite cost.
  ZopfliNode(void)
      : length(1), distance(0), insert_length(0), cost(kInfinity) {}

  // Copy length: the low 24 bits of "length".
  inline uint32_t copy_length() const {
    const uint32_t kCopyLengthMask = 0xffffff;
    return length & kCopyLengthMask;
  }

  // Length code: copy length plus 9, minus the modifier stored in the top
  // 8 bits of "length".
  inline uint32_t length_code() const {
    const uint32_t len_modifier = length >> 24;
    const uint32_t biased_length = copy_length() + 9u;
    return biased_length - len_modifier;
  }

  // Copy distance: the low 25 bits of "distance".
  inline uint32_t copy_distance() const {
    const uint32_t kCopyDistanceMask = 0x1ffffff;
    return distance & kCopyDistanceMask;
  }

  // Distance code: the stored short code minus one when a short code is
  // present, otherwise the copy distance plus 15.
  inline uint32_t distance_code() const {
    const uint32_t short_code = distance >> 25;
    if (short_code != 0) {
      return short_code - 1;
    }
    return copy_distance() + 15;
  }

  // Total number of bytes covered by the command: inserts plus copy.
  inline uint32_t command_length() const {
    return copy_length() + insert_length;
  }

  // Best length to get up to this byte (not including this byte itself).
  // The highest 8 bits are used to reconstruct the length code.
  uint32_t length;
  // Distance associated with the length.
  // The highest 7 bits contain the distance short code + 1 (or zero if there
  // is no short code).
  uint32_t distance;
  // Number of literal inserts before this copy.
  uint32_t insert_length;
  // Smallest cost to get to this byte from the beginning, as found so far.
  float cost;
};
|
||||
|
||||
// Computes the shortest path of commands from position to at most
|
||||
// position + num_bytes.
|
||||
//
|
||||
// On return, path->size() is the number of commands found and path[i] is the
|
||||
// length of the ith command (copy length plus insert length).
|
||||
// Note that the sum of the lengths of all commands can be less than num_bytes.
|
||||
//
|
||||
// On return, the nodes[0..num_bytes] array will have the following
|
||||
// "ZopfliNode array invariant":
|
||||
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
|
||||
// (1) nodes[i].copy_length() >= 2
|
||||
// (2) nodes[i].command_length() <= i and
|
||||
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
|
||||
void ZopfliComputeShortestPath(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
Hashers::H10* hasher,
|
||||
ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path);
|
||||
|
||||
void ZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t max_backward_limit,
|
||||
const std::vector<uint32_t>& path,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_literals);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_
|
|
@ -0,0 +1,161 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Functions to estimate the bit cost of Huffman trees.
|
||||
|
||||
#ifndef BROTLI_ENC_BIT_COST_H_
|
||||
#define BROTLI_ENC_BIT_COST_H_
|
||||
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
static inline double ShannonEntropy(const uint32_t *population, size_t size,
|
||||
size_t *total) {
|
||||
size_t sum = 0;
|
||||
double retval = 0;
|
||||
const uint32_t *population_end = population + size;
|
||||
size_t p;
|
||||
if (size & 1) {
|
||||
goto odd_number_of_elements_left;
|
||||
}
|
||||
while (population < population_end) {
|
||||
p = *population++;
|
||||
sum += p;
|
||||
retval -= static_cast<double>(p) * FastLog2(p);
|
||||
odd_number_of_elements_left:
|
||||
p = *population++;
|
||||
sum += p;
|
||||
retval -= static_cast<double>(p) * FastLog2(p);
|
||||
}
|
||||
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
|
||||
*total = sum;
|
||||
return retval;
|
||||
}
|
||||
|
||||
static inline double BitsEntropy(const uint32_t *population, size_t size) {
|
||||
size_t sum;
|
||||
double retval = ShannonEntropy(population, size, &sum);
|
||||
if (retval < sum) {
|
||||
// At least one bit per literal is needed.
|
||||
retval = static_cast<double>(sum);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
double PopulationCost(const Histogram<kSize>& histogram) {
|
||||
static const double kOneSymbolHistogramCost = 12;
|
||||
static const double kTwoSymbolHistogramCost = 20;
|
||||
static const double kThreeSymbolHistogramCost = 28;
|
||||
static const double kFourSymbolHistogramCost = 37;
|
||||
if (histogram.total_count_ == 0) {
|
||||
return kOneSymbolHistogramCost;
|
||||
}
|
||||
int count = 0;
|
||||
int s[5];
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
s[count] = i;
|
||||
++count;
|
||||
if (count > 4) break;
|
||||
}
|
||||
}
|
||||
if (count == 1) {
|
||||
return kOneSymbolHistogramCost;
|
||||
}
|
||||
if (count == 2) {
|
||||
return (kTwoSymbolHistogramCost +
|
||||
static_cast<double>(histogram.total_count_));
|
||||
}
|
||||
if (count == 3) {
|
||||
const uint32_t histo0 = histogram.data_[s[0]];
|
||||
const uint32_t histo1 = histogram.data_[s[1]];
|
||||
const uint32_t histo2 = histogram.data_[s[2]];
|
||||
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
|
||||
return (kThreeSymbolHistogramCost +
|
||||
2 * (histo0 + histo1 + histo2) - histomax);
|
||||
}
|
||||
if (count == 4) {
|
||||
uint32_t histo[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
histo[i] = histogram.data_[s[i]];
|
||||
}
|
||||
// Sort
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = i + 1; j < 4; ++j) {
|
||||
if (histo[j] > histo[i]) {
|
||||
std::swap(histo[j], histo[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
const uint32_t h23 = histo[2] + histo[3];
|
||||
const uint32_t histomax = std::max(h23, histo[0]);
|
||||
return (kFourSymbolHistogramCost +
|
||||
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
|
||||
}
|
||||
|
||||
// In this loop we compute the entropy of the histogram and simultaneously
|
||||
// build a simplified histogram of the code length codes where we use the
|
||||
// zero repeat code 17, but we don't use the non-zero repeat code 16.
|
||||
double bits = 0;
|
||||
size_t max_depth = 1;
|
||||
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
|
||||
const double log2total = FastLog2(histogram.total_count_);
|
||||
for (size_t i = 0; i < kSize;) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
||||
// = log2(total_count) - log2(count(symbol))
|
||||
double log2p = log2total - FastLog2(histogram.data_[i]);
|
||||
// Approximate the bit depth by round(-log2(P(symbol)))
|
||||
size_t depth = static_cast<size_t>(log2p + 0.5);
|
||||
bits += histogram.data_[i] * log2p;
|
||||
if (depth > 15) {
|
||||
depth = 15;
|
||||
}
|
||||
if (depth > max_depth) {
|
||||
max_depth = depth;
|
||||
}
|
||||
++depth_histo[depth];
|
||||
++i;
|
||||
} else {
|
||||
// Compute the run length of zeros and add the appropriate number of 0 and
|
||||
// 17 code length codes to the code length code histogram.
|
||||
uint32_t reps = 1;
|
||||
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
|
||||
++reps;
|
||||
}
|
||||
i += reps;
|
||||
if (i == kSize) {
|
||||
// Don't add any cost for the last zero run, since these are encoded
|
||||
// only implicitly.
|
||||
break;
|
||||
}
|
||||
if (reps < 3) {
|
||||
depth_histo[0] += reps;
|
||||
} else {
|
||||
reps -= 2;
|
||||
while (reps > 0) {
|
||||
++depth_histo[17];
|
||||
// Add the 3 extra bits for the 17 code length code.
|
||||
bits += 3;
|
||||
reps >>= 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add the estimated encoding cost of the code length code histogram.
|
||||
bits += static_cast<double>(18 + 2 * max_depth);
|
||||
// Add the entropy of the code length code histogram.
|
||||
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
|
||||
return bits;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_BIT_COST_H_
|
|
@ -0,0 +1,505 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Block split point selection utilities.
|
||||
|
||||
#include "./block_splitter.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
|
||||
#include "./cluster.h"
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Tuning parameters of the block splitter. SplitBlock passes them to
// SplitByteVector once per symbol stream (literals, command prefixes,
// distance prefixes).
//
// Upper bound on the number of starting histograms for the literal split.
static const size_t kMaxLiteralHistograms = 100;
|
||||
// Upper bound on the number of starting histograms; used for both the
// command prefix split and the distance prefix split.
static const size_t kMaxCommandHistograms = 50;
|
||||
// Block switch costs handed to FindBlocks for each of the three streams.
static const double kLiteralBlockSwitchCost = 28.1;
|
||||
static const double kCommandBlockSwitchCost = 13.5;
|
||||
static const double kDistanceBlockSwitchCost = 14.6;
|
||||
// Number of consecutive symbols added per histogram sample for literals.
static const size_t kLiteralStrideLength = 70;
|
||||
// Same, used for both command prefixes and distance prefixes.
static const size_t kCommandStrideLength = 40;
|
||||
// SplitByteVector starts with data.size() / <this value> + 1 histograms
// (before applying the upper bounds above).
static const size_t kSymbolsPerLiteralHistogram = 544;
|
||||
static const size_t kSymbolsPerCommandHistogram = 530;
|
||||
static const size_t kSymbolsPerDistanceHistogram = 544;
|
||||
// Non-empty inputs shorter than this get a single block of type 0.
static const size_t kMinLengthForBlockSplitting = 128;
|
||||
// RefineEntropyCodes draws at least
// kIterMulForRefining * length / stride + kMinItersForRefining samples.
static const size_t kIterMulForRefining = 2;
|
||||
static const size_t kMinItersForRefining = 100;
|
||||
|
||||
void CopyLiteralsToByteArray(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
std::vector<uint8_t>* literals) {
|
||||
// Count how many we have.
|
||||
size_t total_length = 0;
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
total_length += cmds[i].insert_len_;
|
||||
}
|
||||
if (total_length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate.
|
||||
literals->resize(total_length);
|
||||
|
||||
// Loop again, and copy this time.
|
||||
size_t pos = 0;
|
||||
size_t from_pos = offset & mask;
|
||||
for (size_t i = 0; i < num_commands && pos < total_length; ++i) {
|
||||
size_t insert_len = cmds[i].insert_len_;
|
||||
if (from_pos + insert_len > mask) {
|
||||
size_t head_size = mask + 1 - from_pos;
|
||||
memcpy(&(*literals)[pos], data + from_pos, head_size);
|
||||
from_pos = 0;
|
||||
pos += head_size;
|
||||
insert_len -= head_size;
|
||||
}
|
||||
if (insert_len > 0) {
|
||||
memcpy(&(*literals)[pos], data + from_pos, insert_len);
|
||||
pos += insert_len;
|
||||
}
|
||||
from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
|
||||
}
|
||||
}
|
||||
|
||||
// Advances *seed by multiplying it with 16807 (unsigned wrap-around) and
// returns the new value. A product of zero is replaced by 1 so the sequence
// never gets stuck at zero.
inline static unsigned int MyRand(unsigned int* seed) {
  const unsigned int next = *seed * 16807U;
  *seed = (next == 0) ? 1 : next;
  return *seed;
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void InitialEntropyCodes(const DataType* data, size_t length,
|
||||
size_t stride,
|
||||
size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
histograms[i].Clear();
|
||||
}
|
||||
unsigned int seed = 7;
|
||||
size_t block_length = length / num_histograms;
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
size_t pos = length * i / num_histograms;
|
||||
if (i != 0) {
|
||||
pos += MyRand(&seed) % block_length;
|
||||
}
|
||||
if (pos + stride >= length) {
|
||||
pos = length - stride - 1;
|
||||
}
|
||||
histograms[i].Add(data + pos, stride);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void RandomSample(unsigned int* seed,
|
||||
const DataType* data,
|
||||
size_t length,
|
||||
size_t stride,
|
||||
HistogramType* sample) {
|
||||
size_t pos = 0;
|
||||
if (stride >= length) {
|
||||
pos = 0;
|
||||
stride = length;
|
||||
} else {
|
||||
pos = MyRand(seed) % (length - stride + 1);
|
||||
}
|
||||
sample->Add(data + pos, stride);
|
||||
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void RefineEntropyCodes(const DataType* data, size_t length,
|
||||
size_t stride,
|
||||
size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
size_t iters =
|
||||
kIterMulForRefining * length / stride + kMinItersForRefining;
|
||||
unsigned int seed = 7;
|
||||
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
|
||||
for (size_t iter = 0; iter < iters; ++iter) {
|
||||
HistogramType sample;
|
||||
RandomSample(&seed, data, length, stride, &sample);
|
||||
size_t ix = iter % num_histograms;
|
||||
histograms[ix].AddHistogram(sample);
|
||||
}
|
||||
}
|
||||
|
||||
inline static double BitCost(size_t count) {
|
||||
return count == 0 ? -2.0 : FastLog2(count);
|
||||
}
|
||||
|
||||
// Assigns a block id from the range [0, vec.size()) to each data element
|
||||
// in data[0..length) and fills in block_id[0..length) with the assigned values.
|
||||
// Returns the number of blocks, i.e. one plus the number of block switches.
|
||||
template<typename DataType, int kSize>
|
||||
size_t FindBlocks(const DataType* data, const size_t length,
|
||||
const double block_switch_bitcost,
|
||||
const size_t num_histograms,
|
||||
const Histogram<kSize>* histograms,
|
||||
double* insert_cost,
|
||||
double* cost,
|
||||
uint8_t* switch_signal,
|
||||
uint8_t *block_id) {
|
||||
if (num_histograms <= 1) {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
block_id[i] = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
const size_t bitmaplen = (num_histograms + 7) >> 3;
|
||||
assert(num_histograms <= 256);
|
||||
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
|
||||
for (size_t j = 0; j < num_histograms; ++j) {
|
||||
insert_cost[j] = FastLog2(static_cast<uint32_t>(
|
||||
histograms[j].total_count_));
|
||||
}
|
||||
for (size_t i = kSize; i != 0;) {
|
||||
--i;
|
||||
for (size_t j = 0; j < num_histograms; ++j) {
|
||||
insert_cost[i * num_histograms + j] =
|
||||
insert_cost[j] - BitCost(histograms[j].data_[i]);
|
||||
}
|
||||
}
|
||||
memset(cost, 0, sizeof(cost[0]) * num_histograms);
|
||||
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
|
||||
// After each iteration of this loop, cost[k] will contain the difference
|
||||
// between the minimum cost of arriving at the current byte position using
|
||||
// entropy code k, and the minimum cost of arriving at the current byte
|
||||
// position. This difference is capped at the block switch cost, and if it
|
||||
// reaches block switch cost, it means that when we trace back from the last
|
||||
// position, we need to switch here.
|
||||
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
|
||||
size_t ix = byte_ix * bitmaplen;
|
||||
size_t insert_cost_ix = data[byte_ix] * num_histograms;
|
||||
double min_cost = 1e99;
|
||||
for (size_t k = 0; k < num_histograms; ++k) {
|
||||
// We are coding the symbol in data[byte_ix] with entropy code k.
|
||||
cost[k] += insert_cost[insert_cost_ix + k];
|
||||
if (cost[k] < min_cost) {
|
||||
min_cost = cost[k];
|
||||
block_id[byte_ix] = static_cast<uint8_t>(k);
|
||||
}
|
||||
}
|
||||
double block_switch_cost = block_switch_bitcost;
|
||||
// More blocks for the beginning.
|
||||
if (byte_ix < 2000) {
|
||||
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
|
||||
}
|
||||
for (size_t k = 0; k < num_histograms; ++k) {
|
||||
cost[k] -= min_cost;
|
||||
if (cost[k] >= block_switch_cost) {
|
||||
cost[k] = block_switch_cost;
|
||||
const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
|
||||
assert((k >> 3) < bitmaplen);
|
||||
switch_signal[ix + (k >> 3)] |= mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now trace back from the last position and switch at the marked places.
|
||||
size_t byte_ix = length - 1;
|
||||
size_t ix = byte_ix * bitmaplen;
|
||||
uint8_t cur_id = block_id[byte_ix];
|
||||
size_t num_blocks = 1;
|
||||
while (byte_ix > 0) {
|
||||
--byte_ix;
|
||||
ix -= bitmaplen;
|
||||
const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
|
||||
assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
|
||||
if (switch_signal[ix + (cur_id >> 3)] & mask) {
|
||||
if (cur_id != block_id[byte_ix]) {
|
||||
cur_id = block_id[byte_ix];
|
||||
++num_blocks;
|
||||
}
|
||||
}
|
||||
block_id[byte_ix] = cur_id;
|
||||
}
|
||||
return num_blocks;
|
||||
}
|
||||
|
||||
// Renumbers the ids in block_ids[0..length) so that they become 0, 1, 2, ...
// in order of first appearance. new_id[0..num_histograms) is scratch space
// that receives the old-id -> new-id mapping (256 for ids that never occur).
// Returns the number of distinct ids in use.
static size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
                            uint16_t* new_id, const size_t num_histograms) {
  static const uint16_t kUnassigned = 256;
  for (size_t h = 0; h != num_histograms; ++h) {
    new_id[h] = kUnassigned;
  }
  uint16_t ids_in_use = 0;
  // An element is rewritten right after its id has been mapped; elements
  // further along are still untouched, so one pass is sufficient.
  for (size_t pos = 0; pos != length; ++pos) {
    const uint8_t old_id = block_ids[pos];
    assert(old_id < num_histograms);
    if (new_id[old_id] == kUnassigned) {
      new_id[old_id] = ids_in_use++;
    }
    block_ids[pos] = static_cast<uint8_t>(new_id[old_id]);
    assert(block_ids[pos] < num_histograms);
  }
  assert(ids_in_use <= num_histograms);
  return ids_in_use;
}
|
||||
|
||||
// Rebuilds histograms[0..num_histograms) from scratch: every histogram is
// cleared, then data[i] is added to the histogram selected by block_ids[i].
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
                          const uint8_t* block_ids,
                          const size_t num_histograms,
                          HistogramType* histograms) {
  HistogramType* const histograms_end = histograms + num_histograms;
  for (HistogramType* h = histograms; h != histograms_end; ++h) {
    h->Clear();
  }
  for (size_t pos = 0; pos != length; ++pos) {
    histograms[block_ids[pos]].Add(data[pos]);
  }
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void ClusterBlocks(const DataType* data, const size_t length,
|
||||
const size_t num_blocks,
|
||||
uint8_t* block_ids,
|
||||
BlockSplit* split) {
|
||||
static const size_t kMaxNumberOfBlockTypes = 256;
|
||||
static const size_t kHistogramsPerBatch = 64;
|
||||
static const size_t kClustersPerBatch = 16;
|
||||
std::vector<uint32_t> histogram_symbols(num_blocks);
|
||||
std::vector<uint32_t> block_lengths(num_blocks);
|
||||
|
||||
size_t block_idx = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
assert(block_idx < num_blocks);
|
||||
++block_lengths[block_idx];
|
||||
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
|
||||
++block_idx;
|
||||
}
|
||||
}
|
||||
assert(block_idx == num_blocks);
|
||||
|
||||
const size_t expected_num_clusters =
|
||||
kClustersPerBatch *
|
||||
(num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
|
||||
std::vector<HistogramType> all_histograms;
|
||||
std::vector<uint32_t> cluster_size;
|
||||
all_histograms.reserve(expected_num_clusters);
|
||||
cluster_size.reserve(expected_num_clusters);
|
||||
size_t num_clusters = 0;
|
||||
std::vector<HistogramType> histograms(
|
||||
std::min(num_blocks, kHistogramsPerBatch));
|
||||
size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
|
||||
std::vector<HistogramPair> pairs(max_num_pairs + 1);
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
|
||||
const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
|
||||
uint32_t sizes[kHistogramsPerBatch];
|
||||
uint32_t clusters[kHistogramsPerBatch];
|
||||
uint32_t symbols[kHistogramsPerBatch];
|
||||
uint32_t remap[kHistogramsPerBatch];
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
histograms[j].Clear();
|
||||
for (size_t k = 0; k < block_lengths[i + j]; ++k) {
|
||||
histograms[j].Add(data[pos++]);
|
||||
}
|
||||
histograms[j].bit_cost_ = PopulationCost(histograms[j]);
|
||||
symbols[j] = clusters[j] = static_cast<uint32_t>(j);
|
||||
sizes[j] = 1;
|
||||
}
|
||||
size_t num_new_clusters = HistogramCombine(
|
||||
&histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
|
||||
num_to_combine, kHistogramsPerBatch, max_num_pairs);
|
||||
for (size_t j = 0; j < num_new_clusters; ++j) {
|
||||
all_histograms.push_back(histograms[clusters[j]]);
|
||||
cluster_size.push_back(sizes[clusters[j]]);
|
||||
remap[clusters[j]] = static_cast<uint32_t>(j);
|
||||
}
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
histogram_symbols[i + j] =
|
||||
static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
|
||||
}
|
||||
num_clusters += num_new_clusters;
|
||||
assert(num_clusters == cluster_size.size());
|
||||
assert(num_clusters == all_histograms.size());
|
||||
}
|
||||
|
||||
max_num_pairs =
|
||||
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
|
||||
pairs.resize(max_num_pairs + 1);
|
||||
|
||||
std::vector<uint32_t> clusters(num_clusters);
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
clusters[i] = static_cast<uint32_t>(i);
|
||||
}
|
||||
size_t num_final_clusters =
|
||||
HistogramCombine(&all_histograms[0], &cluster_size[0],
|
||||
&histogram_symbols[0],
|
||||
&clusters[0], &pairs[0], num_clusters,
|
||||
num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
|
||||
|
||||
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
|
||||
std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
|
||||
uint32_t next_index = 0;
|
||||
pos = 0;
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
HistogramType histo;
|
||||
for (size_t j = 0; j < block_lengths[i]; ++j) {
|
||||
histo.Add(data[pos++]);
|
||||
}
|
||||
uint32_t best_out =
|
||||
i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
|
||||
double best_bits = HistogramBitCostDistance(
|
||||
histo, all_histograms[best_out]);
|
||||
for (size_t j = 0; j < num_final_clusters; ++j) {
|
||||
const double cur_bits = HistogramBitCostDistance(
|
||||
histo, all_histograms[clusters[j]]);
|
||||
if (cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
best_out = clusters[j];
|
||||
}
|
||||
}
|
||||
histogram_symbols[i] = best_out;
|
||||
if (new_index[best_out] == kInvalidIndex) {
|
||||
new_index[best_out] = next_index++;
|
||||
}
|
||||
}
|
||||
uint8_t max_type = 0;
|
||||
uint32_t cur_length = 0;
|
||||
block_idx = 0;
|
||||
split->types.resize(num_blocks);
|
||||
split->lengths.resize(num_blocks);
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
cur_length += block_lengths[i];
|
||||
if (i + 1 == num_blocks ||
|
||||
histogram_symbols[i] != histogram_symbols[i + 1]) {
|
||||
const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
|
||||
split->types[block_idx] = id;
|
||||
split->lengths[block_idx] = cur_length;
|
||||
max_type = std::max(max_type, id);
|
||||
cur_length = 0;
|
||||
++block_idx;
|
||||
}
|
||||
}
|
||||
split->types.resize(block_idx);
|
||||
split->lengths.resize(block_idx);
|
||||
split->num_types = static_cast<size_t>(max_type) + 1;
|
||||
}
|
||||
|
||||
template<int kSize, typename DataType>
|
||||
void SplitByteVector(const std::vector<DataType>& data,
|
||||
const size_t literals_per_histogram,
|
||||
const size_t max_histograms,
|
||||
const size_t sampling_stride_length,
|
||||
const double block_switch_cost,
|
||||
BlockSplit* split) {
|
||||
if (data.empty()) {
|
||||
split->num_types = 1;
|
||||
return;
|
||||
} else if (data.size() < kMinLengthForBlockSplitting) {
|
||||
split->num_types = 1;
|
||||
split->types.push_back(0);
|
||||
split->lengths.push_back(static_cast<uint32_t>(data.size()));
|
||||
return;
|
||||
}
|
||||
size_t num_histograms = data.size() / literals_per_histogram + 1;
|
||||
if (num_histograms > max_histograms) {
|
||||
num_histograms = max_histograms;
|
||||
}
|
||||
Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
|
||||
// Find good entropy codes.
|
||||
InitialEntropyCodes(&data[0], data.size(),
|
||||
sampling_stride_length,
|
||||
num_histograms, histograms);
|
||||
RefineEntropyCodes(&data[0], data.size(),
|
||||
sampling_stride_length,
|
||||
num_histograms, histograms);
|
||||
// Find a good path through literals with the good entropy codes.
|
||||
std::vector<uint8_t> block_ids(data.size());
|
||||
size_t num_blocks;
|
||||
const size_t bitmaplen = (num_histograms + 7) >> 3;
|
||||
double* insert_cost = new double[kSize * num_histograms];
|
||||
double *cost = new double[num_histograms];
|
||||
uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
|
||||
uint16_t* new_id = new uint16_t[num_histograms];
|
||||
for (size_t i = 0; i < 10; ++i) {
|
||||
num_blocks = FindBlocks(&data[0], data.size(),
|
||||
block_switch_cost,
|
||||
num_histograms, histograms,
|
||||
insert_cost, cost, switch_signal,
|
||||
&block_ids[0]);
|
||||
num_histograms = RemapBlockIds(&block_ids[0], data.size(),
|
||||
new_id, num_histograms);
|
||||
BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
|
||||
num_histograms, histograms);
|
||||
}
|
||||
delete[] insert_cost;
|
||||
delete[] cost;
|
||||
delete[] switch_signal;
|
||||
delete[] new_id;
|
||||
delete[] histograms;
|
||||
ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
|
||||
&block_ids[0], split);
|
||||
}
|
||||
|
||||
void SplitBlock(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split) {
|
||||
{
|
||||
// Create a continuous array of literals.
|
||||
std::vector<uint8_t> literals;
|
||||
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
|
||||
// Create the block split on the array of literals.
|
||||
// Literal histograms have alphabet size 256.
|
||||
SplitByteVector<256>(
|
||||
literals,
|
||||
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
||||
kLiteralStrideLength, kLiteralBlockSwitchCost,
|
||||
literal_split);
|
||||
}
|
||||
|
||||
{
|
||||
// Compute prefix codes for commands.
|
||||
std::vector<uint16_t> insert_and_copy_codes(num_commands);
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
|
||||
}
|
||||
// Create the block split on the array of command prefixes.
|
||||
SplitByteVector<kNumCommandPrefixes>(
|
||||
insert_and_copy_codes,
|
||||
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kCommandBlockSwitchCost,
|
||||
insert_and_copy_split);
|
||||
}
|
||||
|
||||
{
|
||||
// Create a continuous array of distance prefixes.
|
||||
std::vector<uint16_t> distance_prefixes(num_commands);
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
const Command& cmd = cmds[i];
|
||||
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
||||
distance_prefixes[pos++] = cmd.dist_prefix_;
|
||||
}
|
||||
}
|
||||
distance_prefixes.resize(pos);
|
||||
// Create the block split on the array of distance prefixes.
|
||||
SplitByteVector<kNumDistancePrefixes>(
|
||||
distance_prefixes,
|
||||
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kDistanceBlockSwitchCost,
|
||||
dist_split);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,61 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Block split point selection utilities.
|
||||
|
||||
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
#define BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "./command.h"
|
||||
#include "./metablock.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
struct BlockSplitIterator {
|
||||
explicit BlockSplitIterator(const BlockSplit& split)
|
||||
: split_(split), idx_(0), type_(0), length_(0) {
|
||||
if (!split.lengths.empty()) {
|
||||
length_ = split.lengths[0];
|
||||
}
|
||||
}
|
||||
|
||||
void Next(void) {
|
||||
if (length_ == 0) {
|
||||
++idx_;
|
||||
type_ = split_.types[idx_];
|
||||
length_ = split_.lengths[idx_];
|
||||
}
|
||||
--length_;
|
||||
}
|
||||
|
||||
const BlockSplit& split_;
|
||||
size_t idx_;
|
||||
size_t type_;
|
||||
size_t length_;
|
||||
};
|
||||
|
||||
void CopyLiteralsToByteArray(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
std::vector<uint8_t>* literals);
|
||||
|
||||
void SplitBlock(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,179 @@
|
|||
/* Copyright 2014 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Functions to convert brotli-related data structures into the
|
||||
// brotli bit stream. The functions here operate under
|
||||
// assumption that there is enough space in the storage, i.e., there are
|
||||
// no out-of-range checks anywhere.
|
||||
//
|
||||
// These functions do bit addressing into a byte array. The byte array
|
||||
// is called "storage" and the index to the bit is called storage_ix
|
||||
// in function arguments.
|
||||
|
||||
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "./entropy_encode.h"
|
||||
#include "./metablock.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// All Store functions here will use a storage_ix, which is always the bit
|
||||
// position for the current storage.
|
||||
|
||||
// Stores a number between 0 and 255.
|
||||
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
// Stores the compressed meta-block header.
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
void StoreCompressedMetaBlockHeader(bool final_block,
|
||||
size_t length,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Stores the uncompressed meta-block header.
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
void StoreUncompressedMetaBlockHeader(size_t length,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Stores a context map where the histogram type is always the block type.
|
||||
void StoreTrivialContextMap(size_t num_types,
|
||||
size_t context_bits,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||
const int num_codes,
|
||||
const uint8_t *code_length_bitdepth,
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
|
||||
void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
|
||||
size_t *storage_ix, uint8_t *storage);
|
||||
|
||||
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
|
||||
// bits[0:length] and stores the encoded tree to the bit stream.
|
||||
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
|
||||
const size_t length,
|
||||
HuffmanTree* tree,
|
||||
uint8_t* depth,
|
||||
uint16_t* bits,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
|
||||
const size_t histogram_total,
|
||||
const size_t max_bits,
|
||||
uint8_t* depth,
|
||||
uint16_t* bits,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Encodes the given context map to the bit stream. The number of different
|
||||
// histogram ids is given by num_clusters.
|
||||
void EncodeContextMap(const std::vector<uint32_t>& context_map,
|
||||
size_t num_clusters,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
// Data structure that stores everything that is needed to encode each block
|
||||
// switch command.
|
||||
// NOTE(review): the field notes below are inferred from the field names and
// from the signatures of BuildAndStoreBlockSplitCode() / StoreBlockSwitch();
// confirm them against the implementation before relying on them.
struct BlockSplitCode {
|
||||
// Presumably one entry per block (StoreBlockSwitch() takes a block_ix).
std::vector<uint32_t> type_code;
|
||||
// Presumably the block-length prefix symbol of each block.
std::vector<uint32_t> length_prefix;
|
||||
// Presumably the number of extra bits that follow each length prefix.
std::vector<uint32_t> length_nextra;
|
||||
// Presumably the value of those extra bits.
std::vector<uint32_t> length_extra;
|
||||
// Presumably the prefix code (depths and bit patterns) for block types.
std::vector<uint8_t> type_depths;
|
||||
std::vector<uint16_t> type_bits;
|
||||
// Depths and bit patterns with one slot per block-length prefix symbol.
uint8_t length_depths[kNumBlockLenPrefixes];
|
||||
uint16_t length_bits[kNumBlockLenPrefixes];
|
||||
};
|
||||
|
||||
// Builds a BlockSplitCode data structure from the block split given by the
|
||||
// vector of block types and block lengths and stores it to the bit stream.
|
||||
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
|
||||
const std::vector<uint32_t>& lengths,
|
||||
const size_t num_types,
|
||||
BlockSplitCode* code,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Stores the block switch command with index block_ix to the bit stream.
|
||||
void StoreBlockSwitch(const BlockSplitCode& code,
|
||||
const size_t block_ix,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
void StoreMetaBlock(const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
bool final_block,
|
||||
uint32_t num_direct_distance_codes,
|
||||
uint32_t distance_postfix_bits,
|
||||
ContextType literal_context_mode,
|
||||
const brotli::Command *commands,
|
||||
size_t n_commands,
|
||||
const MetaBlockSplit& mb,
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
|
||||
// Stores the meta-block without doing any block splitting, just collects
|
||||
// one histogram per block category and uses that for entropy coding.
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
void StoreMetaBlockTrivial(const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
bool is_last,
|
||||
const brotli::Command *commands,
|
||||
size_t n_commands,
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
|
||||
// Same as above, but uses static prefix codes for histograms with only a few
|
||||
// symbols, and uses static code length prefix codes for all other histograms.
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
void StoreMetaBlockFast(const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
bool is_last,
|
||||
const brotli::Command *commands,
|
||||
size_t n_commands,
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
|
||||
// This is for storing uncompressed blocks (simple raw storage of
|
||||
// bytes-as-bytes).
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
void StoreUncompressedMetaBlock(bool final_block,
|
||||
const uint8_t* input,
|
||||
size_t position, size_t mask,
|
||||
size_t len,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Stores an empty metadata meta-block and syncs to a byte boundary.
|
||||
void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
|
@ -0,0 +1,330 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Functions for clustering similar histograms together.
|
||||
|
||||
#ifndef BROTLI_ENC_CLUSTER_H_
|
||||
#define BROTLI_ENC_CLUSTER_H_
|
||||
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "./bit_cost.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./port.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// A candidate merge of two histogram clusters.
//   idx1, idx2  indices of the two clusters (CompareAndPushToQueue stores
//               them with idx1 < idx2).
//   cost_combo  bit cost of the histogram obtained by merging the two.
//   cost_diff   cost change attributed to the merge; the smaller (more
//               negative) it is, the more attractive the merge.
struct HistogramPair {
  uint32_t idx1;
  uint32_t idx2;
  double cost_combo;
  double cost_diff;
};

// Priority order for the pair queue: the "largest" pair is the one with the
// smallest cost_diff. Ties are broken in favour of the pair whose indices are
// closer together (smaller idx2 - idx1).
inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
  if (p1.cost_diff == p2.cost_diff) {
    return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
  }
  return p1.cost_diff > p2.cost_diff;
}
|
||||
|
||||
// Returns entropy reduction of the context map when we combine two clusters.
|
||||
inline double ClusterCostDiff(size_t size_a, size_t size_b) {
|
||||
size_t size_c = size_a + size_b;
|
||||
return static_cast<double>(size_a) * FastLog2(size_a) +
|
||||
static_cast<double>(size_b) * FastLog2(size_b) -
|
||||
static_cast<double>(size_c) * FastLog2(size_c);
|
||||
}
|
||||
|
||||
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
|
||||
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.
|
||||
template<typename HistogramType>
|
||||
void CompareAndPushToQueue(const HistogramType* out,
|
||||
const uint32_t* cluster_size,
|
||||
uint32_t idx1, uint32_t idx2,
|
||||
size_t max_num_pairs,
|
||||
HistogramPair* pairs,
|
||||
size_t* num_pairs) {
|
||||
if (idx1 == idx2) {
|
||||
return;
|
||||
}
|
||||
if (idx2 < idx1) {
|
||||
uint32_t t = idx2;
|
||||
idx2 = idx1;
|
||||
idx1 = t;
|
||||
}
|
||||
bool store_pair = false;
|
||||
HistogramPair p;
|
||||
p.idx1 = idx1;
|
||||
p.idx2 = idx2;
|
||||
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
|
||||
p.cost_diff -= out[idx1].bit_cost_;
|
||||
p.cost_diff -= out[idx2].bit_cost_;
|
||||
|
||||
if (out[idx1].total_count_ == 0) {
|
||||
p.cost_combo = out[idx2].bit_cost_;
|
||||
store_pair = true;
|
||||
} else if (out[idx2].total_count_ == 0) {
|
||||
p.cost_combo = out[idx1].bit_cost_;
|
||||
store_pair = true;
|
||||
} else {
|
||||
double threshold = *num_pairs == 0 ? 1e99 :
|
||||
std::max(0.0, pairs[0].cost_diff);
|
||||
HistogramType combo = out[idx1];
|
||||
combo.AddHistogram(out[idx2]);
|
||||
double cost_combo = PopulationCost(combo);
|
||||
if (cost_combo < threshold - p.cost_diff) {
|
||||
p.cost_combo = cost_combo;
|
||||
store_pair = true;
|
||||
}
|
||||
}
|
||||
if (store_pair) {
|
||||
p.cost_diff += p.cost_combo;
|
||||
if (*num_pairs > 0 && pairs[0] < p) {
|
||||
// Replace the top of the queue if needed.
|
||||
if (*num_pairs < max_num_pairs) {
|
||||
pairs[*num_pairs] = pairs[0];
|
||||
++(*num_pairs);
|
||||
}
|
||||
pairs[0] = p;
|
||||
} else if (*num_pairs < max_num_pairs) {
|
||||
pairs[*num_pairs] = p;
|
||||
++(*num_pairs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename HistogramType>
|
||||
size_t HistogramCombine(HistogramType* out,
|
||||
uint32_t* cluster_size,
|
||||
uint32_t* symbols,
|
||||
uint32_t* clusters,
|
||||
HistogramPair* pairs,
|
||||
size_t num_clusters,
|
||||
size_t symbols_size,
|
||||
size_t max_clusters,
|
||||
size_t max_num_pairs) {
|
||||
double cost_diff_threshold = 0.0;
|
||||
size_t min_cluster_size = 1;
|
||||
|
||||
// We maintain a vector of histogram pairs, with the property that the pair
|
||||
// with the maximum bit cost reduction is the first.
|
||||
size_t num_pairs = 0;
|
||||
for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
|
||||
for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
|
||||
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
|
||||
max_num_pairs, &pairs[0], &num_pairs);
|
||||
}
|
||||
}
|
||||
|
||||
while (num_clusters > min_cluster_size) {
|
||||
if (pairs[0].cost_diff >= cost_diff_threshold) {
|
||||
cost_diff_threshold = 1e99;
|
||||
min_cluster_size = max_clusters;
|
||||
continue;
|
||||
}
|
||||
// Take the best pair from the top of heap.
|
||||
uint32_t best_idx1 = pairs[0].idx1;
|
||||
uint32_t best_idx2 = pairs[0].idx2;
|
||||
out[best_idx1].AddHistogram(out[best_idx2]);
|
||||
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
|
||||
cluster_size[best_idx1] += cluster_size[best_idx2];
|
||||
for (size_t i = 0; i < symbols_size; ++i) {
|
||||
if (symbols[i] == best_idx2) {
|
||||
symbols[i] = best_idx1;
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
if (clusters[i] == best_idx2) {
|
||||
memmove(&clusters[i], &clusters[i + 1],
|
||||
(num_clusters - i - 1) * sizeof(clusters[0]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
--num_clusters;
|
||||
// Remove pairs intersecting the just combined best pair.
|
||||
size_t copy_to_idx = 0;
|
||||
for (size_t i = 0; i < num_pairs; ++i) {
|
||||
HistogramPair& p = pairs[i];
|
||||
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
|
||||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
|
||||
// Remove invalid pair from the queue.
|
||||
continue;
|
||||
}
|
||||
if (pairs[0] < p) {
|
||||
// Replace the top of the queue if needed.
|
||||
HistogramPair front = pairs[0];
|
||||
pairs[0] = p;
|
||||
pairs[copy_to_idx] = front;
|
||||
} else {
|
||||
pairs[copy_to_idx] = p;
|
||||
}
|
||||
++copy_to_idx;
|
||||
}
|
||||
num_pairs = copy_to_idx;
|
||||
|
||||
// Push new pairs formed with the combined histogram to the heap.
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
|
||||
max_num_pairs, &pairs[0], &num_pairs);
|
||||
}
|
||||
}
|
||||
return num_clusters;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Histogram refinement
|
||||
|
||||
// Returns the extra bit cost of adding `histogram` to `candidate`: the cost of
// the combined histogram minus candidate.bit_cost_ (which must be up to date).
// An empty histogram costs nothing to move.
template<typename HistogramType>
double HistogramBitCostDistance(const HistogramType& histogram,
                                const HistogramType& candidate) {
  if (histogram.total_count_ == 0) return 0.0;
  HistogramType merged(histogram);
  merged.AddHistogram(candidate);
  const double merged_cost = PopulationCost(merged);
  return merged_cost - candidate.bit_cost_;
}
|
||||
|
||||
// Assigns each of the in[0..in_size) histograms to the cheapest cluster and
// then rebuilds the cluster histograms from that assignment.
//
// On entry clusters[0..num_clusters) holds the unique values found in
// symbols[0..in_size); that property is not preserved, because a cluster may
// end up with no histogram assigned to it. out[]->bit_cost_ is assumed to be
// up to date on entry.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, size_t in_size,
                    const uint32_t* clusters, size_t num_clusters,
                    HistogramType* out, uint32_t* symbols) {
  const uint32_t* const clusters_end = clusters + num_clusters;
  for (size_t i = 0; i < in_size; ++i) {
    const HistogramType& source = in[i];
    // Start from the cluster chosen for the previous input (or the current
    // assignment for the very first one); another cluster wins only if it is
    // strictly cheaper.
    uint32_t winner = symbols[i == 0 ? 0 : i - 1];
    double winner_bits = HistogramBitCostDistance(source, out[winner]);
    for (const uint32_t* c = clusters; c != clusters_end; ++c) {
      const double bits = HistogramBitCostDistance(source, out[*c]);
      if (bits < winner_bits) {
        winner_bits = bits;
        winner = *c;
      }
    }
    symbols[i] = winner;
  }

  // Recompute every cluster histogram from the inputs assigned to it.
  for (const uint32_t* c = clusters; c != clusters_end; ++c) {
    out[*c].Clear();
  }
  for (size_t i = 0; i < in_size; ++i) {
    out[symbols[i]].AddHistogram(in[i]);
  }
}
|
||||
|
||||
// Renumbers the cluster ids in symbols[0..length) to 0..N-1 in order of first
// appearance and moves the matching entries of out[] to the front:
//   * on entry, symbols[] holds indexes into out[] with N distinct values
//     (possibly N < length), each smaller than length;
//   * on return, symbols'[i] = f(symbols[i]) and
//     out'[symbols'[i]] = out[symbols[i]] for each 0 <= i < length, where f is
//     a bijection from the old ids onto [0..N) such that the first
//     occurrences of the new ids appear in increasing order.
// Entries of out[] at index N and beyond are left untouched.
// Returns N, the number of distinct values in symbols[].
template<typename HistogramType>
size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
  const uint32_t kUnassigned = std::numeric_limits<uint32_t>::max();

  // First pass: hand out new ids in order of first appearance.
  std::vector<uint32_t> remap(length, kUnassigned);
  uint32_t num_unique = 0;
  for (size_t i = 0; i < length; ++i) {
    uint32_t& slot = remap[symbols[i]];
    if (slot == kUnassigned) {
      slot = num_unique;
      ++num_unique;
    }
  }

  // Second pass: gather the histograms in their new order and rewrite
  // symbols[]. A new id equal to `filled` marks the first use of that id.
  std::vector<HistogramType> compact(num_unique);
  uint32_t filled = 0;
  for (size_t i = 0; i < length; ++i) {
    const uint32_t old_id = symbols[i];
    const uint32_t new_id = remap[old_id];
    if (new_id == filled) {
      compact[filled] = out[old_id];
      ++filled;
    }
    symbols[i] = new_id;
  }

  std::copy(compact.begin(), compact.begin() + filled, out);
  return filled;
}
|
||||
|
||||
// Clusters similar histograms in 'in' together, the selected histograms are
|
||||
// placed in 'out', and for each index in 'in', *histogram_symbols will
|
||||
// indicate which of the 'out' histograms is the best approximation.
|
||||
template<typename HistogramType>
|
||||
void ClusterHistograms(const std::vector<HistogramType>& in,
|
||||
size_t num_contexts, size_t num_blocks,
|
||||
size_t max_histograms,
|
||||
std::vector<HistogramType>* out,
|
||||
std::vector<uint32_t>* histogram_symbols) {
|
||||
const size_t in_size = num_contexts * num_blocks;
|
||||
assert(in_size == in.size());
|
||||
std::vector<uint32_t> cluster_size(in_size, 1);
|
||||
std::vector<uint32_t> clusters(in_size);
|
||||
size_t num_clusters = 0;
|
||||
out->resize(in_size);
|
||||
histogram_symbols->resize(in_size);
|
||||
for (size_t i = 0; i < in_size; ++i) {
|
||||
(*out)[i] = in[i];
|
||||
(*out)[i].bit_cost_ = PopulationCost(in[i]);
|
||||
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
|
||||
}
|
||||
|
||||
const size_t max_input_histograms = 64;
|
||||
// For the first pass of clustering, we allow all pairs.
|
||||
size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
|
||||
std::vector<HistogramPair> pairs(max_num_pairs + 1);
|
||||
|
||||
for (size_t i = 0; i < in_size; i += max_input_histograms) {
|
||||
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
|
||||
}
|
||||
size_t num_new_clusters =
|
||||
HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[i],
|
||||
&clusters[num_clusters], &pairs[0],
|
||||
num_to_combine, num_to_combine,
|
||||
max_histograms, max_num_pairs);
|
||||
num_clusters += num_new_clusters;
|
||||
}
|
||||
|
||||
// For the second pass, we limit the total number of histogram pairs.
|
||||
// After this limit is reached, we only keep searching for the best pair.
|
||||
max_num_pairs =
|
||||
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
|
||||
pairs.resize(max_num_pairs + 1);
|
||||
|
||||
// Collapse similar histograms.
|
||||
num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[0], &clusters[0],
|
||||
&pairs[0], num_clusters, in_size,
|
||||
max_histograms, max_num_pairs);
|
||||
|
||||
// Find the optimal map from original histograms to the final ones.
|
||||
HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
|
||||
&(*out)[0], &(*histogram_symbols)[0]);
|
||||
|
||||
// Convert the context map to a canonical form.
|
||||
size_t num_histograms =
|
||||
HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
|
||||
out->resize(num_histograms);
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_CLUSTER_H_
|
|
@ -0,0 +1,156 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// This class models a sequence of literals and a backward reference copy.
|
||||
|
||||
#ifndef BROTLI_ENC_COMMAND_H_
|
||||
#define BROTLI_ENC_COMMAND_H_
|
||||
|
||||
#include "./fast_log.h"
|
||||
#include "./prefix.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Lookup tables for the 24 insert length codes and the 24 copy length codes.
// For code c, the code covers the lengths
//   [k*Base[c], k*Base[c] + (1 << k*Extra[c]))
// and k*Extra[c] is the number of extra bits that select a length inside that
// range. The tables are read-only, hence const.
static const uint32_t kInsBase[] = {
    0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
    66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static const uint32_t kInsExtra[] = {
    0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
    5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
static const uint32_t kCopyBase[] = {
    2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
    38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static const uint32_t kCopyExtra[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
    4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
|
||||
|
||||
static inline uint16_t GetInsertLengthCode(size_t insertlen) {
|
||||
if (insertlen < 6) {
|
||||
return static_cast<uint16_t>(insertlen);
|
||||
} else if (insertlen < 130) {
|
||||
insertlen -= 2;
|
||||
uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
|
||||
return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
|
||||
} else if (insertlen < 2114) {
|
||||
return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
|
||||
} else if (insertlen < 6210) {
|
||||
return 21u;
|
||||
} else if (insertlen < 22594) {
|
||||
return 22u;
|
||||
} else {
|
||||
return 23u;
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint16_t GetCopyLengthCode(size_t copylen) {
|
||||
if (copylen < 10) {
|
||||
return static_cast<uint16_t>(copylen - 2);
|
||||
} else if (copylen < 134) {
|
||||
copylen -= 6;
|
||||
uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
|
||||
return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
|
||||
} else if (copylen < 2118) {
|
||||
return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
|
||||
} else {
|
||||
return 23u;
|
||||
}
|
||||
}
|
||||
|
||||
// Combines an insert length code and a copy length code (each 0..23) into a
// single insert-and-copy length code.
//
// The low 3 bits of each code always land in the low 6 bits of the result
// (copy code in bits 0..2, insert code in bits 3..5). When the last distance
// is reused and both codes are small enough (inscode < 8, copycode < 16), the
// result is in [0, 128). Otherwise the high 3-bit groups of both codes select
// one of nine 64-wide cells starting at 128.
static inline uint16_t CombineLengthCodes(
    uint16_t inscode, uint16_t copycode, bool use_last_distance) {
  const uint16_t low_bits =
      static_cast<uint16_t>((copycode & 0x7u) | ((inscode & 0x7u) << 3));
  const bool implicit_distance =
      use_last_distance && inscode < 8 && copycode < 16;
  if (implicit_distance) {
    if (copycode < 8) {
      return low_bits;
    }
    return static_cast<uint16_t>(low_bits | 64);
  }
  // "To convert an insert-and-copy length code to an insert length code and
  // a copy length code, the following table can be used"
  static const uint16_t kCellOffsets[9] = { 128u, 192u, 384u, 256u, 320u,
                                            512u, 448u, 576u, 640u };
  const size_t cell = (copycode >> 3) + 3 * (inscode >> 3);
  return static_cast<uint16_t>(kCellOffsets[cell] | low_bits);
}
|
||||
|
||||
static inline void GetLengthCode(size_t insertlen, size_t copylen,
|
||||
bool use_last_distance,
|
||||
uint16_t* code) {
|
||||
uint16_t inscode = GetInsertLengthCode(insertlen);
|
||||
uint16_t copycode = GetCopyLengthCode(copylen);
|
||||
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
|
||||
}
|
||||
|
||||
static inline uint32_t GetInsertBase(uint16_t inscode) {
|
||||
return kInsBase[inscode];
|
||||
}
|
||||
|
||||
static inline uint32_t GetInsertExtra(uint16_t inscode) {
|
||||
return kInsExtra[inscode];
|
||||
}
|
||||
|
||||
static inline uint32_t GetCopyBase(uint16_t copycode) {
|
||||
return kCopyBase[copycode];
|
||||
}
|
||||
|
||||
static inline uint32_t GetCopyExtra(uint16_t copycode) {
|
||||
return kCopyExtra[copycode];
|
||||
}
|
||||
|
||||
struct Command {
|
||||
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
|
||||
Command(size_t insertlen, size_t copylen, size_t copylen_code,
|
||||
size_t distance_code)
|
||||
: insert_len_(static_cast<uint32_t>(insertlen)) {
|
||||
copy_len_ = static_cast<uint32_t>(
|
||||
copylen | ((copylen_code ^ copylen) << 24));
|
||||
// The distance prefix and extra bits are stored in this Command as if
|
||||
// npostfix and ndirect were 0, they are only recomputed later after the
|
||||
// clustering if needed.
|
||||
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
|
||||
GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
|
||||
&cmd_prefix_);
|
||||
}
|
||||
|
||||
explicit Command(size_t insertlen)
|
||||
: insert_len_(static_cast<uint32_t>(insertlen))
|
||||
, copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
|
||||
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
|
||||
}
|
||||
|
||||
uint32_t DistanceCode(void) const {
|
||||
if (dist_prefix_ < 16) {
|
||||
return dist_prefix_;
|
||||
}
|
||||
uint32_t nbits = dist_extra_ >> 24;
|
||||
uint32_t extra = dist_extra_ & 0xffffff;
|
||||
uint32_t prefix = dist_prefix_ - 12 - 2 * nbits;
|
||||
return (prefix << nbits) + extra + 12;
|
||||
}
|
||||
|
||||
uint32_t DistanceContext(void) const {
|
||||
uint32_t r = cmd_prefix_ >> 6;
|
||||
uint32_t c = cmd_prefix_ & 7;
|
||||
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
|
||||
return c;
|
||||
}
|
||||
return 3;
|
||||
}
|
||||
|
||||
inline uint32_t copy_len(void) const {
|
||||
return copy_len_ & 0xFFFFFF;
|
||||
}
|
||||
|
||||
inline uint32_t copy_len_code(void) const {
|
||||
return (copy_len_ & 0xFFFFFF) ^ (copy_len_ >> 24);
|
||||
}
|
||||
|
||||
uint32_t insert_len_;
|
||||
/* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */
|
||||
uint32_t copy_len_;
|
||||
uint32_t dist_extra_;
|
||||
uint16_t cmd_prefix_;
|
||||
uint16_t dist_prefix_;
|
||||
};
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_COMMAND_H_
|
|
@ -0,0 +1,701 @@
|
|||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Function for fast encoding of an input fragment, independently from the input
|
||||
// history. This function uses one-pass processing: when we find a backward
|
||||
// match, we immediately emit the corresponding command and literal codes to
|
||||
// the bit stream.
|
||||
//
|
||||
// Adapted from the CompressFragment() function in
|
||||
// https://github.com/google/snappy/blob/master/snappy.cc
|
||||
|
||||
#include "./compress_fragment.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "./brotli_bit_stream.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./port.h"
|
||||
#include "./types.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// kHashMul32 multiplier has these properties:
|
||||
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||
// * No long streaks of 1s or 0s.
|
||||
// * There is no effort to ensure that it is a prime, the oddity is enough
|
||||
// for this use.
|
||||
// * The number has been tuned heuristically against compression benchmarks.
|
||||
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
||||
|
||||
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
|
||||
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
|
||||
return static_cast<uint32_t>(h >> shift);
|
||||
}
|
||||
|
||||
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
|
||||
assert(offset >= 0);
|
||||
assert(offset <= 3);
|
||||
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
|
||||
return static_cast<uint32_t>(h >> shift);
|
||||
}
|
||||
|
||||
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
|
||||
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
|
||||
p1[4] == p2[4]);
|
||||
}
|
||||
|
||||
// Builds a literal prefix code into "depths" and "bits" based on the statistics
|
||||
// of the "input" string and stores it into the bit stream.
|
||||
// Note that the prefix code here is built from the pre-LZ77 input, therefore
|
||||
// we can only approximate the statistics of the actual literal stream.
|
||||
// Moreover, for long inputs we build a histogram from a sample of the input
|
||||
// and thus have to assign a non-zero depth for each literal.
|
||||
static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
|
||||
const size_t input_size,
|
||||
uint8_t depths[256],
|
||||
uint16_t bits[256],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
uint32_t histogram[256] = { 0 };
|
||||
size_t histogram_total;
|
||||
if (input_size < (1 << 15)) {
|
||||
for (size_t i = 0; i < input_size; ++i) {
|
||||
++histogram[input[i]];
|
||||
}
|
||||
histogram_total = input_size;
|
||||
for (size_t i = 0; i < 256; ++i) {
|
||||
// We weigh the first 11 samples with weight 3 to account for the
|
||||
// balancing effect of the LZ77 phase on the histogram.
|
||||
const uint32_t adjust = 2 * std::min(histogram[i], 11u);
|
||||
histogram[i] += adjust;
|
||||
histogram_total += adjust;
|
||||
}
|
||||
} else {
|
||||
static const size_t kSampleRate = 29;
|
||||
for (size_t i = 0; i < input_size; i += kSampleRate) {
|
||||
++histogram[input[i]];
|
||||
}
|
||||
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
|
||||
for (size_t i = 0; i < 256; ++i) {
|
||||
// We add 1 to each population count to avoid 0 bit depths (since this is
|
||||
// only a sample and we don't know if the symbol appears or not), and we
|
||||
// weigh the first 11 samples with weight 3 to account for the balancing
|
||||
// effect of the LZ77 phase on the histogram (more frequent symbols are
|
||||
// more likely to be in backward references instead as literals).
|
||||
const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
|
||||
histogram[i] += adjust;
|
||||
histogram_total += adjust;
|
||||
}
|
||||
}
|
||||
BuildAndStoreHuffmanTreeFast(histogram, histogram_total,
|
||||
/* max_bits = */ 8,
|
||||
depths, bits, storage_ix, storage);
|
||||
}
|
||||
|
||||
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
// "bits" based on "histogram" and stores it into the bit stream.
|
||||
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
|
||||
uint8_t depth[128],
|
||||
uint16_t bits[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
|
||||
static const size_t kTreeSize = 129;
|
||||
HuffmanTree tree[kTreeSize];
|
||||
CreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
// We have to jump through a few hoopes here in order to compute
|
||||
// the command bits because the symbols are in a different order than in
|
||||
// the full alphabet. This looks complicated, but having the symbols
|
||||
// in this order in the command bits saves a few branches in the Emit*
|
||||
// functions.
|
||||
uint8_t cmd_depth[64];
|
||||
uint16_t cmd_bits[64];
|
||||
memcpy(cmd_depth, depth, 24);
|
||||
memcpy(cmd_depth + 24, depth + 40, 8);
|
||||
memcpy(cmd_depth + 32, depth + 24, 8);
|
||||
memcpy(cmd_depth + 40, depth + 48, 8);
|
||||
memcpy(cmd_depth + 48, depth + 32, 8);
|
||||
memcpy(cmd_depth + 56, depth + 56, 8);
|
||||
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
|
||||
memcpy(bits, cmd_bits, 48);
|
||||
memcpy(bits + 24, cmd_bits + 32, 16);
|
||||
memcpy(bits + 32, cmd_bits + 48, 16);
|
||||
memcpy(bits + 40, cmd_bits + 24, 16);
|
||||
memcpy(bits + 48, cmd_bits + 40, 16);
|
||||
memcpy(bits + 56, cmd_bits + 56, 16);
|
||||
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
{
|
||||
// Create the bit length array for the full command alphabet.
|
||||
uint8_t cmd_depth[704] = { 0 };
|
||||
memcpy(cmd_depth, depth, 8);
|
||||
memcpy(cmd_depth + 64, depth + 8, 8);
|
||||
memcpy(cmd_depth + 128, depth + 16, 8);
|
||||
memcpy(cmd_depth + 192, depth + 24, 8);
|
||||
memcpy(cmd_depth + 384, depth + 32, 8);
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
cmd_depth[128 + 8 * i] = depth[40 + i];
|
||||
cmd_depth[256 + 8 * i] = depth[48 + i];
|
||||
cmd_depth[448 + 8 * i] = depth[56 + i];
|
||||
}
|
||||
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
|
||||
}
|
||||
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
// REQUIRES: insertlen < 6210
|
||||
inline void EmitInsertLen(size_t insertlen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (insertlen < 6) {
|
||||
const size_t code = insertlen + 40;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
++histo[code];
|
||||
} else if (insertlen < 130) {
|
||||
insertlen -= 2;
|
||||
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
|
||||
const size_t prefix = insertlen >> nbits;
|
||||
const size_t inscode = (nbits << 1) + prefix + 42;
|
||||
WriteBits(depth[inscode], bits[inscode], storage_ix, storage);
|
||||
WriteBits(nbits, insertlen - (prefix << nbits), storage_ix, storage);
|
||||
++histo[inscode];
|
||||
} else if (insertlen < 2114) {
|
||||
insertlen -= 66;
|
||||
const uint32_t nbits = Log2FloorNonZero(insertlen);
|
||||
const size_t code = nbits + 50;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, insertlen - (1 << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else {
|
||||
WriteBits(depth[61], bits[61], storage_ix, storage);
|
||||
WriteBits(12, insertlen - 2114, storage_ix, storage);
|
||||
++histo[21];
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the prefix code and extra bits for an insert length of 6210 or more
// (the subtractions below assume that lower bound) and counts the emitted
// code in "histo".
//
// "histo" uses the same index layout as "depth" and "bits", so each branch
// counts the index of the code it emits.
inline void EmitLongInsertLen(size_t insertlen,
                              const uint8_t depth[128],
                              const uint16_t bits[128],
                              uint32_t histo[128],
                              size_t* storage_ix,
                              uint8_t* storage) {
  if (insertlen < 22594) {
    WriteBits(depth[62], bits[62], storage_ix, storage);
    WriteBits(14, insertlen - 6210, storage_ix, storage);
    // Was histo[22], which is a copy-length slot in this layout.
    ++histo[62];
  } else {
    WriteBits(depth[63], bits[63], storage_ix, storage);
    WriteBits(24, insertlen - 22594, storage_ix, storage);
    // Was histo[23], which is a copy-length slot in this layout.
    ++histo[63];
  }
}
|
||||
|
||||
inline void EmitCopyLen(size_t copylen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (copylen < 10) {
|
||||
WriteBits(depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
|
||||
++histo[copylen + 14];
|
||||
} else if (copylen < 134) {
|
||||
copylen -= 6;
|
||||
const uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
|
||||
const size_t prefix = copylen >> nbits;
|
||||
const size_t code = (nbits << 1) + prefix + 20;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else if (copylen < 2118) {
|
||||
copylen -= 70;
|
||||
const uint32_t nbits = Log2FloorNonZero(copylen);
|
||||
const size_t code = nbits + 28;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else {
|
||||
WriteBits(depth[39], bits[39], storage_ix, storage);
|
||||
WriteBits(24, copylen - 2118, storage_ix, storage);
|
||||
++histo[47];
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitCopyLenLastDistance(size_t copylen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (copylen < 12) {
|
||||
WriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
|
||||
++histo[copylen - 4];
|
||||
} else if (copylen < 72) {
|
||||
copylen -= 8;
|
||||
const uint32_t nbits = Log2FloorNonZero(copylen) - 1;
|
||||
const size_t prefix = copylen >> nbits;
|
||||
const size_t code = (nbits << 1) + prefix + 4;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else if (copylen < 136) {
|
||||
copylen -= 8;
|
||||
const size_t code = (copylen >> 5) + 30;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(5, copylen & 31, storage_ix, storage);
|
||||
WriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
++histo[code];
|
||||
++histo[64];
|
||||
} else if (copylen < 2120) {
|
||||
copylen -= 72;
|
||||
const uint32_t nbits = Log2FloorNonZero(copylen);
|
||||
const size_t code = nbits + 28;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
|
||||
WriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
++histo[code];
|
||||
++histo[64];
|
||||
} else {
|
||||
WriteBits(depth[39], bits[39], storage_ix, storage);
|
||||
WriteBits(24, copylen - 2120, storage_ix, storage);
|
||||
WriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
++histo[47];
|
||||
++histo[64];
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitDistance(size_t distance,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
distance += 3;
|
||||
const uint32_t nbits = Log2FloorNonZero(distance) - 1u;
|
||||
const size_t prefix = (distance >> nbits) & 1;
|
||||
const size_t offset = (2 + prefix) << nbits;
|
||||
const size_t distcode = 2 * (nbits - 1) + prefix + 80;
|
||||
WriteBits(depth[distcode], bits[distcode], storage_ix, storage);
|
||||
WriteBits(nbits, distance - offset, storage_ix, storage);
|
||||
++histo[distcode];
|
||||
}
|
||||
|
||||
inline void EmitLiterals(const uint8_t* input, const size_t len,
|
||||
const uint8_t depth[256], const uint16_t bits[256],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
for (size_t j = 0; j < len; j++) {
|
||||
const uint8_t lit = input[j];
|
||||
WriteBits(depth[lit], bits[lit], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
// REQUIRES: len <= 1 << 20.
|
||||
static void StoreMetaBlockHeader(
|
||||
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
|
||||
// ISLAST
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
// MNIBBLES is 4
|
||||
WriteBits(2, 0, storage_ix, storage);
|
||||
WriteBits(16, len - 1, storage_ix, storage);
|
||||
} else {
|
||||
// MNIBBLES is 5
|
||||
WriteBits(2, 1, storage_ix, storage);
|
||||
WriteBits(20, len - 1, storage_ix, storage);
|
||||
}
|
||||
// ISUNCOMPRESSED
|
||||
WriteBits(1, is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
|
||||
// Overwrites "n_bits" bits of "array", starting at bit position "pos", with
// the low "n_bits" bits of "bits". Bits are numbered from the least
// significant bit of each byte; all other bits of "array" are left untouched.
static void UpdateBits(size_t n_bits,
                       uint32_t bits,
                       size_t pos,
                       uint8_t *array) {
  for (; n_bits > 0; ) {
    const size_t index = pos >> 3;
    const size_t low_kept = pos & 7;
    // Never write past the end of the current byte.
    const size_t take = std::min(n_bits, 8 - low_kept);
    // Mask of the bit positions within this byte that get replaced.
    const uint32_t field = ((1u << take) - 1u) << low_kept;
    const uint32_t kept = array[index] & ~field;
    const uint32_t fresh = (bits << low_kept) & field;
    array[index] = static_cast<uint8_t>(fresh | kept);
    bits >>= take;
    pos += take;
    n_bits -= take;
  }
}
|
||||
|
||||
// Moves the write position back to "new_storage_ix" and clears the bits at
// and above that position in the byte that contains it.
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t kept_bits = new_storage_ix & 7;
  const size_t keep_mask = (1u << kept_bits) - 1;
  uint8_t* const partial_byte = &storage[new_storage_ix >> 3];
  *partial_byte &= static_cast<uint8_t>(keep_mask);
  *storage_ix = new_storage_ix;
}
|
||||
|
||||
static bool ShouldMergeBlock(const uint8_t* data, size_t len,
|
||||
const uint8_t* depths) {
|
||||
size_t histo[256] = { 0 };
|
||||
static const size_t kSampleRate = 43;
|
||||
for (size_t i = 0; i < len; i += kSampleRate) {
|
||||
++histo[data[i]];
|
||||
}
|
||||
const size_t total = (len + kSampleRate - 1) / kSampleRate;
|
||||
double r = (FastLog2(total) + 0.5) * static_cast<double>(total) + 200;
|
||||
for (size_t i = 0; i < 256; ++i) {
|
||||
r -= static_cast<double>(histo[i]) * (depths[i] + FastLog2(histo[i]));
|
||||
}
|
||||
return r >= 0.0;
|
||||
}
|
||||
|
||||
// Decides whether the meta-block should be stored uncompressed.
//
// Returns false if more than 1/50 of "insertlen" bytes lie between
// "metablock_start" and "next_emit". Otherwise returns true when the literal
// code lengths, weighted by 2^-length, sum to more than kMinEntropy bits.
inline bool ShouldUseUncompressedMode(const uint8_t* metablock_start,
                                      const uint8_t* next_emit,
                                      const size_t insertlen,
                                      const uint8_t literal_depths[256]) {
  static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
  static const double kMinEntropy =
      8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
  const size_t already_coded = static_cast<size_t>(next_emit - metablock_start);
  if (already_coded * 50 <= insertlen) {
    // Sum of n * 2^(15 - n) over all literal code lengths n.
    uint32_t weighted = 0;
    const uint8_t* const stop = literal_depths + 256;
    for (const uint8_t* d = literal_depths; d != stop; ++d) {
      const uint32_t n = *d;
      weighted += n << (15 - n);
    }
    return weighted > static_cast<uint32_t>((1 << 15) * kMinEntropy);
  }
  return false;
}
|
||||
|
||||
// Replaces everything written since bit position "storage_ix_start" with an
// uncompressed meta-block holding the bytes in [begin, end).
//
// Rewinds the stream, writes an uncompressed meta-block header, pads to the
// next byte boundary, copies the raw bytes, and zeroes the byte that follows
// them.
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = static_cast<size_t>(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  StoreMetaBlockHeader(len, 1, storage_ix, storage);
  // Round up to a byte boundary. The mask must be a size_t: "~7u" is a 32-bit
  // value that zero-extends, which would clear the upper half of a 64-bit
  // bit position.
  *storage_ix = (*storage_ix + 7u) & ~static_cast<size_t>(7);
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}
|
||||
|
||||
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
bool is_last,
|
||||
int* table, size_t table_size,
|
||||
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
|
||||
size_t* cmd_code_numbits, uint8_t* cmd_code,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
if (input_size == 0) {
|
||||
assert(is_last);
|
||||
WriteBits(1, 1, storage_ix, storage); // islast
|
||||
WriteBits(1, 1, storage_ix, storage); // isempty
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
return;
|
||||
}
|
||||
|
||||
// "next_emit" is a pointer to the first byte that is not covered by a
|
||||
// previous copy. Bytes between "next_emit" and the start of the next copy or
|
||||
// the end of the input will be emitted as literal bytes.
|
||||
const uint8_t* next_emit = input;
|
||||
// Save the start of the first block for position and distance computations.
|
||||
const uint8_t* base_ip = input;
|
||||
|
||||
static const size_t kFirstBlockSize = 3 << 15;
|
||||
static const size_t kMergeBlockSize = 1 << 16;
|
||||
|
||||
const uint8_t* metablock_start = input;
|
||||
size_t block_size = std::min(input_size, kFirstBlockSize);
|
||||
size_t total_block_size = block_size;
|
||||
// Save the bit position of the MLEN field of the meta-block header, so that
|
||||
// we can update it later if we decide to extend this meta-block.
|
||||
size_t mlen_storage_ix = *storage_ix + 3;
|
||||
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
// No block splits, no contexts.
|
||||
WriteBits(13, 0, storage_ix, storage);
|
||||
|
||||
uint8_t lit_depth[256] = { 0 };
|
||||
uint16_t lit_bits[256] = { 0 };
|
||||
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
|
||||
// Store the pre-compressed command and distance prefix codes.
|
||||
for (size_t i = 0; i + 7 < *cmd_code_numbits; i += 8) {
|
||||
WriteBits(8, cmd_code[i >> 3], storage_ix, storage);
|
||||
}
|
||||
WriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
|
||||
storage_ix, storage);
|
||||
|
||||
emit_commands:
|
||||
// Initialize the command and distance histograms. We will gather
|
||||
// statistics of command and distance codes during the processing
|
||||
// of this block and use it to update the command and distance
|
||||
// prefix codes for the next block.
|
||||
uint32_t cmd_histo[128] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
// "ip" is the input pointer.
|
||||
const uint8_t* ip = input;
|
||||
assert(table_size);
|
||||
assert(table_size <= (1u << 31));
|
||||
assert((table_size & (table_size - 1)) == 0); // table must be power of two
|
||||
const size_t shift = 64u - Log2FloorNonZero(table_size);
|
||||
assert(table_size - 1 == static_cast<size_t>(
|
||||
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
|
||||
const uint8_t* ip_end = input + block_size;
|
||||
|
||||
int last_distance = -1;
|
||||
const size_t kInputMarginBytes = 16;
|
||||
const size_t kMinMatchLen = 5;
|
||||
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
|
||||
// For the last block, we need to keep a 16 bytes margin so that we can be
|
||||
// sure that all distances are at most window size - 16.
|
||||
// For all other blocks, we only need to keep a margin of 5 bytes so that
|
||||
// we don't go over the block size with a copy.
|
||||
const size_t len_limit = std::min(block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const uint8_t* ip_limit = input + len_limit;
|
||||
|
||||
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
|
||||
assert(next_emit < ip);
|
||||
// Step 1: Scan forward in the input looking for a 5-byte-long match.
|
||||
// If we get close to exhausting the input then goto emit_remainder.
|
||||
//
|
||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
// found, start looking only at every other byte. If 32 more bytes are
|
||||
// scanned, look at every third byte, etc.. When a match is found,
|
||||
// immediately go back to looking at every byte. This is a small loss
|
||||
// (~5% performance, ~0.1% density) for compressible data due to more
|
||||
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
|
||||
// win since the compressor quickly "realizes" the data is incompressible
|
||||
// and doesn't bother looking for matches everywhere.
|
||||
//
|
||||
// The "skip" variable keeps track of how many bytes there are since the
|
||||
// last match; dividing it by 32 (ie. right-shifting by five) gives the
|
||||
// number of bytes to move ahead for each iteration.
|
||||
uint32_t skip = 32;
|
||||
|
||||
const uint8_t* next_ip = ip;
|
||||
const uint8_t* candidate;
|
||||
do {
|
||||
ip = next_ip;
|
||||
uint32_t hash = next_hash;
|
||||
assert(hash == Hash(ip, shift));
|
||||
uint32_t bytes_between_hash_lookups = skip++ >> 5;
|
||||
next_ip = ip + bytes_between_hash_lookups;
|
||||
if (PREDICT_FALSE(next_ip > ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
next_hash = Hash(next_ip, shift);
|
||||
candidate = ip - last_distance;
|
||||
if (IsMatch(ip, candidate)) {
|
||||
if (PREDICT_TRUE(candidate < ip)) {
|
||||
table[hash] = static_cast<int>(ip - base_ip);
|
||||
break;
|
||||
}
|
||||
}
|
||||
candidate = base_ip + table[hash];
|
||||
assert(candidate >= base_ip);
|
||||
assert(candidate < ip);
|
||||
|
||||
table[hash] = static_cast<int>(ip - base_ip);
|
||||
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
|
||||
|
||||
// Step 2: Emit the found match together with the literal bytes from
|
||||
// "next_emit" to the bit stream, and then see if we can find a next macth
|
||||
// immediately afterwards. Repeat until we find no match for the input
|
||||
// without emitting some literal bytes.
|
||||
uint64_t input_bytes;
|
||||
|
||||
{
|
||||
// We have a 5-byte match at ip, and we need to emit bytes in
|
||||
// [next_emit, ip).
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 5 + FindMatchLengthWithLimit(
|
||||
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
|
||||
ip += matched;
|
||||
int distance = static_cast<int>(base - candidate); /* > 0 */
|
||||
size_t insert = static_cast<size_t>(base - next_emit);
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
if (PREDICT_TRUE(insert < 6210)) {
|
||||
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
|
||||
lit_depth)) {
|
||||
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
|
||||
storage_ix, storage);
|
||||
input_size -= static_cast<size_t>(base - input);
|
||||
input = base;
|
||||
next_emit = input;
|
||||
goto next_block;
|
||||
} else {
|
||||
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
}
|
||||
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
if (distance == last_distance) {
|
||||
WriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
|
||||
++cmd_histo[64];
|
||||
} else {
|
||||
EmitDistance(static_cast<size_t>(distance), cmd_depth, cmd_bits,
|
||||
cmd_histo, storage_ix, storage);
|
||||
last_distance = distance;
|
||||
}
|
||||
EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
|
||||
next_emit = ip;
|
||||
if (PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
// We could immediately start working at ip now, but to improve
|
||||
// compression we first update "table" with the hashes of some positions
|
||||
// within the last copy.
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
|
||||
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = static_cast<int>(ip - base_ip);
|
||||
}
|
||||
|
||||
while (IsMatch(ip, candidate)) {
|
||||
// We have a 5-byte match at ip, and no need to emit any literal bytes
|
||||
// prior to ip.
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 5 + FindMatchLengthWithLimit(
|
||||
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
|
||||
ip += matched;
|
||||
last_distance = static_cast<int>(base - candidate); /* > 0 */
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
EmitDistance(static_cast<size_t>(last_distance), cmd_depth, cmd_bits,
|
||||
cmd_histo, storage_ix, storage);
|
||||
|
||||
next_emit = ip;
|
||||
if (PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
// We could immediately start working at ip now, but to improve
|
||||
// compression we first update "table" with the hashes of some positions
|
||||
// within the last copy.
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
|
||||
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = static_cast<int>(ip - base_ip);
|
||||
}
|
||||
|
||||
next_hash = Hash(++ip, shift);
|
||||
}
|
||||
}
|
||||
|
||||
emit_remainder:
|
||||
assert(next_emit <= ip_end);
|
||||
input += block_size;
|
||||
input_size -= block_size;
|
||||
block_size = std::min(input_size, kMergeBlockSize);
|
||||
|
||||
// Decide if we want to continue this meta-block instead of emitting the
|
||||
// last insert-only command.
|
||||
if (input_size > 0 &&
|
||||
total_block_size + block_size <= (1 << 20) &&
|
||||
ShouldMergeBlock(input, block_size, lit_depth)) {
|
||||
assert(total_block_size > (1 << 16));
|
||||
// Update the size of the current meta-block and continue emitting commands.
|
||||
// We can do this because the current size and the new size both have 5
|
||||
// nibbles.
|
||||
total_block_size += block_size;
|
||||
UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
|
||||
mlen_storage_ix, storage);
|
||||
goto emit_commands;
|
||||
}
|
||||
|
||||
// Emit the remaining bytes as literals.
|
||||
if (next_emit < ip_end) {
|
||||
const size_t insert = static_cast<size_t>(ip_end - next_emit);
|
||||
if (PREDICT_TRUE(insert < 6210)) {
|
||||
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
|
||||
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
|
||||
lit_depth)) {
|
||||
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
|
||||
storage_ix, storage);
|
||||
} else {
|
||||
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
}
|
||||
}
|
||||
next_emit = ip_end;
|
||||
|
||||
next_block:
|
||||
// If we have more data, write a new meta-block header and prefix codes and
|
||||
// then continue emitting commands.
|
||||
if (input_size > 0) {
|
||||
metablock_start = input;
|
||||
block_size = std::min(input_size, kFirstBlockSize);
|
||||
total_block_size = block_size;
|
||||
// Save the bit position of the MLEN field of the meta-block header, so that
|
||||
// we can update it later if we decide to extend this meta-block.
|
||||
mlen_storage_ix = *storage_ix + 3;
|
||||
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
// No block splits, no contexts.
|
||||
WriteBits(13, 0, storage_ix, storage);
|
||||
memset(lit_depth, 0, sizeof(lit_depth));
|
||||
memset(lit_bits, 0, sizeof(lit_bits));
|
||||
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
|
||||
storage_ix, storage);
|
||||
goto emit_commands;
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
WriteBits(1, 1, storage_ix, storage); // islast
|
||||
WriteBits(1, 1, storage_ix, storage); // isempty
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
} else {
|
||||
// If this is not the last block, update the command and distance prefix
|
||||
// codes for the next block and store the compressed forms.
|
||||
cmd_code[0] = 0;
|
||||
*cmd_code_numbits = 0;
|
||||
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
|
||||
cmd_code_numbits, cmd_code);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,47 @@
|
|||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Function for fast encoding of an input fragment, independently from the input
|
||||
// history. This function uses one-pass processing: when we find a backward
|
||||
// match, we immediately emit the corresponding command and literal codes to
|
||||
// the bit stream.
|
||||
|
||||
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
|
||||
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Compresses "input" string to the "*storage" buffer as one or more complete
|
||||
// meta-blocks, and updates the "*storage_ix" bit position.
|
||||
//
|
||||
// If "is_last" is true, emits an additional empty last meta-block.
|
||||
//
|
||||
// "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
|
||||
// (see comment in encode.h) used for the encoding of this input fragment.
|
||||
// If "is_last" is false, they are updated to reflect the statistics
|
||||
// of this input fragment, to be used for the encoding of the next fragment.
|
||||
//
|
||||
// "*cmd_code_numbits" is the number of bits of the compressed representation
|
||||
// of the command and distance prefix codes, and "cmd_code" is an array of
|
||||
// at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
|
||||
// command and distance prefix codes. If "is_last" is false, these are also
|
||||
// updated to represent the updated "cmd_depth" and "cmd_bits".
|
||||
//
|
||||
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
|
||||
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
|
||||
// REQUIRES: "table_size" is a power of two
|
||||
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
bool is_last,
|
||||
int* table, size_t table_size,
|
||||
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
|
||||
size_t* cmd_code_numbits, uint8_t* cmd_code,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_COMPRESS_FRAGMENT_H_
|
|
@ -0,0 +1,524 @@
|
|||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Function for fast encoding of an input fragment, independently from the input
|
||||
// history. This function uses two-pass processing: in the first pass we save
|
||||
// the found backward matches and literal bytes into a buffer, and in the
|
||||
// second pass we emit them into the bit stream using prefix codes built based
|
||||
// on the actual command and literal byte histograms.
|
||||
|
||||
#include "./compress_fragment_two_pass.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "./brotli_bit_stream.h"
|
||||
#include "./bit_cost.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./port.h"
|
||||
#include "./types.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// kHashMul32 multiplier has these properties:
|
||||
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||
// * No long streaks of 1s or 0s.
|
||||
// * There is no effort to ensure that it is a prime; being odd is enough
|
||||
// for this use.
|
||||
// * The number has been tuned heuristically against compression benchmarks.
|
||||
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
||||
|
||||
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
|
||||
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
|
||||
return static_cast<uint32_t>(h >> shift);
|
||||
}
|
||||
|
||||
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
|
||||
assert(offset >= 0);
|
||||
assert(offset <= 2);
|
||||
const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
|
||||
return static_cast<uint32_t>(h >> shift);
|
||||
}
|
||||
|
||||
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
|
||||
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
|
||||
p1[4] == p2[4] &&
|
||||
p1[5] == p2[5]);
|
||||
}
|
||||
|
||||
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
// "bits" based on "histogram" and stores it into the bit stream.
|
||||
static void BuildAndStoreCommandPrefixCode(
|
||||
const uint32_t histogram[128],
|
||||
uint8_t depth[128], uint16_t bits[128],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
|
||||
static const size_t kTreeSize = 129;
|
||||
HuffmanTree tree[kTreeSize];
|
||||
CreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
// We have to jump through a few hoopes here in order to compute
|
||||
// the command bits because the symbols are in a different order than in
|
||||
// the full alphabet. This looks complicated, but having the symbols
|
||||
// in this order in the command bits saves a few branches in the Emit*
|
||||
// functions.
|
||||
uint8_t cmd_depth[64];
|
||||
uint16_t cmd_bits[64];
|
||||
memcpy(cmd_depth, depth + 24, 24);
|
||||
memcpy(cmd_depth + 24, depth, 8);
|
||||
memcpy(cmd_depth + 32, depth + 48, 8);
|
||||
memcpy(cmd_depth + 40, depth + 8, 8);
|
||||
memcpy(cmd_depth + 48, depth + 56, 8);
|
||||
memcpy(cmd_depth + 56, depth + 16, 8);
|
||||
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
|
||||
memcpy(bits, cmd_bits + 24, 16);
|
||||
memcpy(bits + 8, cmd_bits + 40, 16);
|
||||
memcpy(bits + 16, cmd_bits + 56, 16);
|
||||
memcpy(bits + 24, cmd_bits, 48);
|
||||
memcpy(bits + 48, cmd_bits + 32, 16);
|
||||
memcpy(bits + 56, cmd_bits + 48, 16);
|
||||
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
{
|
||||
// Create the bit length array for the full command alphabet.
|
||||
uint8_t cmd_depth[704] = { 0 };
|
||||
memcpy(cmd_depth, depth + 24, 8);
|
||||
memcpy(cmd_depth + 64, depth + 32, 8);
|
||||
memcpy(cmd_depth + 128, depth + 40, 8);
|
||||
memcpy(cmd_depth + 192, depth + 48, 8);
|
||||
memcpy(cmd_depth + 384, depth + 56, 8);
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
cmd_depth[128 + 8 * i] = depth[i];
|
||||
cmd_depth[256 + 8 * i] = depth[8 + i];
|
||||
cmd_depth[448 + 8 * i] = depth[16 + i];
|
||||
}
|
||||
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
|
||||
}
|
||||
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
|
||||
if (insertlen < 6) {
|
||||
**commands = insertlen;
|
||||
} else if (insertlen < 130) {
|
||||
insertlen -= 2;
|
||||
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
|
||||
const uint32_t prefix = insertlen >> nbits;
|
||||
const uint32_t inscode = (nbits << 1) + prefix + 2;
|
||||
const uint32_t extra = insertlen - (prefix << nbits);
|
||||
**commands = inscode | (extra << 8);
|
||||
} else if (insertlen < 2114) {
|
||||
insertlen -= 66;
|
||||
const uint32_t nbits = Log2FloorNonZero(insertlen);
|
||||
const uint32_t code = nbits + 10;
|
||||
const uint32_t extra = insertlen - (1 << nbits);
|
||||
**commands = code | (extra << 8);
|
||||
} else if (insertlen < 6210) {
|
||||
const uint32_t extra = insertlen - 2114;
|
||||
**commands = 21 | (extra << 8);
|
||||
} else if (insertlen < 22594) {
|
||||
const uint32_t extra = insertlen - 6210;
|
||||
**commands = 22 | (extra << 8);
|
||||
} else {
|
||||
const uint32_t extra = insertlen - 22594;
|
||||
**commands = 23 | (extra << 8);
|
||||
}
|
||||
++(*commands);
|
||||
}
|
||||
|
||||
inline void EmitCopyLen(size_t copylen, uint32_t** commands) {
|
||||
if (copylen < 10) {
|
||||
**commands = static_cast<uint32_t>(copylen + 38);
|
||||
} else if (copylen < 134) {
|
||||
copylen -= 6;
|
||||
const size_t nbits = Log2FloorNonZero(copylen) - 1;
|
||||
const size_t prefix = copylen >> nbits;
|
||||
const size_t code = (nbits << 1) + prefix + 44;
|
||||
const size_t extra = copylen - (prefix << nbits);
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
} else if (copylen < 2118) {
|
||||
copylen -= 70;
|
||||
const size_t nbits = Log2FloorNonZero(copylen);
|
||||
const size_t code = nbits + 52;
|
||||
const size_t extra = copylen - (1 << nbits);
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
} else {
|
||||
const size_t extra = copylen - 2118;
|
||||
**commands = static_cast<uint32_t>(63 | (extra << 8));
|
||||
}
|
||||
++(*commands);
|
||||
}
|
||||
|
||||
inline void EmitCopyLenLastDistance(size_t copylen, uint32_t** commands) {
|
||||
if (copylen < 12) {
|
||||
**commands = static_cast<uint32_t>(copylen + 20);
|
||||
++(*commands);
|
||||
} else if (copylen < 72) {
|
||||
copylen -= 8;
|
||||
const size_t nbits = Log2FloorNonZero(copylen) - 1;
|
||||
const size_t prefix = copylen >> nbits;
|
||||
const size_t code = (nbits << 1) + prefix + 28;
|
||||
const size_t extra = copylen - (prefix << nbits);
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
++(*commands);
|
||||
} else if (copylen < 136) {
|
||||
copylen -= 8;
|
||||
const size_t code = (copylen >> 5) + 54;
|
||||
const size_t extra = copylen & 31;
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
++(*commands);
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
} else if (copylen < 2120) {
|
||||
copylen -= 72;
|
||||
const size_t nbits = Log2FloorNonZero(copylen);
|
||||
const size_t code = nbits + 52;
|
||||
const size_t extra = copylen - (1 << nbits);
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
++(*commands);
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
} else {
|
||||
const size_t extra = copylen - 2120;
|
||||
**commands = static_cast<uint32_t>(63 | (extra << 8));
|
||||
++(*commands);
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitDistance(uint32_t distance, uint32_t** commands) {
|
||||
distance += 3;
|
||||
uint32_t nbits = Log2FloorNonZero(distance) - 1;
|
||||
const uint32_t prefix = (distance >> nbits) & 1;
|
||||
const uint32_t offset = (2 + prefix) << nbits;
|
||||
const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
|
||||
uint32_t extra = distance - offset;
|
||||
**commands = distcode | (extra << 8);
|
||||
++(*commands);
|
||||
}
|
||||
|
||||
// REQUIRES: len <= 1 << 20.
|
||||
static void StoreMetaBlockHeader(
|
||||
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
|
||||
// ISLAST
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
// MNIBBLES is 4
|
||||
WriteBits(2, 0, storage_ix, storage);
|
||||
WriteBits(16, len - 1, storage_ix, storage);
|
||||
} else {
|
||||
// MNIBBLES is 5
|
||||
WriteBits(2, 1, storage_ix, storage);
|
||||
WriteBits(20, len - 1, storage_ix, storage);
|
||||
}
|
||||
// ISUNCOMPRESSED
|
||||
WriteBits(1, is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
|
||||
static void CreateCommands(const uint8_t* input, size_t block_size,
|
||||
size_t input_size, const uint8_t* base_ip,
|
||||
int* table, size_t table_size,
|
||||
uint8_t** literals, uint32_t** commands) {
|
||||
// "ip" is the input pointer.
|
||||
const uint8_t* ip = input;
|
||||
assert(table_size);
|
||||
assert(table_size <= (1u << 31));
|
||||
assert((table_size & (table_size - 1)) == 0); // table must be power of two
|
||||
const size_t shift = 64u - Log2FloorNonZero(table_size);
|
||||
assert(table_size - 1 == static_cast<size_t>(
|
||||
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
|
||||
const uint8_t* ip_end = input + block_size;
|
||||
// "next_emit" is a pointer to the first byte that is not covered by a
|
||||
// previous copy. Bytes between "next_emit" and the start of the next copy or
|
||||
// the end of the input will be emitted as literal bytes.
|
||||
const uint8_t* next_emit = input;
|
||||
|
||||
int last_distance = -1;
|
||||
const size_t kInputMarginBytes = 16;
|
||||
const size_t kMinMatchLen = 6;
|
||||
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
|
||||
// For the last block, we need to keep a 16 bytes margin so that we can be
|
||||
// sure that all distances are at most window size - 16.
|
||||
// For all other blocks, we only need to keep a margin of 5 bytes so that
|
||||
// we don't go over the block size with a copy.
|
||||
const size_t len_limit = std::min(block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const uint8_t* ip_limit = input + len_limit;
|
||||
|
||||
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
|
||||
assert(next_emit < ip);
|
||||
// Step 1: Scan forward in the input looking for a 6-byte-long match.
|
||||
// If we get close to exhausting the input then goto emit_remainder.
|
||||
//
|
||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
// found, start looking only at every other byte. If 32 more bytes are
|
||||
// scanned, look at every third byte, etc.. When a match is found,
|
||||
// immediately go back to looking at every byte. This is a small loss
|
||||
// (~5% performance, ~0.1% density) for compressible data due to more
|
||||
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
|
||||
// win since the compressor quickly "realizes" the data is incompressible
|
||||
// and doesn't bother looking for matches everywhere.
|
||||
//
|
||||
// The "skip" variable keeps track of how many bytes there are since the
|
||||
// last match; dividing it by 32 (ie. right-shifting by five) gives the
|
||||
// number of bytes to move ahead for each iteration.
|
||||
uint32_t skip = 32;
|
||||
|
||||
const uint8_t* next_ip = ip;
|
||||
const uint8_t* candidate;
|
||||
do {
|
||||
ip = next_ip;
|
||||
uint32_t hash = next_hash;
|
||||
assert(hash == Hash(ip, shift));
|
||||
uint32_t bytes_between_hash_lookups = skip++ >> 5;
|
||||
next_ip = ip + bytes_between_hash_lookups;
|
||||
if (PREDICT_FALSE(next_ip > ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
next_hash = Hash(next_ip, shift);
|
||||
candidate = ip - last_distance;
|
||||
if (IsMatch(ip, candidate)) {
|
||||
if (PREDICT_TRUE(candidate < ip)) {
|
||||
table[hash] = static_cast<int>(ip - base_ip);
|
||||
break;
|
||||
}
|
||||
}
|
||||
candidate = base_ip + table[hash];
|
||||
assert(candidate >= base_ip);
|
||||
assert(candidate < ip);
|
||||
|
||||
table[hash] = static_cast<int>(ip - base_ip);
|
||||
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
|
||||
|
||||
// Step 2: Emit the found match together with the literal bytes from
|
||||
// "next_emit", and then see if we can find a next macth immediately
|
||||
// afterwards. Repeat until we find no match for the input
|
||||
// without emitting some literal bytes.
|
||||
uint64_t input_bytes;
|
||||
|
||||
{
|
||||
// We have a 6-byte match at ip, and we need to emit bytes in
|
||||
// [next_emit, ip).
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 6 + FindMatchLengthWithLimit(
|
||||
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
|
||||
ip += matched;
|
||||
int distance = static_cast<int>(base - candidate); /* > 0 */
|
||||
int insert = static_cast<int>(base - next_emit);
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitInsertLen(static_cast<uint32_t>(insert), commands);
|
||||
memcpy(*literals, next_emit, static_cast<size_t>(insert));
|
||||
*literals += insert;
|
||||
if (distance == last_distance) {
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
} else {
|
||||
EmitDistance(static_cast<uint32_t>(distance), commands);
|
||||
last_distance = distance;
|
||||
}
|
||||
EmitCopyLenLastDistance(matched, commands);
|
||||
|
||||
next_emit = ip;
|
||||
if (PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
// We could immediately start working at ip now, but to improve
|
||||
// compression we first update "table" with the hashes of some positions
|
||||
// within the last copy.
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
|
||||
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = static_cast<int>(ip - base_ip);
|
||||
}
|
||||
|
||||
while (IsMatch(ip, candidate)) {
|
||||
// We have a 6-byte match at ip, and no need to emit any
|
||||
// literal bytes prior to ip.
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 6 + FindMatchLengthWithLimit(
|
||||
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
|
||||
ip += matched;
|
||||
last_distance = static_cast<int>(base - candidate); /* > 0 */
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitCopyLen(matched, commands);
|
||||
EmitDistance(static_cast<uint32_t>(last_distance), commands);
|
||||
|
||||
next_emit = ip;
|
||||
if (PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
// We could immediately start working at ip now, but to improve
|
||||
// compression we first update "table" with the hashes of some positions
|
||||
// within the last copy.
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
|
||||
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = static_cast<int>(ip - base_ip);
|
||||
}
|
||||
|
||||
next_hash = Hash(++ip, shift);
|
||||
}
|
||||
}
|
||||
|
||||
emit_remainder:
|
||||
assert(next_emit <= ip_end);
|
||||
// Emit the remaining bytes as literals.
|
||||
if (next_emit < ip_end) {
|
||||
const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
|
||||
EmitInsertLen(insert, commands);
|
||||
memcpy(*literals, next_emit, insert);
|
||||
*literals += insert;
|
||||
}
|
||||
}
|
||||
|
||||
static void StoreCommands(const uint8_t* literals, const size_t num_literals,
|
||||
const uint32_t* commands, const size_t num_commands,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
uint8_t lit_depths[256] = { 0 };
|
||||
uint16_t lit_bits[256] = { 0 };
|
||||
uint32_t lit_histo[256] = { 0 };
|
||||
for (size_t i = 0; i < num_literals; ++i) {
|
||||
++lit_histo[literals[i]];
|
||||
}
|
||||
BuildAndStoreHuffmanTreeFast(lit_histo, num_literals,
|
||||
/* max_bits = */ 8,
|
||||
lit_depths, lit_bits,
|
||||
storage_ix, storage);
|
||||
|
||||
uint8_t cmd_depths[128] = { 0 };
|
||||
uint16_t cmd_bits[128] = { 0 };
|
||||
uint32_t cmd_histo[128] = { 0 };
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
++cmd_histo[commands[i] & 0xff];
|
||||
}
|
||||
cmd_histo[1] += 1;
|
||||
cmd_histo[2] += 1;
|
||||
cmd_histo[64] += 1;
|
||||
cmd_histo[84] += 1;
|
||||
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
|
||||
storage_ix, storage);
|
||||
|
||||
static const uint32_t kNumExtraBits[128] = {
|
||||
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
|
||||
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
|
||||
17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
|
||||
};
|
||||
static const uint32_t kInsertOffset[24] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
|
||||
1090, 2114, 6210, 22594,
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
const uint32_t cmd = commands[i];
|
||||
const uint32_t code = cmd & 0xff;
|
||||
const uint32_t extra = cmd >> 8;
|
||||
WriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
|
||||
WriteBits(kNumExtraBits[code], extra, storage_ix, storage);
|
||||
if (code < 24) {
|
||||
const uint32_t insert = kInsertOffset[code] + extra;
|
||||
for (uint32_t j = 0; j < insert; ++j) {
|
||||
const uint8_t lit = *literals;
|
||||
WriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
|
||||
++literals;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool ShouldCompress(const uint8_t* input, size_t input_size,
|
||||
size_t num_literals) {
|
||||
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
|
||||
static const double kMaxRatioOfLiterals =
|
||||
1.0 - kAcceptableLossForUncompressibleSpeedup;
|
||||
if (num_literals < kMaxRatioOfLiterals * static_cast<double>(input_size)) {
|
||||
return true;
|
||||
}
|
||||
uint32_t literal_histo[256] = { 0 };
|
||||
static const uint32_t kSampleRate = 43;
|
||||
static const double kMaxEntropy =
|
||||
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
|
||||
const double max_total_bit_cost =
|
||||
static_cast<double>(input_size) * kMaxEntropy / kSampleRate;
|
||||
for (size_t i = 0; i < input_size; i += kSampleRate) {
|
||||
++literal_histo[input[i]];
|
||||
}
|
||||
return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
|
||||
}
|
||||
|
||||
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
|
||||
bool is_last,
|
||||
uint32_t* command_buf, uint8_t* literal_buf,
|
||||
int* table, size_t table_size,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
// Save the start of the first block for position and distance computations.
|
||||
const uint8_t* base_ip = input;
|
||||
|
||||
while (input_size > 0) {
|
||||
size_t block_size = std::min(input_size, kCompressFragmentTwoPassBlockSize);
|
||||
uint32_t* commands = command_buf;
|
||||
uint8_t* literals = literal_buf;
|
||||
CreateCommands(input, block_size, input_size, base_ip, table, table_size,
|
||||
&literals, &commands);
|
||||
const size_t num_literals = static_cast<size_t>(literals - literal_buf);
|
||||
const size_t num_commands = static_cast<size_t>(commands - command_buf);
|
||||
if (ShouldCompress(input, block_size, num_literals)) {
|
||||
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
// No block splits, no contexts.
|
||||
WriteBits(13, 0, storage_ix, storage);
|
||||
StoreCommands(literal_buf, num_literals, command_buf, num_commands,
|
||||
storage_ix, storage);
|
||||
} else {
|
||||
// Since we did not find many backward references and the entropy of
|
||||
// the data is close to 8 bits, we can simply emit an uncompressed block.
|
||||
// This makes compression speed of uncompressible data about 3x faster.
|
||||
StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
memcpy(&storage[*storage_ix >> 3], input, block_size);
|
||||
*storage_ix += block_size << 3;
|
||||
storage[*storage_ix >> 3] = 0;
|
||||
}
|
||||
input += block_size;
|
||||
input_size -= block_size;
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
WriteBits(1, 1, storage_ix, storage); // islast
|
||||
WriteBits(1, 1, storage_ix, storage); // isempty
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,40 @@
|
|||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Function for fast encoding of an input fragment, independently from the input
|
||||
// history. This function uses two-pass processing: in the first pass we save
|
||||
// the found backward matches and literal bytes into a buffer, and in the
|
||||
// second pass we emit them into the bit stream using prefix codes built based
|
||||
// on the actual command and literal byte histograms.
|
||||
|
||||
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
|
||||
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Upper bound on the number of input bytes that BrotliCompressFragmentTwoPass
// encodes into a single meta-block (1 << 17 = 131072 bytes).
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
|
||||
|
||||
// Compresses "input" string to the "*storage" buffer as one or more complete
|
||||
// meta-blocks, and updates the "*storage_ix" bit position.
|
||||
//
|
||||
// If "is_last" is true, emits an additional empty last meta-block.
|
||||
//
|
||||
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
|
||||
// REQUIRES: "command_buf" and "literal_buf" point to at least
|
||||
// kCompressFragmentTwoPassBlockSize long arrays.
|
||||
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
|
||||
// REQUIRES: "table_size" is a power of two
|
||||
//
// Parameters:
//   input, input_size   bytes to compress; they are processed in blocks of at
//                       most kCompressFragmentTwoPassBlockSize bytes, and each
//                       block becomes one meta-block (compressed, or stored
//                       uncompressed when compression does not pay off)
//   is_last             when true, an empty last meta-block is appended and
//                       the bit position is padded to a byte boundary
//   command_buf,        scratch buffers, refilled for every block
//   literal_buf
//   table, table_size   match-finder hash table; entries are offsets from the
//                       start of "input"
//   storage_ix          in/out: bit position in "storage"
//   storage             output buffer
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
|
||||
bool is_last,
|
||||
uint32_t* command_buf, uint8_t* literal_buf,
|
||||
int* table, size_t table_size,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
|
|
@ -0,0 +1,15 @@
|
|||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* C++ API for Brotli compression. */
|
||||
|
||||
#ifndef BROTLI_ENC_COMPRESSOR_H_
|
||||
#define BROTLI_ENC_COMPRESSOR_H_
|
||||
|
||||
#include "./encode.h"
|
||||
#include "./streams.h"
|
||||
|
||||
#endif /* BROTLI_ENC_COMPRESSOR_H_ */
|
|
@ -0,0 +1,178 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Functions to map previous bytes into a context id.
|
||||
|
||||
#ifndef BROTLI_ENC_CONTEXT_H_
|
||||
#define BROTLI_ENC_CONTEXT_H_
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Second-order context lookup table for UTF8 byte streams.
|
||||
//
|
||||
// If p1 and p2 are the previous two bytes, we calculate the context as
|
||||
//
|
||||
// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
|
||||
//
|
||||
// If the previous two bytes are ASCII characters (i.e. < 128), this will be
|
||||
// equivalent to
|
||||
//
|
||||
// context = 4 * context1(p1) + context2(p2),
|
||||
//
|
||||
// where context1 is based on the previous byte in the following way:
|
||||
//
|
||||
// 0 : non-ASCII control
|
||||
// 1 : \t, \n, \r
|
||||
// 2 : space
|
||||
// 3 : other punctuation
|
||||
// 4 : " '
|
||||
// 5 : %
|
||||
// 6 : ( < [ {
|
||||
// 7 : ) > ] }
|
||||
// 8 : , ; :
|
||||
// 9 : .
|
||||
// 10 : =
|
||||
// 11 : number
|
||||
// 12 : upper-case vowel
|
||||
// 13 : upper-case consonant
|
||||
// 14 : lower-case vowel
|
||||
// 15 : lower-case consonant
|
||||
//
|
||||
// and context2 is based on the second last byte:
|
||||
//
|
||||
// 0 : control, space
|
||||
// 1 : punctuation
|
||||
// 2 : upper-case letter, number
|
||||
// 3 : lower-case letter
|
||||
//
|
||||
// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
|
||||
// stream it will be a continuation byte, value between 128 and 191), the
|
||||
// context is the same as if the second last byte was an ASCII control or space.
|
||||
//
|
||||
// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
|
||||
// be a continuation byte and the context id is 2 or 3 depending on the LSB of
|
||||
// the last byte and to a lesser extent on the second last byte if it is ASCII.
|
||||
//
|
||||
// If the last byte is a UTF8 continuation byte, the second last byte can be:
|
||||
// - continuation byte: the next byte is probably ASCII or lead byte (assuming
|
||||
// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
|
||||
// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
|
||||
// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
|
||||
//
|
||||
// The possible value combinations of the previous two bytes, the range of
|
||||
// context ids and the type of the next byte is summarized in the table below:
|
||||
//
|
||||
// |--------\-----------------------------------------------------------------|
|
||||
// | \ Last byte |
|
||||
// | Second \---------------------------------------------------------------|
|
||||
// | last byte \ ASCII | cont. byte | lead byte |
|
||||
// | \ (0-127) | (128-191) | (192-) |
|
||||
// |=============|===================|=====================|==================|
|
||||
// | ASCII | next: ASCII/lead | not valid | next: cont. |
|
||||
// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
|
||||
// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | lead byte | not valid | next: ASCII/lead | not valid |
|
||||
// | (192-207) | | context: 0 - 1 | |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | lead byte | not valid | next: cont. | not valid |
|
||||
// | (208-) | | context: 2 - 3 | |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// Entries 0-255 are indexed by the last byte, entries 256-511 by the second
// last byte plus 256; each row below holds 16 entries.
static const uint8_t kUTF8ContextLookup[512] = {
|
||||
// Last byte.
|
||||
//
|
||||
// ASCII range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
|
||||
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
|
||||
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
|
||||
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
|
||||
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
|
||||
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
|
||||
// UTF8 continuation byte range.
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
// UTF8 lead byte range.
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
// Second last byte.
|
||||
//
|
||||
// ASCII range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
|
||||
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
|
||||
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
|
||||
// UTF8 continuation byte range (128-191, four rows).
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
// UTF8 lead byte range (192-255, four rows).
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
};
|
||||
|
||||
// Context lookup table for small signed integers.
//
// Maps a byte value to one of eight classes (16 entries per row):
//   0 -> 0, 1-15 -> 1, 16-63 -> 2, 64-127 -> 3,
//   128-191 -> 4, 192-239 -> 5, 240-254 -> 6, 255 -> 7.
|
||||
static const uint8_t kSigned3BitContextLookup[] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
};
|
||||
|
||||
// Selects how Context() derives a context id from the two previous bytes.
enum ContextType {
|
||||
// Low six bits of the last byte.
CONTEXT_LSB6 = 0,
|
||||
// High six bits of the last byte.
CONTEXT_MSB6 = 1,
|
||||
// kUTF8ContextLookup of the last byte OR-ed with that of the second last byte.
CONTEXT_UTF8 = 2,
|
||||
// kSigned3BitContextLookup of the last byte times eight, plus that of the
// second last byte.
CONTEXT_SIGNED = 3
|
||||
};
|
||||
|
||||
static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
|
||||
switch (mode) {
|
||||
case CONTEXT_LSB6:
|
||||
return p1 & 0x3f;
|
||||
case CONTEXT_MSB6:
|
||||
return static_cast<uint8_t>(p1 >> 2);
|
||||
case CONTEXT_UTF8:
|
||||
return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
|
||||
case CONTEXT_SIGNED:
|
||||
return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +
|
||||
kSigned3BitContextLookup[p2]);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_CONTEXT_H_
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,41 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Collection of static dictionary words.
|
||||
|
||||
#ifndef BROTLI_ENC_DICTIONARY_H_
|
||||
#define BROTLI_ENC_DICTIONARY_H_
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
// No namespace, use same identifier as for the C decoder.
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern const uint8_t kBrotliDictionary[122784];
|
||||
|
||||
// Indexed by word length (0..24). For lengths 4..24 each entry equals the
// previous entry plus (previous length << kBrotliDictionarySizeBitsByLength of
// that length), and the last entry plus (24 << 5) equals 122784, the size of
// kBrotliDictionary. Presumably entry n is the byte offset in
// kBrotliDictionary where the words of length n start; confirm against users.
static const uint32_t kBrotliDictionaryOffsetsByLength[] = {
|
||||
0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032,
|
||||
53248, 63488, 74752, 87040, 93696, 100864, 104704, 106752, 108928, 113536,
|
||||
115968, 118528, 119872, 121280, 122016,
|
||||
};
|
||||
|
||||
// Indexed by word length (0..24). Consistent with
// kBrotliDictionaryOffsetsByLength if entry n is the base-2 logarithm of the
// number of dictionary words of length n (e.g. 1 << 10 words of length 4
// occupy 4096 bytes). Lengths 0..3 have no words.
static const uint8_t kBrotliDictionarySizeBitsByLength[] = {
|
||||
0, 0, 0, 0, 10, 10, 11, 11, 10, 10,
|
||||
10, 10, 10, 9, 9, 8, 7, 7, 8, 7,
|
||||
7, 6, 6, 5, 5,
|
||||
};
|
||||
|
||||
// Shortest and longest word length; these match the first and last index with
// a non-zero entry in kBrotliDictionarySizeBitsByLength.
static const int kBrotliMinDictionaryWordLength = 4;
|
||||
static const int kBrotliMaxDictionaryWordLength = 24;
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif // BROTLI_ENC_DICTIONARY_H_
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,209 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// API for Brotli compression
|
||||
|
||||
#ifndef BROTLI_ENC_ENCODE_H_
|
||||
#define BROTLI_ENC_ENCODE_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "./command.h"
|
||||
#include "./hash.h"
|
||||
#include "./ringbuffer.h"
|
||||
#include "./static_dict.h"
|
||||
#include "./streams.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Bounds, in bits, matching the ranges documented on BrotliParams::lgwin
// (10 to 24) and BrotliParams::lgblock (16 to 24).
static const int kMaxWindowBits = 24;
|
||||
static const int kMinWindowBits = 10;
|
||||
static const int kMinInputBlockBits = 16;
|
||||
static const int kMaxInputBlockBits = 24;
|
||||
|
||||
// Settings for a BrotliCompressor.
struct BrotliParams {
  enum Mode {
    // Default compression mode. The compressor does not know anything in
    // advance about the properties of the input.
    MODE_GENERIC = 0,
    // Compression mode for UTF-8 format text input.
    MODE_TEXT = 1,
    // Compression mode used in WOFF 2.0.
    MODE_FONT = 2
  };

  // Defaults: generic mode, quality 11, 22-bit window, block size chosen from
  // the quality.
  BrotliParams()
      : mode(MODE_GENERIC),
        quality(11),
        lgwin(22),
        lgblock(0),
        enable_dictionary(true),
        enable_transforms(false),
        greedy_block_split(false),
        enable_context_modeling(true) {
  }

  // Kind of input the compressor should expect.
  Mode mode;

  // Compression speed versus density trade-off, 0 to 11. Higher values
  // compress more slowly.
  int quality;

  // Base 2 logarithm of the sliding window size, 10 to 24.
  int lgwin;

  // Base 2 logarithm of the maximum input block size, 16 to 24.
  // When 0, the value is derived from the quality.
  int lgblock;

  // Deprecated and ignored: every speed vs. size compromise is controlled by
  // the quality parameter.
  bool enable_dictionary;
  bool enable_transforms;
  bool greedy_block_split;
  bool enable_context_modeling;
};
|
||||
|
||||
// An instance can not be reused for multiple brotli streams.
|
||||
class BrotliCompressor {
|
||||
public:
|
||||
explicit BrotliCompressor(BrotliParams params);
|
||||
~BrotliCompressor(void);
|
||||
|
||||
// The maximum input size that can be processed at once.
|
||||
size_t input_block_size(void) const { return size_t(1) << params_.lgblock; }
|
||||
|
||||
// Encodes the data in input_buffer as a meta-block and writes it to
|
||||
// encoded_buffer (*encoded_size should be set to the size of
|
||||
// encoded_buffer) and sets *encoded_size to the number of bytes that
|
||||
// was written. The input_size must be <= input_block_size().
|
||||
// Returns 0 if there was an error and 1 otherwise.
|
||||
bool WriteMetaBlock(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
// Writes a metadata meta-block containing the given input to encoded_buffer.
|
||||
// *encoded_size should be set to the size of the encoded_buffer.
|
||||
// Sets *encoded_size to the number of bytes that was written.
|
||||
// Note that the given input data will not be part of the sliding window and
|
||||
// thus no backward references can be made to this data from subsequent
|
||||
// metablocks.
|
||||
bool WriteMetadata(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
// Writes a zero-length meta-block with end-of-input bit set to the
|
||||
// internal output buffer and copies the output buffer to encoded_buffer
|
||||
// (*encoded_size should be set to the size of encoded_buffer) and sets
|
||||
// *encoded_size to the number of bytes written. Returns false if there was
|
||||
// an error and true otherwise.
|
||||
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
|
||||
|
||||
// Copies the given input data to the internal ring buffer of the compressor.
|
||||
// No processing of the data occurs at this time and this function can be
|
||||
// called multiple times before calling WriteBrotliData() to process the
|
||||
// accumulated input. At most input_block_size() bytes of input data can be
|
||||
// copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
|
||||
void CopyInputToRingBuffer(const size_t input_size,
|
||||
const uint8_t* input_buffer);
|
||||
|
||||
// Processes the accumulated input data and sets *out_size to the length of
|
||||
// the new output meta-block, or to zero if no new output meta-block was
|
||||
// created (in this case the processed input data is buffered internally).
|
||||
// If *out_size is positive, *output points to the start of the output data.
|
||||
// If is_last or force_flush is true, an output meta-block is always created.
|
||||
// Returns false if the size of the input data is larger than
|
||||
// input_block_size().
|
||||
bool WriteBrotliData(const bool is_last, const bool force_flush,
|
||||
size_t* out_size, uint8_t** output);
|
||||
|
||||
// Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
|
||||
// e.g. for custom static dictionaries for data formats.
|
||||
// Not to be confused with the built-in transformable dictionary of Brotli.
|
||||
// To decode, use BrotliSetCustomDictionary of the decoder with the same
|
||||
// dictionary.
|
||||
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
|
||||
|
||||
// No-op, but we keep it here for API backward-compatibility.
|
||||
void WriteStreamHeader(void) {}
|
||||
|
||||
private:
|
||||
uint8_t* GetBrotliStorage(size_t size);
|
||||
|
||||
// Allocates and clears a hash table using memory in "*this",
|
||||
// stores the number of buckets in "*table_size" and returns a pointer to
|
||||
// the base of the hash table.
|
||||
int* GetHashTable(int quality,
|
||||
size_t input_size, size_t* table_size);
|
||||
|
||||
BrotliParams params_;
|
||||
Hashers* hashers_;
|
||||
int hash_type_;
|
||||
uint64_t input_pos_;
|
||||
RingBuffer* ringbuffer_;
|
||||
size_t cmd_alloc_size_;
|
||||
Command* commands_;
|
||||
size_t num_commands_;
|
||||
size_t num_literals_;
|
||||
size_t last_insert_len_;
|
||||
uint64_t last_flush_pos_;
|
||||
uint64_t last_processed_pos_;
|
||||
int dist_cache_[4];
|
||||
int saved_dist_cache_[4];
|
||||
uint8_t last_byte_;
|
||||
uint8_t last_byte_bits_;
|
||||
uint8_t prev_byte_;
|
||||
uint8_t prev_byte2_;
|
||||
size_t storage_size_;
|
||||
uint8_t* storage_;
|
||||
// Hash table for quality 0 mode.
|
||||
int small_table_[1 << 10]; // 2KB
|
||||
int* large_table_; // Allocated only when needed
|
||||
// Command and distance prefix codes (each 64 symbols, stored back-to-back)
|
||||
// used for the next block in quality 0. The command prefix code is over a
|
||||
// smaller alphabet with the following 64 symbols:
|
||||
// 0 - 15: insert length code 0, copy length code 0 - 15, same distance
|
||||
// 16 - 39: insert length code 0, copy length code 0 - 23
|
||||
// 40 - 63: insert length code 0 - 23, copy length code 0
|
||||
// Note that symbols 16 and 40 represent the same code in the full alphabet,
|
||||
// but we do not use either of them in quality 0.
|
||||
uint8_t cmd_depths_[128];
|
||||
uint16_t cmd_bits_[128];
|
||||
// The compressed form of the command and distance prefix codes for the next
|
||||
// block in quality 0.
|
||||
uint8_t cmd_code_[512];
|
||||
size_t cmd_code_numbits_;
|
||||
// Command and literal buffers for quality 1.
|
||||
uint32_t* command_buf_;
|
||||
uint8_t* literal_buf_;
|
||||
|
||||
int is_last_block_emitted_;
|
||||
};
|
||||
|
||||
// Compresses the data in input_buffer into encoded_buffer, and sets
|
||||
// *encoded_size to the compressed length.
|
||||
// Returns 0 if there was an error and 1 otherwise.
|
||||
int BrotliCompressBuffer(BrotliParams params,
|
||||
size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
// Same as above, but uses the specified input and output classes instead
|
||||
// of reading from and writing to pre-allocated memory buffers.
|
||||
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
|
||||
|
||||
// Before compressing the data, sets a custom LZ77 dictionary with
|
||||
// BrotliCompressor::BrotliSetCustomDictionary.
|
||||
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
||||
BrotliParams params,
|
||||
BrotliIn* in, BrotliOut* out);
|
||||
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_ENCODE_H_
|
|
@ -0,0 +1,279 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Implementation of parallel Brotli compressor.
|
||||
|
||||
#include "./encode_parallel.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "./backward_references.h"
|
||||
#include "./bit_cost.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./brotli_bit_stream.h"
|
||||
#include "./cluster.h"
|
||||
#include "./context.h"
|
||||
#include "./metablock.h"
|
||||
#include "./transform.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./hash.h"
|
||||
#include "./histogram.h"
|
||||
#include "./prefix.h"
|
||||
#include "./utf8_util.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
namespace {
|
||||
|
||||
// Re-encodes the distance prefix and extra bits of the given commands for
// the requested distance parameters. Nothing is done when both parameters
// are zero. Only commands with a non-zero copy_len() and a cmd_prefix_ of at
// least 128 are re-encoded.
void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
                               uint32_t num_direct_distance_codes,
                               uint32_t distance_postfix_bits) {
  const bool default_distance_params =
      num_direct_distance_codes == 0 && distance_postfix_bits == 0;
  if (default_distance_params) {
    return;
  }
  Command* const cmds_end = cmds + num_commands;
  for (Command* cmd = cmds; cmd != cmds_end; ++cmd) {
    if (!cmd->copy_len() || cmd->cmd_prefix_ < 128) {
      continue;
    }
    PrefixEncodeCopyDistance(cmd->DistanceCode(),
                             num_direct_distance_codes,
                             distance_postfix_bits,
                             &cmd->dist_prefix_,
                             &cmd->dist_extra_);
  }
}
|
||||
|
||||
bool WriteMetaBlockParallel(const BrotliParams& params,
|
||||
const uint32_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const uint32_t prefix_size,
|
||||
const uint8_t* prefix_buffer,
|
||||
const bool is_first,
|
||||
const bool is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
if (input_size == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy prefix + next input block into a continuous area.
|
||||
uint32_t input_pos = prefix_size;
|
||||
// CreateBackwardReferences reads up to 3 bytes past the end of input if the
|
||||
// mask points past the end of input.
|
||||
// FindMatchLengthWithLimit could do another 8 bytes look-forward.
|
||||
std::vector<uint8_t> input(prefix_size + input_size + 4 + 8);
|
||||
memcpy(&input[0], prefix_buffer, prefix_size);
|
||||
memcpy(&input[input_pos], input_buffer, input_size);
|
||||
// Since we don't have a ringbuffer, masking is a no-op.
|
||||
// We use one less bit than the full range because some of the code uses
|
||||
// mask + 1 as the size of the ringbuffer.
|
||||
const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
|
||||
|
||||
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
|
||||
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
|
||||
|
||||
// Decide about UTF8 mode.
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
|
||||
kMinUTF8Ratio);
|
||||
|
||||
// Initialize hashers.
|
||||
int hash_type = std::min(10, params.quality);
|
||||
Hashers* hashers = new Hashers();
|
||||
hashers->Init(hash_type);
|
||||
|
||||
// Compute backward references.
|
||||
size_t last_insert_len = 0;
|
||||
size_t num_commands = 0;
|
||||
size_t num_literals = 0;
|
||||
int dist_cache[4] = { -4, -4, -4, -4 };
|
||||
Command* commands = static_cast<Command*>(
|
||||
malloc(sizeof(Command) * ((input_size + 1) >> 1)));
|
||||
if (commands == 0) {
|
||||
delete hashers;
|
||||
return false;
|
||||
}
|
||||
CreateBackwardReferences(
|
||||
input_size, input_pos, is_last,
|
||||
&input[0], mask,
|
||||
params.quality,
|
||||
params.lgwin,
|
||||
hashers,
|
||||
hash_type,
|
||||
dist_cache,
|
||||
&last_insert_len,
|
||||
commands,
|
||||
&num_commands,
|
||||
&num_literals);
|
||||
delete hashers;
|
||||
if (last_insert_len > 0) {
|
||||
commands[num_commands++] = Command(last_insert_len);
|
||||
num_literals += last_insert_len;
|
||||
}
|
||||
assert(num_commands != 0);
|
||||
|
||||
// Build the meta-block.
|
||||
MetaBlockSplit mb;
|
||||
uint32_t num_direct_distance_codes =
|
||||
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
|
||||
uint32_t distance_postfix_bits =
|
||||
params.mode == BrotliParams::MODE_FONT ? 1 : 0;
|
||||
ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
|
||||
RecomputeDistancePrefixes(commands, num_commands,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits);
|
||||
if (params.quality <= 9) {
|
||||
BuildMetaBlockGreedy(&input[0], input_pos, mask,
|
||||
commands, num_commands,
|
||||
&mb);
|
||||
} else {
|
||||
BuildMetaBlock(&input[0], input_pos, mask,
|
||||
prev_byte, prev_byte2,
|
||||
commands, num_commands,
|
||||
literal_context_mode,
|
||||
&mb);
|
||||
}
|
||||
|
||||
// Set up the temporary output storage.
|
||||
const size_t max_out_size = 2 * input_size + 500;
|
||||
std::vector<uint8_t> storage(max_out_size);
|
||||
uint8_t first_byte = 0;
|
||||
size_t first_byte_bits = 0;
|
||||
if (is_first) {
|
||||
if (params.lgwin == 16) {
|
||||
first_byte = 0;
|
||||
first_byte_bits = 1;
|
||||
} else if (params.lgwin == 17) {
|
||||
first_byte = 1;
|
||||
first_byte_bits = 7;
|
||||
} else {
|
||||
first_byte = static_cast<uint8_t>(((params.lgwin - 17) << 1) | 1);
|
||||
first_byte_bits = 4;
|
||||
}
|
||||
}
|
||||
storage[0] = static_cast<uint8_t>(first_byte);
|
||||
size_t storage_ix = first_byte_bits;
|
||||
|
||||
// Store the meta-block to the temporary output.
|
||||
StoreMetaBlock(&input[0], input_pos, input_size, mask,
|
||||
prev_byte, prev_byte2,
|
||||
is_last,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
literal_context_mode,
|
||||
commands, num_commands,
|
||||
mb,
|
||||
&storage_ix, &storage[0]);
|
||||
free(commands);
|
||||
|
||||
// If this is not the last meta-block, store an empty metadata
|
||||
// meta-block so that the meta-block will end at a byte boundary.
|
||||
if (!is_last) {
|
||||
StoreSyncMetaBlock(&storage_ix, &storage[0]);
|
||||
}
|
||||
|
||||
// If the compressed data is too large, fall back to an uncompressed
|
||||
// meta-block.
|
||||
size_t output_size = storage_ix >> 3;
|
||||
if (input_size + 4 < output_size) {
|
||||
storage[0] = static_cast<uint8_t>(first_byte);
|
||||
storage_ix = first_byte_bits;
|
||||
StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
|
||||
input_size,
|
||||
&storage_ix, &storage[0]);
|
||||
output_size = storage_ix >> 3;
|
||||
}
|
||||
|
||||
// Copy the temporary output with size-check to the output.
|
||||
if (output_size > *encoded_size) {
|
||||
return false;
|
||||
}
|
||||
memcpy(encoded_buffer, &storage[0], output_size);
|
||||
*encoded_size = output_size;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Compresses input_buffer block by block, each block being encoded by
// WriteMetaBlockParallel with up to 2^lgwin preceding input bytes as prefix,
// and concatenates the pieces into encoded_buffer.
// On entry *encoded_size must hold the capacity of encoded_buffer; on
// success it holds the number of bytes written.
// Returns 0 if there was an error and 1 otherwise.
int BrotliCompressBufferParallel(BrotliParams params,
                                 size_t input_size,
                                 const uint8_t* input_buffer,
                                 size_t* encoded_size,
                                 uint8_t* encoded_buffer) {
  if (*encoded_size == 0) {
    // Output buffer needs at least one byte.
    return 0;
  }
  if (input_size == 0) {
    // Empty input is emitted as the single byte 6.
    encoded_buffer[0] = 6;
    *encoded_size = 1;
    return 1;
  }

  // Sanitize params: clamp the window size ...
  if (params.lgwin < kMinWindowBits) {
    params.lgwin = kMinWindowBits;
  } else if (params.lgwin > kMaxWindowBits) {
    params.lgwin = kMaxWindowBits;
  }
  // ... and pick or clamp the input block size.
  if (params.lgblock == 0) {
    params.lgblock = 16;
    if (params.quality >= 9 && params.lgwin > params.lgblock) {
      params.lgblock = std::min(21, params.lgwin);
    }
  } else if (params.lgblock < kMinInputBlockBits) {
    params.lgblock = kMinInputBlockBits;
  } else if (params.lgblock > kMaxInputBlockBits) {
    params.lgblock = kMaxInputBlockBits;
  }
  size_t max_input_block_size = 1 << params.lgblock;
  size_t max_prefix_size = 1u << params.lgwin;

  std::vector<std::vector<uint8_t> > compressed_pieces;

  // Compress block-by-block independently.
  size_t pos = 0;
  while (pos < input_size) {
    const size_t remaining = input_size - pos;
    const uint32_t block_len =
        static_cast<uint32_t>(std::min(max_input_block_size, remaining));
    const uint32_t prefix_len =
        static_cast<uint32_t>(std::min(max_prefix_size, pos));
    const bool first_block = (pos == 0);
    const bool last_block = (pos + block_len == input_size);

    size_t piece_size = block_len + (block_len >> 3) + 1024;
    std::vector<uint8_t> piece(piece_size);
    const bool ok = WriteMetaBlockParallel(params,
                                           block_len,
                                           &input_buffer[pos],
                                           prefix_len,
                                           &input_buffer[pos - prefix_len],
                                           first_block,
                                           last_block,
                                           &piece_size,
                                           &piece[0]);
    if (!ok) {
      return 0;
    }
    piece.resize(piece_size);
    compressed_pieces.push_back(piece);
    pos += block_len;
  }

  // Piece together the output, failing if it does not fit.
  size_t written = 0;
  for (size_t idx = 0; idx < compressed_pieces.size(); ++idx) {
    const std::vector<uint8_t>& piece = compressed_pieces[idx];
    const size_t piece_len = piece.size();
    if (written + piece_len > *encoded_size) {
      return 0;
    }
    memcpy(&encoded_buffer[written], &piece[0], piece_len);
    written += piece_len;
  }
  *encoded_size = written;

  return 1;
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,28 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// API for parallel Brotli compression
|
||||
// Note that this is only a proof of concept currently and not part of the
|
||||
// final API yet.
|
||||
|
||||
#ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
|
||||
#define BROTLI_ENC_ENCODE_PARALLEL_H_
|
||||
|
||||
|
||||
#include "./encode.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
int BrotliCompressBufferParallel(BrotliParams params,
|
||||
size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_ENCODE_PARALLEL_H_
|
|
@ -0,0 +1,480 @@
|
|||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Entropy encoding (Huffman) utilities.
|
||||
|
||||
#include "./entropy_encode.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "./histogram.h"
|
||||
#include "./port.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void SetDepth(const HuffmanTree &p,
|
||||
HuffmanTree *pool,
|
||||
uint8_t *depth,
|
||||
uint8_t level) {
|
||||
if (p.index_left_ >= 0) {
|
||||
++level;
|
||||
SetDepth(pool[p.index_left_], pool, depth, level);
|
||||
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
|
||||
} else {
|
||||
depth[p.index_right_or_value_] = level;
|
||||
}
|
||||
}
|
||||
|
||||
// Sort the root nodes, least popular first.
|
||||
static inline bool SortHuffmanTree(const HuffmanTree& v0,
|
||||
const HuffmanTree& v1) {
|
||||
if (v0.total_count_ != v1.total_count_) {
|
||||
return v0.total_count_ < v1.total_count_;
|
||||
}
|
||||
return v0.index_right_or_value_ > v1.index_right_or_value_;
|
||||
}
|
||||
|
||||
// This function will create a Huffman tree.
|
||||
//
|
||||
// The catch here is that the tree cannot be arbitrarily deep.
|
||||
// Brotli specifies a maximum depth of 15 bits for "code trees"
|
||||
// and 7 bits for "code length code trees."
|
||||
//
|
||||
// count_limit is the value that is to be faked as the minimum value
|
||||
// and this minimum value is raised until the tree matches the
|
||||
// maximum length requirement.
|
||||
//
|
||||
// This algorithm is not of excellent performance for very long data blocks,
|
||||
// especially when population counts are longer than 2**tree_limit, but
|
||||
// we are not planning to use this with extremely long blocks.
|
||||
//
|
||||
// See http://en.wikipedia.org/wiki/Huffman_coding
|
||||
void CreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth) {
|
||||
// For block sizes below 64 kB, we never need to do a second iteration
|
||||
// of this loop. Probably all of our block sizes will be smaller than
|
||||
// that, so this loop is mostly of academic interest. If we actually
|
||||
// would need this, we would be better off with the Katajainen algorithm.
|
||||
for (uint32_t count_limit = 1; ; count_limit *= 2) {
|
||||
size_t n = 0;
|
||||
for (size_t i = length; i != 0;) {
|
||||
--i;
|
||||
if (data[i]) {
|
||||
const uint32_t count = std::max(data[i], count_limit);
|
||||
tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
|
||||
}
|
||||
}
|
||||
|
||||
if (n == 1) {
|
||||
depth[tree[0].index_right_or_value_] = 1; // Only one element.
|
||||
break;
|
||||
}
|
||||
|
||||
std::sort(tree, tree + n, SortHuffmanTree);
|
||||
|
||||
// The nodes are:
|
||||
// [0, n): the sorted leaf nodes that we start with.
|
||||
// [n]: we add a sentinel here.
|
||||
// [n + 1, 2n): new parent nodes are added here, starting from
|
||||
// (n+1). These are naturally in ascending order.
|
||||
// [2n]: we add a sentinel at the end as well.
|
||||
// There will be (2n+1) elements at the end.
|
||||
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
|
||||
tree[n] = sentinel;
|
||||
tree[n + 1] = sentinel;
|
||||
|
||||
size_t i = 0; // Points to the next leaf node.
|
||||
size_t j = n + 1; // Points to the next non-leaf node.
|
||||
for (size_t k = n - 1; k != 0; --k) {
|
||||
size_t left, right;
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
left = i;
|
||||
++i;
|
||||
} else {
|
||||
left = j;
|
||||
++j;
|
||||
}
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
right = i;
|
||||
++i;
|
||||
} else {
|
||||
right = j;
|
||||
++j;
|
||||
}
|
||||
|
||||
// The sentinel node becomes the parent node.
|
||||
size_t j_end = 2 * n - k;
|
||||
tree[j_end].total_count_ =
|
||||
tree[left].total_count_ + tree[right].total_count_;
|
||||
tree[j_end].index_left_ = static_cast<int16_t>(left);
|
||||
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
|
||||
|
||||
// Add back the last sentinel node.
|
||||
tree[j_end + 1] = sentinel;
|
||||
}
|
||||
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
|
||||
|
||||
// We need to pack the Huffman tree in tree_limit bits.
|
||||
// If this was not successful, add fake entities to the lowest values
|
||||
// and retry.
|
||||
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reverses the elements v[start .. end - 1] in place.
static void Reverse(uint8_t* v, size_t start, size_t end) {
  for (size_t lo = start, hi = end - 1; lo < hi; ++lo, --hi) {
    std::swap(v[lo], v[hi]);
  }
}
|
||||
|
||||
// Appends the code length symbols for `repetitions` consecutive occurrences
// of the non-zero code length `value` to tree[] / extra_bits_data[],
// starting at index *tree_size, and advances *tree_size.
// The value itself is written first when it differs from previous_value.
// Runs of 3 or more are written as one or more symbols 16, each carrying
// two extra bits; shorter runs are written as plain values.
static void WriteHuffmanTreeRepetitions(
    const uint8_t previous_value,
    const uint8_t value,
    size_t repetitions,
    size_t* tree_size,
    uint8_t* tree,
    uint8_t* extra_bits_data) {
  assert(repetitions > 0);
  size_t out = *tree_size;
  if (previous_value != value) {
    tree[out] = value;
    extra_bits_data[out] = 0;
    ++out;
    --repetitions;
  }
  if (repetitions == 7) {
    // A run of 7 is written as one literal value plus a run of 6.
    tree[out] = value;
    extra_bits_data[out] = 0;
    ++out;
    --repetitions;
  }
  if (repetitions < 3) {
    for (; repetitions != 0; --repetitions) {
      tree[out] = value;
      extra_bits_data[out] = 0;
      ++out;
    }
  } else {
    const size_t run_start = out;
    size_t remaining = repetitions - 3;
    for (;;) {
      tree[out] = 16;
      extra_bits_data[out] = remaining & 0x3;
      ++out;
      remaining >>= 2;
      if (remaining == 0) {
        break;
      }
      --remaining;
    }
    // The symbols were produced least significant first; flip them.
    std::reverse(tree + run_start, tree + out);
    std::reverse(extra_bits_data + run_start, extra_bits_data + out);
  }
  *tree_size = out;
}
|
||||
|
||||
// Appends the code length symbols for `repetitions` consecutive zero code
// lengths to tree[] / extra_bits_data[], starting at index *tree_size, and
// advances *tree_size.
// Runs of 3 or more are written as one or more symbols 17, each carrying
// three extra bits; shorter runs are written as plain zeros.
static void WriteHuffmanTreeRepetitionsZeros(
    size_t repetitions,
    size_t* tree_size,
    uint8_t* tree,
    uint8_t* extra_bits_data) {
  size_t out = *tree_size;
  if (repetitions == 11) {
    // A run of 11 is written as one literal zero plus a run of 10.
    tree[out] = 0;
    extra_bits_data[out] = 0;
    ++out;
    --repetitions;
  }
  if (repetitions < 3) {
    for (; repetitions != 0; --repetitions) {
      tree[out] = 0;
      extra_bits_data[out] = 0;
      ++out;
    }
  } else {
    const size_t run_start = out;
    size_t remaining = repetitions - 3;
    for (;;) {
      tree[out] = 17;
      extra_bits_data[out] = remaining & 0x7;
      ++out;
      remaining >>= 3;
      if (remaining == 0) {
        break;
      }
      --remaining;
    }
    // The symbols were produced least significant first; flip them.
    std::reverse(tree + run_start, tree + out);
    std::reverse(extra_bits_data + run_start, extra_bits_data + out);
  }
  *tree_size = out;
}
|
||||
|
||||
// Adjusts the population counts in counts[0..length) in place so that the
// code lengths derived from them contain longer runs of equal values.
// good_for_rle is scratch space of at least `length` bytes.
// Histograms with fewer than 16 used symbols are returned unchanged.
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
                                 uint8_t* good_for_rle) {
  const size_t kStreakLimit = 1240;

  // Let's make the Huffman code more compatible with rle encoding.
  size_t used_symbols = 0;
  for (size_t pos = 0; pos < length; ++pos) {
    if (counts[pos]) {
      ++used_symbols;
    }
  }
  if (used_symbols < 16) {
    return;
  }

  // Ignore trailing zeros from here on.
  while (length != 0 && counts[length - 1] == 0) {
    --length;
  }
  if (length == 0) {
    return;  // All zeros.
  }

  // 1) Gather statistics and fill isolated single zeros when the histogram
  //    is dense and has small counts.
  size_t nonzeros = 0;
  uint32_t smallest_nonzero = 1 << 30;
  for (size_t pos = 0; pos < length; ++pos) {
    const uint32_t c = counts[pos];
    if (c == 0) {
      continue;
    }
    ++nonzeros;
    if (c < smallest_nonzero) {
      smallest_nonzero = c;
    }
  }
  if (nonzeros < 5) {
    // Small histogram will model it well.
    return;
  }
  const size_t zeros = length - nonzeros;
  if (smallest_nonzero < 4 && zeros < 6) {
    for (size_t pos = 1; pos < length - 1; ++pos) {
      if (counts[pos - 1] != 0 && counts[pos] == 0 && counts[pos + 1] != 0) {
        counts[pos] = 1;
      }
    }
  }
  if (nonzeros < 28) {
    return;
  }

  // 2) Let's mark all population counts that already can be encoded
  //    with an rle code, so that they are not spoiled below:
  //    runs of at least 5 equal zeros and of at least 7 equal non-zeros.
  memset(good_for_rle, 0, length);
  {
    uint32_t symbol = counts[0];
    size_t run = 0;
    for (size_t pos = 0; pos <= length; ++pos) {
      if (pos != length && counts[pos] == symbol) {
        ++run;
        continue;
      }
      // The run [pos - run, pos) has just ended.
      const size_t min_run = (symbol == 0) ? 5 : 7;
      if (run >= min_run) {
        memset(good_for_rle + (pos - run), 1, run);
      }
      run = 1;
      if (pos != length) {
        symbol = counts[pos];
      }
    }
  }

  // 3) Let's replace those population counts that lead to more rle codes.
  //    Math here is in 24.8 fixed point representation.
  size_t stride = 0;
  size_t limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  size_t sum = 0;
  for (size_t pos = 0; pos <= length; ++pos) {
    // The unsigned comparison is true when 256 * counts[pos] lies at least
    // kStreakLimit above limit or more than kStreakLimit below it.
    const bool stride_ends =
        pos == length || good_for_rle[pos] ||
        (pos != 0 && good_for_rle[pos - 1]) ||
        (256 * counts[pos] - limit + kStreakLimit) >= 2 * kStreakLimit;
    if (stride_ends) {
      if (stride >= 4 || (stride >= 3 && sum == 0)) {
        // The stride must end, collapse what we have, if we have enough (4).
        size_t count;
        if (sum == 0) {
          // Don't make an all zeros stride to be upgraded to ones.
          count = 0;
        } else {
          count = (sum + stride / 2) / stride;
          if (count == 0) {
            count = 1;
          }
        }
        // counts[pos] already belongs to the next stride, so the collapsed
        // range is [pos - stride, pos).
        for (size_t k = 0; k < stride; ++k) {
          counts[pos - k - 1] = static_cast<uint32_t>(count);
        }
      }
      stride = 0;
      sum = 0;
      if (pos < length - 2) {
        // All interesting strides have a count of at least 4,
        // at least when non-zeros.
        limit = 256 * (counts[pos] + counts[pos + 1] + counts[pos + 2]) / 3
            + 420;
      } else if (pos < length) {
        limit = 256 * counts[pos];
      } else {
        limit = 0;
      }
    }
    ++stride;
    if (pos != length) {
      sum += counts[pos];
      if (stride >= 4) {
        limit = (256 * sum + stride / 2) / stride;
      }
      if (stride == 4) {
        limit += 120;
      }
    }
  }
}
|
||||
|
||||
// Scans the runs of equal values in depth[0..length) and decides whether
// run-length coding should be used for non-zero values and for zeros.
// Only zero runs of at least 3 and non-zero runs of at least 4 are counted;
// rle is chosen for a kind when the total length of its counted runs exceeds
// twice (number of counted runs + 1).
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
                             bool *use_rle_for_non_zero,
                             bool *use_rle_for_zero) {
  size_t zero_run_total = 0;
  size_t nonzero_run_total = 0;
  size_t zero_run_count = 1;
  size_t nonzero_run_count = 1;
  size_t pos = 0;
  while (pos < length) {
    const uint8_t value = depth[pos];
    size_t run_end = pos + 1;
    while (run_end < length && depth[run_end] == value) {
      ++run_end;
    }
    const size_t reps = run_end - pos;
    if (value == 0) {
      if (reps >= 3) {
        zero_run_total += reps;
        ++zero_run_count;
      }
    } else if (reps >= 4) {
      nonzero_run_total += reps;
      ++nonzero_run_count;
    }
    pos = run_end;
  }
  *use_rle_for_non_zero = nonzero_run_total > nonzero_run_count * 2;
  *use_rle_for_zero = zero_run_total > zero_run_count * 2;
}
|
||||
|
||||
void WriteHuffmanTree(const uint8_t* depth,
|
||||
size_t length,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
uint8_t previous_value = 8;
|
||||
|
||||
// Throw away trailing zeros.
|
||||
size_t new_length = length;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
if (depth[length - i - 1] == 0) {
|
||||
--new_length;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// First gather statistics on if it is a good idea to do rle.
|
||||
bool use_rle_for_non_zero = false;
|
||||
bool use_rle_for_zero = false;
|
||||
if (length > 50) {
|
||||
// Find rle coding for longer codes.
|
||||
// Shorter codes seem not to benefit from rle.
|
||||
DecideOverRleUse(depth, new_length,
|
||||
&use_rle_for_non_zero, &use_rle_for_zero);
|
||||
}
|
||||
|
||||
// Actual rle coding.
|
||||
for (size_t i = 0; i < new_length;) {
|
||||
const uint8_t value = depth[i];
|
||||
size_t reps = 1;
|
||||
if ((value != 0 && use_rle_for_non_zero) ||
|
||||
(value == 0 && use_rle_for_zero)) {
|
||||
for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
}
|
||||
if (value == 0) {
|
||||
WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
|
||||
} else {
|
||||
WriteHuffmanTreeRepetitions(previous_value,
|
||||
value, reps, tree_size,
|
||||
tree, extra_bits_data);
|
||||
previous_value = value;
|
||||
}
|
||||
i += reps;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the lowest num_bits bits of `bits` in reversed order.
// The value is reversed four bits at a time through a lookup table and the
// surplus low bits of the last nibble are shifted out at the end.
uint16_t ReverseBits(int num_bits, uint16_t bits) {
  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  size_t reversed = kLut[bits & 0xf];
  int consumed = 4;
  while (consumed < num_bits) {
    bits = static_cast<uint16_t>(bits >> 4);
    reversed = (reversed << 4) | kLut[bits & 0xf];
    consumed += 4;
  }
  // Drop the padding of the last, partially used nibble.
  reversed >>= (-num_bits & 0x3);
  return static_cast<uint16_t>(reversed);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void ConvertBitDepthsToSymbols(const uint8_t *depth,
|
||||
size_t len,
|
||||
uint16_t *bits) {
|
||||
// In Brotli, all bit depths are [1..15]
|
||||
// 0 bit depth means that the symbol does not exist.
|
||||
const int kMaxBits = 16; // 0..15 are values for bits
|
||||
uint16_t bl_count[kMaxBits] = { 0 };
|
||||
{
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
++bl_count[depth[i]];
|
||||
}
|
||||
bl_count[0] = 0;
|
||||
}
|
||||
uint16_t next_code[kMaxBits];
|
||||
next_code[0] = 0;
|
||||
{
|
||||
int code = 0;
|
||||
for (int bits = 1; bits < kMaxBits; ++bits) {
|
||||
code = (code + bl_count[bits - 1]) << 1;
|
||||
next_code[bits] = static_cast<uint16_t>(code);
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
if (depth[i]) {
|
||||
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,104 @@
|
|||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Entropy encoding (Huffman) utilities.
|
||||
|
||||
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
|
||||
#include <string.h>
|
||||
#include "./histogram.h"
|
||||
#include "./prefix.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// A node of a Huffman tree.
// The default constructor leaves all fields uninitialized.
struct HuffmanTree {
  HuffmanTree() {}
  HuffmanTree(uint32_t count, int16_t left, int16_t right) {
    total_count_ = count;
    index_left_ = left;
    index_right_or_value_ = right;
  }

  // Population count of this node.
  uint32_t total_count_;
  // Index of the left child.
  int16_t index_left_;
  // Index of the right child, or the stored value.
  int16_t index_right_or_value_;
};
|
||||
|
||||
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
|
||||
uint8_t *depth, uint8_t level);
|
||||
|
||||
// This function will create a Huffman tree.
|
||||
//
|
||||
// The (data,length) contains the population counts.
|
||||
// The tree_limit is the maximum bit depth of the Huffman codes.
|
||||
//
|
||||
// The depth contains the tree, i.e., how many bits are used for
|
||||
// the symbol.
|
||||
//
|
||||
// The actual Huffman tree is constructed in the tree[] array, which has to
|
||||
// be at least 2 * length + 1 long.
|
||||
//
|
||||
// See http://en.wikipedia.org/wiki/Huffman_coding
|
||||
void CreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth);
|
||||
|
||||
// Change the population counts in a way that the consequent
|
||||
// Huffman tree compression, especially its rle-part will be more
|
||||
// likely to compress this data more efficiently.
|
||||
//
|
||||
// length contains the size of the histogram.
|
||||
// counts contains the population counts.
|
||||
// good_for_rle is a buffer of at least length size
|
||||
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
uint8_t* good_for_rle);
|
||||
|
||||
// Converts an array of num bit depths into the bitstream representation of a
|
||||
// Huffman tree. The generated representation is meant to be compressed once
|
||||
// more with another Huffman tree.
// NOTE(review): presumably tree and extra_bits_data receive the output and
// *tree_size the number of entries written; confirm against the definition.
|
||||
void WriteHuffmanTree(const uint8_t* depth,
|
||||
size_t num,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data);
|
||||
|
||||
// Computes the actual code bit values for a tree given as len bit depths.
// NOTE(review): presumably bits[i] receives the code for symbol i and must
// have room for len entries; confirm against the definition.
|
||||
void ConvertBitDepthsToSymbols(const uint8_t *depth,
|
||||
size_t len,
|
||||
uint16_t *bits);
|
||||
|
||||
// Describes a prefix code over an alphabet of kSize symbols.
template <int kSize>
struct EntropyCode {
  uint8_t depth_[kSize];  // Number of bits used for each symbol.
  uint16_t bits_[kSize];  // Actual bit pattern representing each symbol.
  int count_;             // Number of symbols with a non-zero depth.
  int symbols_[4];        // The first four symbols with a non-zero depth.
};
|
||||
|
||||
// NOTE(review): presumably the number of symbols in the code-length alphabet
// (the alphabet used to encode Huffman code lengths); confirm against the
// users of this constant.
static const int kCodeLengthCodes = 18;
|
||||
|
||||
// Literal entropy code (alphabet of 256 symbols).
|
||||
typedef EntropyCode<256> EntropyCodeLiteral;
|
||||
// Prefix entropy codes; the alphabet sizes are constants from prefix.h.
|
||||
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
|
||||
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
|
||||
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
|
||||
// Context map entropy code: 256 Huffman tree indexes + 16 run length codes.
|
||||
typedef EntropyCode<272> EntropyCodeContextMap;
|
||||
// Block type entropy code: 256 block types + 2 special symbols.
|
||||
typedef EntropyCode<258> EntropyCodeBlockType;
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
|
|
@ -0,0 +1,572 @@
|
|||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Static entropy codes used for faster meta-block encoding.
|
||||
|
||||
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
||||
|
||||
#include "./prefix.h"
|
||||
#include "./types.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Bit depths of a fixed prefix code over 18 symbols. Every symbol has depth 4
// except symbols 13 and 14 (depth 5) and symbol 15 (depth 0, i.e. no code).
// The matching bit patterns are in kCodeLengthBits.
static const uint8_t kCodeLengthDepth[18] = {
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
|
||||
};
|
||||
|
||||
// Bit depths of the static command prefix code: the first 448 symbols use
// 9 bits and the remaining 256 symbols use 11 bits, which exactly fills the
// code space (448 / 2^9 + 256 / 2^11 == 1).
static const uint8_t kStaticCommandCodeDepth[kNumCommandPrefixes] = {
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
||||
};
|
||||
|
||||
// Bit depths of the static distance prefix code: all 64 symbols use 6 bits,
// i.e. a flat fixed-length code.
static const uint8_t kStaticDistanceCodeDepth[64] = {
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
};
|
||||
|
||||
// Bit patterns for the 18 symbols whose depths are given by kCodeLengthDepth.
// Each entry is the canonical prefix code for that depth assignment with its
// bits reversed (e.g. symbol 1 has canonical code 0001 and is stored as 8).
// Symbol 15 has depth 0, so its entry is an unused 0.
static const uint32_t kCodeLengthBits[18] = {
|
||||
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
|
||||
};
|
||||
|
||||
// Emits a fixed 40-bit pattern, built with MAKE_UINT64_T(0xff, 0x55555554),
// through WriteBits. storage_ix and storage are forwarded to WriteBits
// unchanged.
inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
  const uint64_t code_length_header = MAKE_UINT64_T(0xff, 0x55555554);
  WriteBits(40, code_length_header, storage_ix, storage);
}
|
||||
|
||||
// Precomputed bit patterns, one per index in [0, 704); the number of bits in
// each pattern is given by the same index of kZeroRepsDepth.
// NOTE(review): presumably entry i is the ready-to-write encoding of a run of
// i zero code lengths under the static code-length code; confirm against the
// code that indexes this table.
static const uint64_t kZeroRepsBits[704] = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
|
||||
0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
|
||||
0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
|
||||
0x00003b87, 0x00000397, 0x00000b97, 0x00001397, 0x00001b97, 0x00002397,
|
||||
0x00002b97, 0x00003397, 0x00003b97, 0x000003a7, 0x00000ba7, 0x000013a7,
|
||||
0x00001ba7, 0x000023a7, 0x00002ba7, 0x000033a7, 0x00003ba7, 0x000003b7,
|
||||
0x00000bb7, 0x000013b7, 0x00001bb7, 0x000023b7, 0x00002bb7, 0x000033b7,
|
||||
0x00003bb7, 0x000003c7, 0x00000bc7, 0x000013c7, 0x00001bc7, 0x000023c7,
|
||||
0x00002bc7, 0x000033c7, 0x00003bc7, 0x000003d7, 0x00000bd7, 0x000013d7,
|
||||
0x00001bd7, 0x000023d7, 0x00002bd7, 0x000033d7, 0x00003bd7, 0x000003e7,
|
||||
0x00000be7, 0x000013e7, 0x00001be7, 0x000023e7, 0x00002be7, 0x000033e7,
|
||||
0x00003be7, 0x000003f7, 0x00000bf7, 0x000013f7, 0x00001bf7, 0x000023f7,
|
||||
0x00002bf7, 0x000033f7, 0x00003bf7, 0x0001c387, 0x0005c387, 0x0009c387,
|
||||
0x000dc387, 0x0011c387, 0x0015c387, 0x0019c387, 0x001dc387, 0x0001cb87,
|
||||
0x0005cb87, 0x0009cb87, 0x000dcb87, 0x0011cb87, 0x0015cb87, 0x0019cb87,
|
||||
0x001dcb87, 0x0001d387, 0x0005d387, 0x0009d387, 0x000dd387, 0x0011d387,
|
||||
0x0015d387, 0x0019d387, 0x001dd387, 0x0001db87, 0x0005db87, 0x0009db87,
|
||||
0x000ddb87, 0x0011db87, 0x0015db87, 0x0019db87, 0x001ddb87, 0x0001e387,
|
||||
0x0005e387, 0x0009e387, 0x000de387, 0x0011e387, 0x0015e387, 0x0019e387,
|
||||
0x001de387, 0x0001eb87, 0x0005eb87, 0x0009eb87, 0x000deb87, 0x0011eb87,
|
||||
0x0015eb87, 0x0019eb87, 0x001deb87, 0x0001f387, 0x0005f387, 0x0009f387,
|
||||
0x000df387, 0x0011f387, 0x0015f387, 0x0019f387, 0x001df387, 0x0001fb87,
|
||||
0x0005fb87, 0x0009fb87, 0x000dfb87, 0x0011fb87, 0x0015fb87, 0x0019fb87,
|
||||
0x001dfb87, 0x0001c397, 0x0005c397, 0x0009c397, 0x000dc397, 0x0011c397,
|
||||
0x0015c397, 0x0019c397, 0x001dc397, 0x0001cb97, 0x0005cb97, 0x0009cb97,
|
||||
0x000dcb97, 0x0011cb97, 0x0015cb97, 0x0019cb97, 0x001dcb97, 0x0001d397,
|
||||
0x0005d397, 0x0009d397, 0x000dd397, 0x0011d397, 0x0015d397, 0x0019d397,
|
||||
0x001dd397, 0x0001db97, 0x0005db97, 0x0009db97, 0x000ddb97, 0x0011db97,
|
||||
0x0015db97, 0x0019db97, 0x001ddb97, 0x0001e397, 0x0005e397, 0x0009e397,
|
||||
0x000de397, 0x0011e397, 0x0015e397, 0x0019e397, 0x001de397, 0x0001eb97,
|
||||
0x0005eb97, 0x0009eb97, 0x000deb97, 0x0011eb97, 0x0015eb97, 0x0019eb97,
|
||||
0x001deb97, 0x0001f397, 0x0005f397, 0x0009f397, 0x000df397, 0x0011f397,
|
||||
0x0015f397, 0x0019f397, 0x001df397, 0x0001fb97, 0x0005fb97, 0x0009fb97,
|
||||
0x000dfb97, 0x0011fb97, 0x0015fb97, 0x0019fb97, 0x001dfb97, 0x0001c3a7,
|
||||
0x0005c3a7, 0x0009c3a7, 0x000dc3a7, 0x0011c3a7, 0x0015c3a7, 0x0019c3a7,
|
||||
0x001dc3a7, 0x0001cba7, 0x0005cba7, 0x0009cba7, 0x000dcba7, 0x0011cba7,
|
||||
0x0015cba7, 0x0019cba7, 0x001dcba7, 0x0001d3a7, 0x0005d3a7, 0x0009d3a7,
|
||||
0x000dd3a7, 0x0011d3a7, 0x0015d3a7, 0x0019d3a7, 0x001dd3a7, 0x0001dba7,
|
||||
0x0005dba7, 0x0009dba7, 0x000ddba7, 0x0011dba7, 0x0015dba7, 0x0019dba7,
|
||||
0x001ddba7, 0x0001e3a7, 0x0005e3a7, 0x0009e3a7, 0x000de3a7, 0x0011e3a7,
|
||||
0x0015e3a7, 0x0019e3a7, 0x001de3a7, 0x0001eba7, 0x0005eba7, 0x0009eba7,
|
||||
0x000deba7, 0x0011eba7, 0x0015eba7, 0x0019eba7, 0x001deba7, 0x0001f3a7,
|
||||
0x0005f3a7, 0x0009f3a7, 0x000df3a7, 0x0011f3a7, 0x0015f3a7, 0x0019f3a7,
|
||||
0x001df3a7, 0x0001fba7, 0x0005fba7, 0x0009fba7, 0x000dfba7, 0x0011fba7,
|
||||
0x0015fba7, 0x0019fba7, 0x001dfba7, 0x0001c3b7, 0x0005c3b7, 0x0009c3b7,
|
||||
0x000dc3b7, 0x0011c3b7, 0x0015c3b7, 0x0019c3b7, 0x001dc3b7, 0x0001cbb7,
|
||||
0x0005cbb7, 0x0009cbb7, 0x000dcbb7, 0x0011cbb7, 0x0015cbb7, 0x0019cbb7,
|
||||
0x001dcbb7, 0x0001d3b7, 0x0005d3b7, 0x0009d3b7, 0x000dd3b7, 0x0011d3b7,
|
||||
0x0015d3b7, 0x0019d3b7, 0x001dd3b7, 0x0001dbb7, 0x0005dbb7, 0x0009dbb7,
|
||||
0x000ddbb7, 0x0011dbb7, 0x0015dbb7, 0x0019dbb7, 0x001ddbb7, 0x0001e3b7,
|
||||
0x0005e3b7, 0x0009e3b7, 0x000de3b7, 0x0011e3b7, 0x0015e3b7, 0x0019e3b7,
|
||||
0x001de3b7, 0x0001ebb7, 0x0005ebb7, 0x0009ebb7, 0x000debb7, 0x0011ebb7,
|
||||
0x0015ebb7, 0x0019ebb7, 0x001debb7, 0x0001f3b7, 0x0005f3b7, 0x0009f3b7,
|
||||
0x000df3b7, 0x0011f3b7, 0x0015f3b7, 0x0019f3b7, 0x001df3b7, 0x0001fbb7,
|
||||
0x0005fbb7, 0x0009fbb7, 0x000dfbb7, 0x0011fbb7, 0x0015fbb7, 0x0019fbb7,
|
||||
0x001dfbb7, 0x0001c3c7, 0x0005c3c7, 0x0009c3c7, 0x000dc3c7, 0x0011c3c7,
|
||||
0x0015c3c7, 0x0019c3c7, 0x001dc3c7, 0x0001cbc7, 0x0005cbc7, 0x0009cbc7,
|
||||
0x000dcbc7, 0x0011cbc7, 0x0015cbc7, 0x0019cbc7, 0x001dcbc7, 0x0001d3c7,
|
||||
0x0005d3c7, 0x0009d3c7, 0x000dd3c7, 0x0011d3c7, 0x0015d3c7, 0x0019d3c7,
|
||||
0x001dd3c7, 0x0001dbc7, 0x0005dbc7, 0x0009dbc7, 0x000ddbc7, 0x0011dbc7,
|
||||
0x0015dbc7, 0x0019dbc7, 0x001ddbc7, 0x0001e3c7, 0x0005e3c7, 0x0009e3c7,
|
||||
0x000de3c7, 0x0011e3c7, 0x0015e3c7, 0x0019e3c7, 0x001de3c7, 0x0001ebc7,
|
||||
0x0005ebc7, 0x0009ebc7, 0x000debc7, 0x0011ebc7, 0x0015ebc7, 0x0019ebc7,
|
||||
0x001debc7, 0x0001f3c7, 0x0005f3c7, 0x0009f3c7, 0x000df3c7, 0x0011f3c7,
|
||||
0x0015f3c7, 0x0019f3c7, 0x001df3c7, 0x0001fbc7, 0x0005fbc7, 0x0009fbc7,
|
||||
0x000dfbc7, 0x0011fbc7, 0x0015fbc7, 0x0019fbc7, 0x001dfbc7, 0x0001c3d7,
|
||||
0x0005c3d7, 0x0009c3d7, 0x000dc3d7, 0x0011c3d7, 0x0015c3d7, 0x0019c3d7,
|
||||
0x001dc3d7, 0x0001cbd7, 0x0005cbd7, 0x0009cbd7, 0x000dcbd7, 0x0011cbd7,
|
||||
0x0015cbd7, 0x0019cbd7, 0x001dcbd7, 0x0001d3d7, 0x0005d3d7, 0x0009d3d7,
|
||||
0x000dd3d7, 0x0011d3d7, 0x0015d3d7, 0x0019d3d7, 0x001dd3d7, 0x0001dbd7,
|
||||
0x0005dbd7, 0x0009dbd7, 0x000ddbd7, 0x0011dbd7, 0x0015dbd7, 0x0019dbd7,
|
||||
0x001ddbd7, 0x0001e3d7, 0x0005e3d7, 0x0009e3d7, 0x000de3d7, 0x0011e3d7,
|
||||
0x0015e3d7, 0x0019e3d7, 0x001de3d7, 0x0001ebd7, 0x0005ebd7, 0x0009ebd7,
|
||||
0x000debd7, 0x0011ebd7, 0x0015ebd7, 0x0019ebd7, 0x001debd7, 0x0001f3d7,
|
||||
0x0005f3d7, 0x0009f3d7, 0x000df3d7, 0x0011f3d7, 0x0015f3d7, 0x0019f3d7,
|
||||
0x001df3d7, 0x0001fbd7, 0x0005fbd7, 0x0009fbd7, 0x000dfbd7, 0x0011fbd7,
|
||||
0x0015fbd7, 0x0019fbd7, 0x001dfbd7, 0x0001c3e7, 0x0005c3e7, 0x0009c3e7,
|
||||
0x000dc3e7, 0x0011c3e7, 0x0015c3e7, 0x0019c3e7, 0x001dc3e7, 0x0001cbe7,
|
||||
0x0005cbe7, 0x0009cbe7, 0x000dcbe7, 0x0011cbe7, 0x0015cbe7, 0x0019cbe7,
|
||||
0x001dcbe7, 0x0001d3e7, 0x0005d3e7, 0x0009d3e7, 0x000dd3e7, 0x0011d3e7,
|
||||
0x0015d3e7, 0x0019d3e7, 0x001dd3e7, 0x0001dbe7, 0x0005dbe7, 0x0009dbe7,
|
||||
0x000ddbe7, 0x0011dbe7, 0x0015dbe7, 0x0019dbe7, 0x001ddbe7, 0x0001e3e7,
|
||||
0x0005e3e7, 0x0009e3e7, 0x000de3e7, 0x0011e3e7, 0x0015e3e7, 0x0019e3e7,
|
||||
0x001de3e7, 0x0001ebe7, 0x0005ebe7, 0x0009ebe7, 0x000debe7, 0x0011ebe7,
|
||||
0x0015ebe7, 0x0019ebe7, 0x001debe7, 0x0001f3e7, 0x0005f3e7, 0x0009f3e7,
|
||||
0x000df3e7, 0x0011f3e7, 0x0015f3e7, 0x0019f3e7, 0x001df3e7, 0x0001fbe7,
|
||||
0x0005fbe7, 0x0009fbe7, 0x000dfbe7, 0x0011fbe7, 0x0015fbe7, 0x0019fbe7,
|
||||
0x001dfbe7, 0x0001c3f7, 0x0005c3f7, 0x0009c3f7, 0x000dc3f7, 0x0011c3f7,
|
||||
0x0015c3f7, 0x0019c3f7, 0x001dc3f7, 0x0001cbf7, 0x0005cbf7, 0x0009cbf7,
|
||||
0x000dcbf7, 0x0011cbf7, 0x0015cbf7, 0x0019cbf7, 0x001dcbf7, 0x0001d3f7,
|
||||
0x0005d3f7, 0x0009d3f7, 0x000dd3f7, 0x0011d3f7, 0x0015d3f7, 0x0019d3f7,
|
||||
0x001dd3f7, 0x0001dbf7, 0x0005dbf7, 0x0009dbf7, 0x000ddbf7, 0x0011dbf7,
|
||||
0x0015dbf7, 0x0019dbf7, 0x001ddbf7, 0x0001e3f7, 0x0005e3f7, 0x0009e3f7,
|
||||
0x000de3f7, 0x0011e3f7, 0x0015e3f7, 0x0019e3f7, 0x001de3f7, 0x0001ebf7,
|
||||
0x0005ebf7, 0x0009ebf7, 0x000debf7, 0x0011ebf7, 0x0015ebf7, 0x0019ebf7,
|
||||
0x001debf7, 0x0001f3f7, 0x0005f3f7, 0x0009f3f7, 0x000df3f7, 0x0011f3f7,
|
||||
0x0015f3f7, 0x0019f3f7, 0x001df3f7, 0x0001fbf7, 0x0005fbf7, 0x0009fbf7,
|
||||
0x000dfbf7, 0x0011fbf7, 0x0015fbf7, 0x0019fbf7, 0x001dfbf7, 0x00e1c387,
|
||||
0x02e1c387, 0x04e1c387, 0x06e1c387, 0x08e1c387, 0x0ae1c387, 0x0ce1c387,
|
||||
0x0ee1c387, 0x00e5c387, 0x02e5c387, 0x04e5c387, 0x06e5c387, 0x08e5c387,
|
||||
0x0ae5c387, 0x0ce5c387, 0x0ee5c387, 0x00e9c387, 0x02e9c387, 0x04e9c387,
|
||||
0x06e9c387, 0x08e9c387, 0x0ae9c387, 0x0ce9c387, 0x0ee9c387, 0x00edc387,
|
||||
0x02edc387, 0x04edc387, 0x06edc387, 0x08edc387, 0x0aedc387, 0x0cedc387,
|
||||
0x0eedc387, 0x00f1c387, 0x02f1c387, 0x04f1c387, 0x06f1c387, 0x08f1c387,
|
||||
0x0af1c387, 0x0cf1c387, 0x0ef1c387, 0x00f5c387, 0x02f5c387, 0x04f5c387,
|
||||
0x06f5c387, 0x08f5c387, 0x0af5c387, 0x0cf5c387, 0x0ef5c387, 0x00f9c387,
|
||||
0x02f9c387, 0x04f9c387, 0x06f9c387, 0x08f9c387, 0x0af9c387, 0x0cf9c387,
|
||||
0x0ef9c387, 0x00fdc387, 0x02fdc387, 0x04fdc387, 0x06fdc387, 0x08fdc387,
|
||||
0x0afdc387, 0x0cfdc387, 0x0efdc387, 0x00e1cb87, 0x02e1cb87, 0x04e1cb87,
|
||||
0x06e1cb87, 0x08e1cb87, 0x0ae1cb87, 0x0ce1cb87, 0x0ee1cb87, 0x00e5cb87,
|
||||
0x02e5cb87, 0x04e5cb87, 0x06e5cb87, 0x08e5cb87, 0x0ae5cb87, 0x0ce5cb87,
|
||||
0x0ee5cb87, 0x00e9cb87, 0x02e9cb87, 0x04e9cb87, 0x06e9cb87, 0x08e9cb87,
|
||||
0x0ae9cb87, 0x0ce9cb87, 0x0ee9cb87, 0x00edcb87, 0x02edcb87, 0x04edcb87,
|
||||
0x06edcb87, 0x08edcb87, 0x0aedcb87, 0x0cedcb87, 0x0eedcb87, 0x00f1cb87,
|
||||
0x02f1cb87, 0x04f1cb87, 0x06f1cb87, 0x08f1cb87, 0x0af1cb87, 0x0cf1cb87,
|
||||
0x0ef1cb87, 0x00f5cb87, 0x02f5cb87, 0x04f5cb87, 0x06f5cb87, 0x08f5cb87,
|
||||
0x0af5cb87, 0x0cf5cb87, 0x0ef5cb87, 0x00f9cb87, 0x02f9cb87, 0x04f9cb87,
|
||||
0x06f9cb87, 0x08f9cb87,
|
||||
};
|
||||
|
||||
// Bit lengths paired index-for-index with kZeroRepsBits. The values used are
// 0, 4, 8, 7, 11, 14, 21 and 28.
// NOTE(review): presumably indexed by the length of a run of zeros; confirm
// against the code that indexes this table.
static const uint32_t kZeroRepsDepth[704] = {
|
||||
0, 4, 8, 7, 7, 7, 7, 7, 7, 7, 7, 11, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
|
||||
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
};
|
||||
|
||||
// Precomputed bit patterns, one per index in [0, 704); the number of bits in
// each pattern is given by the same index of kNonZeroRepsDepth.
// NOTE(review): presumably the ready-to-write encodings of runs of a repeated
// non-zero code length under the static code-length code; confirm against the
// code that indexes this table.
static const uint64_t kNonZeroRepsBits[704] = {
|
||||
0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
|
||||
0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
|
||||
0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
|
||||
0x00000afb, 0x00000efb, 0x0000b2cb, 0x0001b2cb, 0x0002b2cb, 0x0003b2cb,
|
||||
0x0000b6cb, 0x0001b6cb, 0x0002b6cb, 0x0003b6cb, 0x0000bacb, 0x0001bacb,
|
||||
0x0002bacb, 0x0003bacb, 0x0000becb, 0x0001becb, 0x0002becb, 0x0003becb,
|
||||
0x0000b2db, 0x0001b2db, 0x0002b2db, 0x0003b2db, 0x0000b6db, 0x0001b6db,
|
||||
0x0002b6db, 0x0003b6db, 0x0000badb, 0x0001badb, 0x0002badb, 0x0003badb,
|
||||
0x0000bedb, 0x0001bedb, 0x0002bedb, 0x0003bedb, 0x0000b2eb, 0x0001b2eb,
|
||||
0x0002b2eb, 0x0003b2eb, 0x0000b6eb, 0x0001b6eb, 0x0002b6eb, 0x0003b6eb,
|
||||
0x0000baeb, 0x0001baeb, 0x0002baeb, 0x0003baeb, 0x0000beeb, 0x0001beeb,
|
||||
0x0002beeb, 0x0003beeb, 0x0000b2fb, 0x0001b2fb, 0x0002b2fb, 0x0003b2fb,
|
||||
0x0000b6fb, 0x0001b6fb, 0x0002b6fb, 0x0003b6fb, 0x0000bafb, 0x0001bafb,
|
||||
0x0002bafb, 0x0003bafb, 0x0000befb, 0x0001befb, 0x0002befb, 0x0003befb,
|
||||
0x002cb2cb, 0x006cb2cb, 0x00acb2cb, 0x00ecb2cb, 0x002db2cb, 0x006db2cb,
|
||||
0x00adb2cb, 0x00edb2cb, 0x002eb2cb, 0x006eb2cb, 0x00aeb2cb, 0x00eeb2cb,
|
||||
0x002fb2cb, 0x006fb2cb, 0x00afb2cb, 0x00efb2cb, 0x002cb6cb, 0x006cb6cb,
|
||||
0x00acb6cb, 0x00ecb6cb, 0x002db6cb, 0x006db6cb, 0x00adb6cb, 0x00edb6cb,
|
||||
0x002eb6cb, 0x006eb6cb, 0x00aeb6cb, 0x00eeb6cb, 0x002fb6cb, 0x006fb6cb,
|
||||
0x00afb6cb, 0x00efb6cb, 0x002cbacb, 0x006cbacb, 0x00acbacb, 0x00ecbacb,
|
||||
0x002dbacb, 0x006dbacb, 0x00adbacb, 0x00edbacb, 0x002ebacb, 0x006ebacb,
|
||||
0x00aebacb, 0x00eebacb, 0x002fbacb, 0x006fbacb, 0x00afbacb, 0x00efbacb,
|
||||
0x002cbecb, 0x006cbecb, 0x00acbecb, 0x00ecbecb, 0x002dbecb, 0x006dbecb,
|
||||
0x00adbecb, 0x00edbecb, 0x002ebecb, 0x006ebecb, 0x00aebecb, 0x00eebecb,
|
||||
0x002fbecb, 0x006fbecb, 0x00afbecb, 0x00efbecb, 0x002cb2db, 0x006cb2db,
|
||||
0x00acb2db, 0x00ecb2db, 0x002db2db, 0x006db2db, 0x00adb2db, 0x00edb2db,
|
||||
0x002eb2db, 0x006eb2db, 0x00aeb2db, 0x00eeb2db, 0x002fb2db, 0x006fb2db,
|
||||
0x00afb2db, 0x00efb2db, 0x002cb6db, 0x006cb6db, 0x00acb6db, 0x00ecb6db,
|
||||
0x002db6db, 0x006db6db, 0x00adb6db, 0x00edb6db, 0x002eb6db, 0x006eb6db,
|
||||
0x00aeb6db, 0x00eeb6db, 0x002fb6db, 0x006fb6db, 0x00afb6db, 0x00efb6db,
|
||||
0x002cbadb, 0x006cbadb, 0x00acbadb, 0x00ecbadb, 0x002dbadb, 0x006dbadb,
|
||||
0x00adbadb, 0x00edbadb, 0x002ebadb, 0x006ebadb, 0x00aebadb, 0x00eebadb,
|
||||
0x002fbadb, 0x006fbadb, 0x00afbadb, 0x00efbadb, 0x002cbedb, 0x006cbedb,
|
||||
0x00acbedb, 0x00ecbedb, 0x002dbedb, 0x006dbedb, 0x00adbedb, 0x00edbedb,
|
||||
0x002ebedb, 0x006ebedb, 0x00aebedb, 0x00eebedb, 0x002fbedb, 0x006fbedb,
|
||||
0x00afbedb, 0x00efbedb, 0x002cb2eb, 0x006cb2eb, 0x00acb2eb, 0x00ecb2eb,
|
||||
0x002db2eb, 0x006db2eb, 0x00adb2eb, 0x00edb2eb, 0x002eb2eb, 0x006eb2eb,
|
||||
0x00aeb2eb, 0x00eeb2eb, 0x002fb2eb, 0x006fb2eb, 0x00afb2eb, 0x00efb2eb,
|
||||
0x002cb6eb, 0x006cb6eb, 0x00acb6eb, 0x00ecb6eb, 0x002db6eb, 0x006db6eb,
|
||||
0x00adb6eb, 0x00edb6eb, 0x002eb6eb, 0x006eb6eb, 0x00aeb6eb, 0x00eeb6eb,
|
||||
0x002fb6eb, 0x006fb6eb, 0x00afb6eb, 0x00efb6eb, 0x002cbaeb, 0x006cbaeb,
|
||||
0x00acbaeb, 0x00ecbaeb, 0x002dbaeb, 0x006dbaeb, 0x00adbaeb, 0x00edbaeb,
|
||||
0x002ebaeb, 0x006ebaeb, 0x00aebaeb, 0x00eebaeb, 0x002fbaeb, 0x006fbaeb,
|
||||
0x00afbaeb, 0x00efbaeb, 0x002cbeeb, 0x006cbeeb, 0x00acbeeb, 0x00ecbeeb,
|
||||
0x002dbeeb, 0x006dbeeb, 0x00adbeeb, 0x00edbeeb, 0x002ebeeb, 0x006ebeeb,
|
||||
0x00aebeeb, 0x00eebeeb, 0x002fbeeb, 0x006fbeeb, 0x00afbeeb, 0x00efbeeb,
|
||||
0x002cb2fb, 0x006cb2fb, 0x00acb2fb, 0x00ecb2fb, 0x002db2fb, 0x006db2fb,
|
||||
0x00adb2fb, 0x00edb2fb, 0x002eb2fb, 0x006eb2fb, 0x00aeb2fb, 0x00eeb2fb,
|
||||
0x002fb2fb, 0x006fb2fb, 0x00afb2fb, 0x00efb2fb, 0x002cb6fb, 0x006cb6fb,
|
||||
0x00acb6fb, 0x00ecb6fb, 0x002db6fb, 0x006db6fb, 0x00adb6fb, 0x00edb6fb,
|
||||
0x002eb6fb, 0x006eb6fb, 0x00aeb6fb, 0x00eeb6fb, 0x002fb6fb, 0x006fb6fb,
|
||||
0x00afb6fb, 0x00efb6fb, 0x002cbafb, 0x006cbafb, 0x00acbafb, 0x00ecbafb,
|
||||
0x002dbafb, 0x006dbafb, 0x00adbafb, 0x00edbafb, 0x002ebafb, 0x006ebafb,
|
||||
0x00aebafb, 0x00eebafb, 0x002fbafb, 0x006fbafb, 0x00afbafb, 0x00efbafb,
|
||||
0x002cbefb, 0x006cbefb, 0x00acbefb, 0x00ecbefb, 0x002dbefb, 0x006dbefb,
|
||||
0x00adbefb, 0x00edbefb, 0x002ebefb, 0x006ebefb, 0x00aebefb, 0x00eebefb,
|
||||
0x002fbefb, 0x006fbefb, 0x00afbefb, 0x00efbefb, 0x0b2cb2cb, 0x1b2cb2cb,
|
||||
0x2b2cb2cb, 0x3b2cb2cb, 0x0b6cb2cb, 0x1b6cb2cb, 0x2b6cb2cb, 0x3b6cb2cb,
|
||||
0x0bacb2cb, 0x1bacb2cb, 0x2bacb2cb, 0x3bacb2cb, 0x0becb2cb, 0x1becb2cb,
|
||||
0x2becb2cb, 0x3becb2cb, 0x0b2db2cb, 0x1b2db2cb, 0x2b2db2cb, 0x3b2db2cb,
|
||||
0x0b6db2cb, 0x1b6db2cb, 0x2b6db2cb, 0x3b6db2cb, 0x0badb2cb, 0x1badb2cb,
|
||||
0x2badb2cb, 0x3badb2cb, 0x0bedb2cb, 0x1bedb2cb, 0x2bedb2cb, 0x3bedb2cb,
|
||||
0x0b2eb2cb, 0x1b2eb2cb, 0x2b2eb2cb, 0x3b2eb2cb, 0x0b6eb2cb, 0x1b6eb2cb,
|
||||
0x2b6eb2cb, 0x3b6eb2cb, 0x0baeb2cb, 0x1baeb2cb, 0x2baeb2cb, 0x3baeb2cb,
|
||||
0x0beeb2cb, 0x1beeb2cb, 0x2beeb2cb, 0x3beeb2cb, 0x0b2fb2cb, 0x1b2fb2cb,
|
||||
0x2b2fb2cb, 0x3b2fb2cb, 0x0b6fb2cb, 0x1b6fb2cb, 0x2b6fb2cb, 0x3b6fb2cb,
|
||||
0x0bafb2cb, 0x1bafb2cb, 0x2bafb2cb, 0x3bafb2cb, 0x0befb2cb, 0x1befb2cb,
|
||||
0x2befb2cb, 0x3befb2cb, 0x0b2cb6cb, 0x1b2cb6cb, 0x2b2cb6cb, 0x3b2cb6cb,
|
||||
0x0b6cb6cb, 0x1b6cb6cb, 0x2b6cb6cb, 0x3b6cb6cb, 0x0bacb6cb, 0x1bacb6cb,
|
||||
0x2bacb6cb, 0x3bacb6cb, 0x0becb6cb, 0x1becb6cb, 0x2becb6cb, 0x3becb6cb,
|
||||
0x0b2db6cb, 0x1b2db6cb, 0x2b2db6cb, 0x3b2db6cb, 0x0b6db6cb, 0x1b6db6cb,
|
||||
0x2b6db6cb, 0x3b6db6cb, 0x0badb6cb, 0x1badb6cb, 0x2badb6cb, 0x3badb6cb,
|
||||
0x0bedb6cb, 0x1bedb6cb, 0x2bedb6cb, 0x3bedb6cb, 0x0b2eb6cb, 0x1b2eb6cb,
|
||||
0x2b2eb6cb, 0x3b2eb6cb, 0x0b6eb6cb, 0x1b6eb6cb, 0x2b6eb6cb, 0x3b6eb6cb,
|
||||
0x0baeb6cb, 0x1baeb6cb, 0x2baeb6cb, 0x3baeb6cb, 0x0beeb6cb, 0x1beeb6cb,
|
||||
0x2beeb6cb, 0x3beeb6cb, 0x0b2fb6cb, 0x1b2fb6cb, 0x2b2fb6cb, 0x3b2fb6cb,
|
||||
0x0b6fb6cb, 0x1b6fb6cb, 0x2b6fb6cb, 0x3b6fb6cb, 0x0bafb6cb, 0x1bafb6cb,
|
||||
0x2bafb6cb, 0x3bafb6cb, 0x0befb6cb, 0x1befb6cb, 0x2befb6cb, 0x3befb6cb,
|
||||
0x0b2cbacb, 0x1b2cbacb, 0x2b2cbacb, 0x3b2cbacb, 0x0b6cbacb, 0x1b6cbacb,
|
||||
0x2b6cbacb, 0x3b6cbacb, 0x0bacbacb, 0x1bacbacb, 0x2bacbacb, 0x3bacbacb,
|
||||
0x0becbacb, 0x1becbacb, 0x2becbacb, 0x3becbacb, 0x0b2dbacb, 0x1b2dbacb,
|
||||
0x2b2dbacb, 0x3b2dbacb, 0x0b6dbacb, 0x1b6dbacb, 0x2b6dbacb, 0x3b6dbacb,
|
||||
0x0badbacb, 0x1badbacb, 0x2badbacb, 0x3badbacb, 0x0bedbacb, 0x1bedbacb,
|
||||
0x2bedbacb, 0x3bedbacb, 0x0b2ebacb, 0x1b2ebacb, 0x2b2ebacb, 0x3b2ebacb,
|
||||
0x0b6ebacb, 0x1b6ebacb, 0x2b6ebacb, 0x3b6ebacb, 0x0baebacb, 0x1baebacb,
|
||||
0x2baebacb, 0x3baebacb, 0x0beebacb, 0x1beebacb, 0x2beebacb, 0x3beebacb,
|
||||
0x0b2fbacb, 0x1b2fbacb, 0x2b2fbacb, 0x3b2fbacb, 0x0b6fbacb, 0x1b6fbacb,
|
||||
0x2b6fbacb, 0x3b6fbacb, 0x0bafbacb, 0x1bafbacb, 0x2bafbacb, 0x3bafbacb,
|
||||
0x0befbacb, 0x1befbacb, 0x2befbacb, 0x3befbacb, 0x0b2cbecb, 0x1b2cbecb,
|
||||
0x2b2cbecb, 0x3b2cbecb, 0x0b6cbecb, 0x1b6cbecb, 0x2b6cbecb, 0x3b6cbecb,
|
||||
0x0bacbecb, 0x1bacbecb, 0x2bacbecb, 0x3bacbecb, 0x0becbecb, 0x1becbecb,
|
||||
0x2becbecb, 0x3becbecb, 0x0b2dbecb, 0x1b2dbecb, 0x2b2dbecb, 0x3b2dbecb,
|
||||
0x0b6dbecb, 0x1b6dbecb, 0x2b6dbecb, 0x3b6dbecb, 0x0badbecb, 0x1badbecb,
|
||||
0x2badbecb, 0x3badbecb, 0x0bedbecb, 0x1bedbecb, 0x2bedbecb, 0x3bedbecb,
|
||||
0x0b2ebecb, 0x1b2ebecb, 0x2b2ebecb, 0x3b2ebecb, 0x0b6ebecb, 0x1b6ebecb,
|
||||
0x2b6ebecb, 0x3b6ebecb, 0x0baebecb, 0x1baebecb, 0x2baebecb, 0x3baebecb,
|
||||
0x0beebecb, 0x1beebecb, 0x2beebecb, 0x3beebecb, 0x0b2fbecb, 0x1b2fbecb,
|
||||
0x2b2fbecb, 0x3b2fbecb, 0x0b6fbecb, 0x1b6fbecb, 0x2b6fbecb, 0x3b6fbecb,
|
||||
0x0bafbecb, 0x1bafbecb, 0x2bafbecb, 0x3bafbecb, 0x0befbecb, 0x1befbecb,
|
||||
0x2befbecb, 0x3befbecb, 0x0b2cb2db, 0x1b2cb2db, 0x2b2cb2db, 0x3b2cb2db,
|
||||
0x0b6cb2db, 0x1b6cb2db, 0x2b6cb2db, 0x3b6cb2db, 0x0bacb2db, 0x1bacb2db,
|
||||
0x2bacb2db, 0x3bacb2db, 0x0becb2db, 0x1becb2db, 0x2becb2db, 0x3becb2db,
|
||||
0x0b2db2db, 0x1b2db2db, 0x2b2db2db, 0x3b2db2db, 0x0b6db2db, 0x1b6db2db,
|
||||
0x2b6db2db, 0x3b6db2db, 0x0badb2db, 0x1badb2db, 0x2badb2db, 0x3badb2db,
|
||||
0x0bedb2db, 0x1bedb2db, 0x2bedb2db, 0x3bedb2db, 0x0b2eb2db, 0x1b2eb2db,
|
||||
0x2b2eb2db, 0x3b2eb2db, 0x0b6eb2db, 0x1b6eb2db, 0x2b6eb2db, 0x3b6eb2db,
|
||||
0x0baeb2db, 0x1baeb2db, 0x2baeb2db, 0x3baeb2db, 0x0beeb2db, 0x1beeb2db,
|
||||
0x2beeb2db, 0x3beeb2db, 0x0b2fb2db, 0x1b2fb2db, 0x2b2fb2db, 0x3b2fb2db,
|
||||
0x0b6fb2db, 0x1b6fb2db, 0x2b6fb2db, 0x3b6fb2db, 0x0bafb2db, 0x1bafb2db,
|
||||
0x2bafb2db, 0x3bafb2db, 0x0befb2db, 0x1befb2db, 0x2befb2db, 0x3befb2db,
|
||||
0x0b2cb6db, 0x1b2cb6db, 0x2b2cb6db, 0x3b2cb6db, 0x0b6cb6db, 0x1b6cb6db,
|
||||
0x2b6cb6db, 0x3b6cb6db, 0x0bacb6db, 0x1bacb6db, 0x2bacb6db, 0x3bacb6db,
|
||||
0x0becb6db, 0x1becb6db, 0x2becb6db, 0x3becb6db, 0x0b2db6db, 0x1b2db6db,
|
||||
0x2b2db6db, 0x3b2db6db, 0x0b6db6db, 0x1b6db6db, 0x2b6db6db, 0x3b6db6db,
|
||||
0x0badb6db, 0x1badb6db, 0x2badb6db, 0x3badb6db, 0x0bedb6db, 0x1bedb6db,
|
||||
0x2bedb6db, 0x3bedb6db, 0x0b2eb6db, 0x1b2eb6db, 0x2b2eb6db, 0x3b2eb6db,
|
||||
0x0b6eb6db, 0x1b6eb6db, 0x2b6eb6db, 0x3b6eb6db, 0x0baeb6db, 0x1baeb6db,
|
||||
0x2baeb6db, 0x3baeb6db,
|
||||
};
|
||||
|
||||
// Bit lengths paired index-for-index with kNonZeroRepsBits. The values used
// are 6, 12, 18, 24 and 30.
// NOTE(review): presumably indexed by a repeat count; confirm against the
// code that indexes this table.
static const uint32_t kNonZeroRepsDepth[704] = {
|
||||
6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
};
|
||||
|
||||
// Bit patterns of the static literal code: entry i is the value i with its
// 8 bits reversed (e.g. entry 1 is 128, entry 3 is 192), i.e. a flat 8-bit
// code stored least-significant-bit first.
static const uint16_t kStaticLiteralCodeBits[256] = {
|
||||
0, 128, 64, 192, 32, 160, 96, 224,
|
||||
16, 144, 80, 208, 48, 176, 112, 240,
|
||||
8, 136, 72, 200, 40, 168, 104, 232,
|
||||
24, 152, 88, 216, 56, 184, 120, 248,
|
||||
4, 132, 68, 196, 36, 164, 100, 228,
|
||||
20, 148, 84, 212, 52, 180, 116, 244,
|
||||
12, 140, 76, 204, 44, 172, 108, 236,
|
||||
28, 156, 92, 220, 60, 188, 124, 252,
|
||||
2, 130, 66, 194, 34, 162, 98, 226,
|
||||
18, 146, 82, 210, 50, 178, 114, 242,
|
||||
10, 138, 74, 202, 42, 170, 106, 234,
|
||||
26, 154, 90, 218, 58, 186, 122, 250,
|
||||
6, 134, 70, 198, 38, 166, 102, 230,
|
||||
22, 150, 86, 214, 54, 182, 118, 246,
|
||||
14, 142, 78, 206, 46, 174, 110, 238,
|
||||
30, 158, 94, 222, 62, 190, 126, 254,
|
||||
1, 129, 65, 193, 33, 161, 97, 225,
|
||||
17, 145, 81, 209, 49, 177, 113, 241,
|
||||
9, 137, 73, 201, 41, 169, 105, 233,
|
||||
25, 153, 89, 217, 57, 185, 121, 249,
|
||||
5, 133, 69, 197, 37, 165, 101, 229,
|
||||
21, 149, 85, 213, 53, 181, 117, 245,
|
||||
13, 141, 77, 205, 45, 173, 109, 237,
|
||||
29, 157, 93, 221, 61, 189, 125, 253,
|
||||
3, 131, 67, 195, 35, 163, 99, 227,
|
||||
19, 147, 83, 211, 51, 179, 115, 243,
|
||||
11, 139, 75, 203, 43, 171, 107, 235,
|
||||
27, 155, 91, 219, 59, 187, 123, 251,
|
||||
7, 135, 71, 199, 39, 167, 103, 231,
|
||||
23, 151, 87, 215, 55, 183, 119, 247,
|
||||
15, 143, 79, 207, 47, 175, 111, 239,
|
||||
31, 159, 95, 223, 63, 191, 127, 255,
|
||||
};
|
||||
|
||||
// Emits the fixed 32-bit pattern 0x00010003 through WriteBits. storage_ix
// and storage are forwarded to WriteBits unchanged.
inline void StoreStaticLiteralHuffmanTree(size_t* storage_ix,
                                          uint8_t* storage) {
  const unsigned int literal_tree_header = 0x00010003U;
  WriteBits(32, literal_tree_header, storage_ix, storage);
}
|
||||
|
||||
// Code bits of the static command prefix code, indexed by command symbol.
//
// The table has two regions:
//   * entries 0..447 hold the 9-bit bit-reversal of their index;
//   * entries 448..703 have the three low bits set and carry the 8-bit
//     bit-reversal of (index - 448) in bits 3..10.
static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
  // Entries 0..447.
  0, 256, 128, 384, 64, 320, 192, 448,
  32, 288, 160, 416, 96, 352, 224, 480,
  16, 272, 144, 400, 80, 336, 208, 464,
  48, 304, 176, 432, 112, 368, 240, 496,
  8, 264, 136, 392, 72, 328, 200, 456,
  40, 296, 168, 424, 104, 360, 232, 488,
  24, 280, 152, 408, 88, 344, 216, 472,
  56, 312, 184, 440, 120, 376, 248, 504,
  4, 260, 132, 388, 68, 324, 196, 452,
  36, 292, 164, 420, 100, 356, 228, 484,
  20, 276, 148, 404, 84, 340, 212, 468,
  52, 308, 180, 436, 116, 372, 244, 500,
  12, 268, 140, 396, 76, 332, 204, 460,
  44, 300, 172, 428, 108, 364, 236, 492,
  28, 284, 156, 412, 92, 348, 220, 476,
  60, 316, 188, 444, 124, 380, 252, 508,
  2, 258, 130, 386, 66, 322, 194, 450,
  34, 290, 162, 418, 98, 354, 226, 482,
  18, 274, 146, 402, 82, 338, 210, 466,
  50, 306, 178, 434, 114, 370, 242, 498,
  10, 266, 138, 394, 74, 330, 202, 458,
  42, 298, 170, 426, 106, 362, 234, 490,
  26, 282, 154, 410, 90, 346, 218, 474,
  58, 314, 186, 442, 122, 378, 250, 506,
  6, 262, 134, 390, 70, 326, 198, 454,
  38, 294, 166, 422, 102, 358, 230, 486,
  22, 278, 150, 406, 86, 342, 214, 470,
  54, 310, 182, 438, 118, 374, 246, 502,
  14, 270, 142, 398, 78, 334, 206, 462,
  46, 302, 174, 430, 110, 366, 238, 494,
  30, 286, 158, 414, 94, 350, 222, 478,
  62, 318, 190, 446, 126, 382, 254, 510,
  1, 257, 129, 385, 65, 321, 193, 449,
  33, 289, 161, 417, 97, 353, 225, 481,
  17, 273, 145, 401, 81, 337, 209, 465,
  49, 305, 177, 433, 113, 369, 241, 497,
  9, 265, 137, 393, 73, 329, 201, 457,
  41, 297, 169, 425, 105, 361, 233, 489,
  25, 281, 153, 409, 89, 345, 217, 473,
  57, 313, 185, 441, 121, 377, 249, 505,
  5, 261, 133, 389, 69, 325, 197, 453,
  37, 293, 165, 421, 101, 357, 229, 485,
  21, 277, 149, 405, 85, 341, 213, 469,
  53, 309, 181, 437, 117, 373, 245, 501,
  13, 269, 141, 397, 77, 333, 205, 461,
  45, 301, 173, 429, 109, 365, 237, 493,
  29, 285, 157, 413, 93, 349, 221, 477,
  61, 317, 189, 445, 125, 381, 253, 509,
  3, 259, 131, 387, 67, 323, 195, 451,
  35, 291, 163, 419, 99, 355, 227, 483,
  19, 275, 147, 403, 83, 339, 211, 467,
  51, 307, 179, 435, 115, 371, 243, 499,
  11, 267, 139, 395, 75, 331, 203, 459,
  43, 299, 171, 427, 107, 363, 235, 491,
  27, 283, 155, 411, 91, 347, 219, 475,
  59, 315, 187, 443, 123, 379, 251, 507,
  // Entries 448..703.
  7, 1031, 519, 1543, 263, 1287, 775, 1799,
  135, 1159, 647, 1671, 391, 1415, 903, 1927,
  71, 1095, 583, 1607, 327, 1351, 839, 1863,
  199, 1223, 711, 1735, 455, 1479, 967, 1991,
  39, 1063, 551, 1575, 295, 1319, 807, 1831,
  167, 1191, 679, 1703, 423, 1447, 935, 1959,
  103, 1127, 615, 1639, 359, 1383, 871, 1895,
  231, 1255, 743, 1767, 487, 1511, 999, 2023,
  23, 1047, 535, 1559, 279, 1303, 791, 1815,
  151, 1175, 663, 1687, 407, 1431, 919, 1943,
  87, 1111, 599, 1623, 343, 1367, 855, 1879,
  215, 1239, 727, 1751, 471, 1495, 983, 2007,
  55, 1079, 567, 1591, 311, 1335, 823, 1847,
  183, 1207, 695, 1719, 439, 1463, 951, 1975,
  119, 1143, 631, 1655, 375, 1399, 887, 1911,
  247, 1271, 759, 1783, 503, 1527, 1015, 2039,
  15, 1039, 527, 1551, 271, 1295, 783, 1807,
  143, 1167, 655, 1679, 399, 1423, 911, 1935,
  79, 1103, 591, 1615, 335, 1359, 847, 1871,
  207, 1231, 719, 1743, 463, 1487, 975, 1999,
  47, 1071, 559, 1583, 303, 1327, 815, 1839,
  175, 1199, 687, 1711, 431, 1455, 943, 1967,
  111, 1135, 623, 1647, 367, 1391, 879, 1903,
  239, 1263, 751, 1775, 495, 1519, 1007, 2031,
  31, 1055, 543, 1567, 287, 1311, 799, 1823,
  159, 1183, 671, 1695, 415, 1439, 927, 1951,
  95, 1119, 607, 1631, 351, 1375, 863, 1887,
  223, 1247, 735, 1759, 479, 1503, 991, 2015,
  63, 1087, 575, 1599, 319, 1343, 831, 1855,
  191, 1215, 703, 1727, 447, 1471, 959, 1983,
  127, 1151, 639, 1663, 383, 1407, 895, 1919,
  255, 1279, 767, 1791, 511, 1535, 1023, 2047,
};
|
||||
|
||||
// Appends two fixed bit patterns (28 bits, then 31 bits) to the bit stream at
// position *storage_ix of storage, using WriteBits.
// NOTE(review): the patterns are presumably the serialized form of the static
// command prefix code whose code words are in kStaticCommandCodeBits --
// confirm against the bit stream format.
inline void StoreStaticCommandHuffmanTree(size_t* storage_ix,
                                          uint8_t* storage) {
  const uint32_t kFirstChunk = 0x06307003U;   // written as 28 bits
  const uint32_t kSecondChunk = 0x09262441U;  // written as 31 bits
  WriteBits(28, kFirstChunk, storage_ix, storage);
  WriteBits(31, kSecondChunk, storage_ix, storage);
}
|
||||
|
||||
// Code bits of the static distance prefix code, indexed by distance symbol.
// Entry i is the 6-bit bit-reversal of i (e.g. 1 -> 32, 2 -> 16, 3 -> 48).
static const uint16_t kStaticDistanceCodeBits[64] = {
  0, 32, 16, 48, 8, 40, 24, 56,
  4, 36, 20, 52, 12, 44, 28, 60,
  2, 34, 18, 50, 10, 42, 26, 58,
  6, 38, 22, 54, 14, 46, 30, 62,
  1, 33, 17, 49, 9, 41, 25, 57,
  5, 37, 21, 53, 13, 45, 29, 61,
  3, 35, 19, 51, 11, 43, 27, 59,
  7, 39, 23, 55, 15, 47, 31, 63,
};
|
||||
|
||||
// Appends two fixed bit patterns (18 bits, then 10 bits) to the bit stream at
// position *storage_ix of storage, using WriteBits.
// NOTE(review): the patterns are presumably the serialized form of the static
// distance prefix code whose code words are in kStaticDistanceCodeBits --
// confirm against the bit stream format.
inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix,
                                           uint8_t* storage) {
  const uint32_t kFirstChunk = 0x0001dc03U;   // written as 18 bits
  const uint32_t kSecondChunk = 0x000000daU;  // written as 10 bits
  WriteBits(18, kFirstChunk, storage_ix, storage);
  WriteBits(10, kSecondChunk, storage_ix, storage);
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
|
@ -0,0 +1,139 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Utilities for fast computation of logarithms.
|
||||
|
||||
#ifndef BROTLI_ENC_FAST_LOG_H_
|
||||
#define BROTLI_ENC_FAST_LOG_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Returns floor(log2(n)), i.e. the index of the most significant set bit.
//
// n must be non-zero: the compiler intrinsics used on the GCC path are
// undefined for a zero argument (the portable fallback returns 0).
static inline uint32_t Log2FloorNonZero(size_t n) {
#ifdef __GNUC__
  // Pick the intrinsic by the width of size_t so that no high bits are
  // dropped: truncating a 64-bit n to 32 bits would give a wrong answer for
  // n >= 2^32, and an undefined one when the low 32 bits are all zero.
  // The condition is a compile-time constant, so only one branch survives.
  if (sizeof(n) > sizeof(unsigned int)) {
    const unsigned long long wide = static_cast<unsigned long long>(n);
    const uint32_t top_bit = static_cast<uint32_t>(sizeof(wide) * 8 - 1);
    return top_bit ^ static_cast<uint32_t>(__builtin_clzll(wide));
  }
  return 31u ^ static_cast<uint32_t>(__builtin_clz(static_cast<uint32_t>(n)));
#else
  // Portable fallback: count how many times n can be halved.
  uint32_t result = 0;
  while (n >>= 1) result++;
  return result;
#endif
}
|
||||
|
||||
// Precomputed log2 of the integers 0..255, for use in entropy computation.
// Slot 0 holds 0.0 (log2(0) is treated as 0); slot i >= 1 holds log2(i).
//
// Generated with the Python expression:
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
static const float kLog2Table[] = {
  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
  2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
  3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
  3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
  3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
  4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
  4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
  4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
  4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
  4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
  5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
  5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
  5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
  5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
  5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
  5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
  5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
  5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
  5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
  5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
  5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
  6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
  6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
  6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
  6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
  6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
  6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
  6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
  6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
  6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
  6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
  6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
  6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
  6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
  6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
  6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
  6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
  6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
  6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
  6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
  6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
  6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
  7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
  7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
  7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
  7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
  7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
  7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
  7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
  7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
  7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
  7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
  7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
  7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
  7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
  7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
  7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
  7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
  7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
  7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
  7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
  7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
  7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
  7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
  7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
  7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
  7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
  7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
  7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
  7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
  7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
  7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
  7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
  7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
  7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
  7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
  7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
  7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
  7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
  7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
  7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
  7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
  7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
  7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
  7.9943534368588578f
};
|
||||
|
||||
// Faster logarithm for small integers, with the property of log2(0) == 0.
|
||||
static inline double FastLog2(size_t v) {
|
||||
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
|
||||
return kLog2Table[v];
|
||||
}
|
||||
#if defined(_MSC_VER) && _MSC_VER <= 1700
|
||||
// Visual Studio 2012 does not have the log2() function defined, so we use
|
||||
// log() and a multiplication instead.
|
||||
static const double kLog2Inv = 1.4426950408889634f;
|
||||
return log(static_cast<double>(v)) * kLog2Inv;
|
||||
#else
|
||||
return log2(static_cast<double>(v));
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_FAST_LOG_H_
|
|
@ -0,0 +1,77 @@
|
|||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Function to find maximal matching prefixes of strings.
|
||||
|
||||
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
||||
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
||||
|
||||
|
||||
#include "./port.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Separate implementation for little-endian 64-bit targets, for speed.
|
||||
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
|
||||
|
||||
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
const uint8_t* s2,
|
||||
size_t limit) {
|
||||
size_t matched = 0;
|
||||
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
|
||||
while (PREDICT_TRUE(--limit2)) {
|
||||
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
|
||||
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
|
||||
s2 += 8;
|
||||
matched += 8;
|
||||
} else {
|
||||
uint64_t x =
|
||||
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
|
||||
size_t matching_bits = static_cast<size_t>(__builtin_ctzll(x));
|
||||
matched += matching_bits >> 3;
|
||||
return matched;
|
||||
}
|
||||
}
|
||||
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
|
||||
while (--limit) {
|
||||
if (PREDICT_TRUE(s1[matched] == *s2)) {
|
||||
++s2;
|
||||
++matched;
|
||||
} else {
|
||||
return matched;
|
||||
}
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
#else
|
||||
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
const uint8_t* s2,
|
||||
size_t limit) {
|
||||
size_t matched = 0;
|
||||
const uint8_t* s2_limit = s2 + limit;
|
||||
const uint8_t* s2_ptr = s2;
|
||||
// Find out how long the match is. We loop over the data 32 bits at a
|
||||
// time until we find a 32-bit block that doesn't match; then we find
|
||||
// the first non-matching bit and use that to calculate the total
|
||||
// length of the match.
|
||||
while (s2_ptr <= s2_limit - 4 &&
|
||||
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
|
||||
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
|
||||
s2_ptr += 4;
|
||||
matched += 4;
|
||||
}
|
||||
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
|
||||
++s2_ptr;
|
||||
++matched;
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
|
@ -0,0 +1,974 @@
|
|||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// A (forgetful) hash table to the data seen by the compressor, to
|
||||
// help create backward references to previous data.
|
||||
|
||||
#ifndef BROTLI_ENC_HASH_H_
|
||||
#define BROTLI_ENC_HASH_H_
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
|
||||
#include "./dictionary_hash.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./port.h"
|
||||
#include "./prefix.h"
|
||||
#include "./static_dict.h"
|
||||
#include "./transform.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
static const size_t kMaxTreeSearchDepth = 64;
static const size_t kMaxTreeCompLength = 128;

// Candidate i among the "last distance" probes is formed as
//   distance_cache[kDistanceCacheIndex[i]] + kDistanceCacheOffset[i].
// The first four candidates are the cached distances themselves; the rest are
// small perturbations of cache entries 0 and 1.
static const uint32_t kDistanceCacheIndex[] = {
  0, 1, 2, 3,
  0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
  0, 0, 0, 0,
  -1, 1, -2, 2, -3, 3,
  -1, 1, -2, 2, -3, 3
};

// Transform ids for static dictionary words matched only up to a prefix;
// the table is indexed by the number of unmatched trailing bytes.
static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};

// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
//   for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
||||
|
||||
template<int kShiftBits>
|
||||
inline uint32_t Hash(const uint8_t *data) {
|
||||
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
||||
// The higher bits contain more mixture from the multiplication,
|
||||
// so we take our results from there.
|
||||
return h >> (32 - kShiftBits);
|
||||
}
|
||||
|
||||
// Usually, we always choose the longest backward reference. This function
|
||||
// allows for the exception of that rule.
|
||||
//
|
||||
// If we choose a backward reference that is further away, it will
|
||||
// usually be coded with more bits. We approximate this by assuming
|
||||
// log2(distance). If the distance can be expressed in terms of the
|
||||
// last four distances, we use some heuristic constants to estimate
|
||||
// the bits cost. For the first up to four literals we use the bit
|
||||
// cost of the literals from the literal cost model, after that we
|
||||
// use the average bit cost of the cost model.
|
||||
//
|
||||
// This function is used to sometimes discard a longer backward reference
|
||||
// when it is not much longer and the bit cost for encoding it is more
|
||||
// than the saved literals.
|
||||
//
|
||||
// backward_reference_offset MUST be positive.
|
||||
inline double BackwardReferenceScore(size_t copy_length,
|
||||
size_t backward_reference_offset) {
|
||||
return 5.4 * static_cast<double>(copy_length) -
|
||||
1.20 * Log2FloorNonZero(backward_reference_offset);
|
||||
}
|
||||
|
||||
// Heuristic score of a backward reference whose distance is expressed through
// one of the 16 distance short codes; a higher score is a better match.
//
// Uses the same per-byte reward as BackwardReferenceScore, but the distance
// penalty comes from a fixed per-short-code table instead of a logarithm.
// distance_short_code must be in [0, 16); it is not range-checked.
inline double BackwardReferenceScoreUsingLastDistance(
    size_t copy_length, size_t distance_short_code) {
  static const double kDistanceShortCodeBitCost[16] = {
    -0.6, 0.95, 1.17, 1.27,
    0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
    1.05, 1.05, 1.15, 1.15, 1.25, 1.25
  };
  const double length_gain = 5.4 * static_cast<double>(copy_length);
  return length_gain - kDistanceShortCodeBitCost[distance_short_code];
}
|
||||
|
||||
// A candidate backward match: a distance plus a packed (length, length code)
// pair.
//
// length_and_code keeps the match length in bits 5 and up and the length code
// in the low five bits. A stored code of 0 means "same as the length".
struct BackwardMatch {
  // Empty match.
  BackwardMatch(void) : distance(0), length_and_code(0) {}

  // Match whose length code equals its length.
  BackwardMatch(size_t dist, size_t len)
      : distance(static_cast<uint32_t>(dist)),
        length_and_code(static_cast<uint32_t>(len << 5)) {}

  // Match with an explicit length code. The code is stored only when it
  // differs from the length.
  BackwardMatch(size_t dist, size_t len, size_t len_code) {
    const size_t stored_code = (len == len_code) ? 0 : len_code;
    distance = static_cast<uint32_t>(dist);
    length_and_code = static_cast<uint32_t>((len << 5) | stored_code);
  }

  // Number of bytes covered by the match.
  size_t length(void) const {
    return length_and_code >> 5;
  }

  // Length code of the match; falls back to length() when none was stored.
  size_t length_code(void) const {
    const size_t stored_code = length_and_code & 31;
    if (stored_code != 0) {
      return stored_code;
    }
    return length();
  }

  uint32_t distance;
  uint32_t length_and_code;
};
|
||||
|
||||
// A (forgetful) hash table to the data seen by the compressor, to
|
||||
// help create backward references to previous data.
|
||||
//
|
||||
// This is a hash map of fixed size (kBucketSize). Starting from the
|
||||
// given index, kBucketSweep buckets are used to store values of a key.
|
||||
template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
|
||||
class HashLongestMatchQuickly {
|
||||
public:
|
||||
HashLongestMatchQuickly(void) {
|
||||
Reset();
|
||||
}
|
||||
void Reset(void) {
|
||||
need_init_ = true;
|
||||
num_dict_lookups_ = 0;
|
||||
num_dict_matches_ = 0;
|
||||
}
|
||||
void Init(void) {
|
||||
if (need_init_) {
|
||||
// It is not strictly necessary to fill this buffer here, but
|
||||
// not filling will make the results of the compression stochastic
|
||||
// (but correct). This is because random data would cause the
|
||||
// system to find accidentally good backward references here and there.
|
||||
memset(&buckets_[0], 0, sizeof(buckets_));
|
||||
need_init_ = false;
|
||||
}
|
||||
}
|
||||
void InitForData(const uint8_t* data, size_t num) {
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
const uint32_t key = HashBytes(&data[i]);
|
||||
memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));
|
||||
need_init_ = false;
|
||||
}
|
||||
}
|
||||
// Look at 4 bytes at data.
|
||||
// Compute a hash from these, and store the value somewhere within
|
||||
// [ix .. ix+3].
|
||||
inline void Store(const uint8_t *data, const uint32_t ix) {
|
||||
const uint32_t key = HashBytes(data);
|
||||
// Wiggle the value with the bucket sweep range.
|
||||
const uint32_t off = (ix >> 3) % kBucketSweep;
|
||||
buckets_[key + off] = ix;
|
||||
}
|
||||
|
||||
// Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
|
||||
// up to the length of max_length and stores the position cur_ix in the
|
||||
// hash table.
|
||||
//
|
||||
// Does not look for matches longer than max_length.
|
||||
// Does not look for matches further away than max_backward.
|
||||
// Writes the best found match length into best_len_out.
|
||||
// Writes the index (&data[index]) of the start of the best match into
|
||||
// best_distance_out.
|
||||
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
|
||||
const size_t ring_buffer_mask,
|
||||
const int* __restrict distance_cache,
|
||||
const size_t cur_ix,
|
||||
const size_t max_length,
|
||||
const size_t max_backward,
|
||||
size_t * __restrict best_len_out,
|
||||
size_t * __restrict best_len_code_out,
|
||||
size_t * __restrict best_distance_out,
|
||||
double* __restrict best_score_out) {
|
||||
const size_t best_len_in = *best_len_out;
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
|
||||
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
|
||||
double best_score = *best_score_out;
|
||||
size_t best_len = best_len_in;
|
||||
size_t cached_backward = static_cast<size_t>(distance_cache[0]);
|
||||
size_t prev_ix = cur_ix - cached_backward;
|
||||
bool match_found = false;
|
||||
if (prev_ix < cur_ix) {
|
||||
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
||||
if (compare_char == ring_buffer[prev_ix + best_len]) {
|
||||
size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||
&ring_buffer[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
|
||||
best_len = len;
|
||||
*best_len_out = len;
|
||||
*best_len_code_out = len;
|
||||
*best_distance_out = cached_backward;
|
||||
*best_score_out = best_score;
|
||||
compare_char = ring_buffer[cur_ix_masked + best_len];
|
||||
if (kBucketSweep == 1) {
|
||||
buckets_[key] = static_cast<uint32_t>(cur_ix);
|
||||
return true;
|
||||
} else {
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (kBucketSweep == 1) {
|
||||
// Only one to look for, don't bother to prepare for a loop.
|
||||
prev_ix = buckets_[key];
|
||||
buckets_[key] = static_cast<uint32_t>(cur_ix);
|
||||
size_t backward = cur_ix - prev_ix;
|
||||
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
||||
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
|
||||
return false;
|
||||
}
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
return false;
|
||||
}
|
||||
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||
&ring_buffer[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
*best_len_out = len;
|
||||
*best_len_code_out = len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = BackwardReferenceScore(len, backward);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
uint32_t *bucket = buckets_ + key;
|
||||
prev_ix = *bucket++;
|
||||
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
|
||||
if (compare_char != ring_buffer[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
continue;
|
||||
}
|
||||
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||
&ring_buffer[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
const double score = BackwardReferenceScore(len, backward);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = score;
|
||||
compare_char = ring_buffer[cur_ix_masked + best_len];
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (kUseDictionary && !match_found &&
|
||||
num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
||||
++num_dict_lookups_;
|
||||
const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
|
||||
const uint16_t v = kStaticDictionaryHash[dict_key];
|
||||
if (v > 0) {
|
||||
const uint32_t len = v & 31;
|
||||
const uint32_t dist = v >> 5;
|
||||
const size_t offset =
|
||||
kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
||||
if (len <= max_length) {
|
||||
const size_t matchlen =
|
||||
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
|
||||
&kBrotliDictionary[offset], len);
|
||||
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
|
||||
const size_t transform_id = kCutoffTransforms[len - matchlen];
|
||||
const size_t word_id =
|
||||
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
|
||||
dist;
|
||||
const size_t backward = max_backward + word_id + 1;
|
||||
const double score = BackwardReferenceScore(matchlen, backward);
|
||||
if (best_score < score) {
|
||||
++num_dict_matches_;
|
||||
best_score = score;
|
||||
best_len = matchlen;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
const uint32_t off = (cur_ix >> 3) % kBucketSweep;
|
||||
buckets_[key + off] = static_cast<uint32_t>(cur_ix);
|
||||
return match_found;
|
||||
}
|
||||
|
||||
enum { kHashLength = 5 };
|
||||
enum { kHashTypeLength = 8 };
|
||||
// HashBytes is the function that chooses the bucket to place
|
||||
// the address in. The HashLongestMatch and HashLongestMatchQuickly
|
||||
// classes have separate, different implementations of hashing.
|
||||
static uint32_t HashBytes(const uint8_t *data) {
|
||||
// Computing a hash based on 5 bytes works much better for
|
||||
// qualities 1 and 3, where the next hash value is likely to replace
|
||||
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
|
||||
// The higher bits contain more mixture from the multiplication,
|
||||
// so we take our results from there.
|
||||
return static_cast<uint32_t>(h >> (64 - kBucketBits));
|
||||
}
|
||||
|
||||
enum { kHashMapSize = 4 << kBucketBits };
|
||||
|
||||
private:
|
||||
static const uint32_t kBucketSize = 1 << kBucketBits;
|
||||
uint32_t buckets_[kBucketSize + kBucketSweep];
|
||||
// True if buckets_ array needs to be initialized.
|
||||
bool need_init_;
|
||||
size_t num_dict_lookups_;
|
||||
size_t num_dict_matches_;
|
||||
};
|
||||
|
||||
// A (forgetful) hash table to the data seen by the compressor, to
|
||||
// help create backward references to previous data.
|
||||
//
|
||||
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
|
||||
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
|
||||
// index positions of the given hash key in the compressed data.
|
||||
template <int kBucketBits,
|
||||
int kBlockBits,
|
||||
int kNumLastDistancesToCheck>
|
||||
class HashLongestMatch {
|
||||
public:
|
||||
HashLongestMatch(void) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
void Reset(void) {
|
||||
need_init_ = true;
|
||||
num_dict_lookups_ = 0;
|
||||
num_dict_matches_ = 0;
|
||||
}
|
||||
|
||||
void Init(void) {
|
||||
if (need_init_) {
|
||||
memset(&num_[0], 0, sizeof(num_));
|
||||
need_init_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
void InitForData(const uint8_t* data, size_t num) {
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
const uint32_t key = HashBytes(&data[i]);
|
||||
num_[key] = 0;
|
||||
need_init_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Look at 3 bytes at data.
|
||||
// Compute a hash from these, and store the value of ix at that position.
|
||||
inline void Store(const uint8_t *data, const uint32_t ix) {
|
||||
const uint32_t key = HashBytes(data);
|
||||
const int minor_ix = num_[key] & kBlockMask;
|
||||
buckets_[key][minor_ix] = ix;
|
||||
++num_[key];
|
||||
}
|
||||
|
||||
// Find a longest backward match of &data[cur_ix] up to the length of
|
||||
// max_length and stores the position cur_ix in the hash table.
|
||||
//
|
||||
// Does not look for matches longer than max_length.
|
||||
// Does not look for matches further away than max_backward.
|
||||
// Writes the best found match length into best_len_out.
|
||||
// Writes the index (&data[index]) offset from the start of the best match
|
||||
// into best_distance_out.
|
||||
// Write the score of the best match into best_score_out.
|
||||
bool FindLongestMatch(const uint8_t * __restrict data,
|
||||
const size_t ring_buffer_mask,
|
||||
const int* __restrict distance_cache,
|
||||
const size_t cur_ix,
|
||||
const size_t max_length,
|
||||
const size_t max_backward,
|
||||
size_t * __restrict best_len_out,
|
||||
size_t * __restrict best_len_code_out,
|
||||
size_t * __restrict best_distance_out,
|
||||
double * __restrict best_score_out) {
|
||||
*best_len_code_out = 0;
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
bool match_found = false;
|
||||
// Don't accept a short copy from far away.
|
||||
double best_score = *best_score_out;
|
||||
size_t best_len = *best_len_out;
|
||||
*best_len_out = 0;
|
||||
// Try last distance first.
|
||||
for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
|
||||
const size_t idx = kDistanceCacheIndex[i];
|
||||
const size_t backward =
|
||||
static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
|
||||
size_t prev_ix = static_cast<size_t>(cur_ix - backward);
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
}
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
continue;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
|
||||
&data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 3 || (len == 2 && i < 2)) {
|
||||
// Comparing for >= 2 does not change the semantics, but just saves for
|
||||
// a few unnecessary binary logarithms in backward reference score,
|
||||
// since we are not interested in such short matches.
|
||||
double score = BackwardReferenceScoreUsingLastDistance(len, i);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
||||
const uint32_t * __restrict const bucket = &buckets_[key][0];
|
||||
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
||||
for (size_t i = num_[key]; i > down;) {
|
||||
--i;
|
||||
size_t prev_ix = bucket[i & kBlockMask];
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
|
||||
&data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
// Comparing for >= 3 does not change the semantics, but just saves
|
||||
// for a few unnecessary binary logarithms in backward reference
|
||||
// score, since we are not interested in such short matches.
|
||||
double score = BackwardReferenceScore(len, backward);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
|
||||
++num_[key];
|
||||
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
||||
size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
|
||||
for (int k = 0; k < 2; ++k, ++dict_key) {
|
||||
++num_dict_lookups_;
|
||||
const uint16_t v = kStaticDictionaryHash[dict_key];
|
||||
if (v > 0) {
|
||||
const size_t len = v & 31;
|
||||
const size_t dist = v >> 5;
|
||||
const size_t offset =
|
||||
kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
||||
if (len <= max_length) {
|
||||
const size_t matchlen =
|
||||
FindMatchLengthWithLimit(&data[cur_ix_masked],
|
||||
&kBrotliDictionary[offset], len);
|
||||
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
|
||||
const size_t transform_id = kCutoffTransforms[len - matchlen];
|
||||
const size_t word_id =
|
||||
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
|
||||
dist;
|
||||
const size_t backward = max_backward + word_id + 1;
|
||||
double score = BackwardReferenceScore(matchlen, backward);
|
||||
if (best_score < score) {
|
||||
++num_dict_matches_;
|
||||
best_score = score;
|
||||
best_len = matchlen;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return match_found;
|
||||
}
|
||||
|
||||
// Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
|
||||
// length of max_length and stores the position cur_ix in the hash table.
|
||||
//
|
||||
// Sets *num_matches to the number of matches found, and stores the found
|
||||
// matches in matches[0] to matches[*num_matches - 1]. The matches will be
|
||||
// sorted by strictly increasing length and (non-strictly) increasing
|
||||
// distance.
|
||||
size_t FindAllMatches(const uint8_t* data,
|
||||
const size_t ring_buffer_mask,
|
||||
const size_t cur_ix,
|
||||
const size_t max_length,
|
||||
const size_t max_backward,
|
||||
BackwardMatch* matches) {
|
||||
BackwardMatch* const orig_matches = matches;
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
size_t best_len = 1;
|
||||
size_t stop = cur_ix - 64;
|
||||
if (cur_ix < 64) { stop = 0; }
|
||||
for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
|
||||
size_t prev_ix = i;
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (data[cur_ix_masked] != data[prev_ix] ||
|
||||
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
*matches++ = BackwardMatch(backward, len);
|
||||
}
|
||||
}
|
||||
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
||||
const uint32_t * __restrict const bucket = &buckets_[key][0];
|
||||
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
||||
for (size_t i = num_[key]; i > down;) {
|
||||
--i;
|
||||
size_t prev_ix = bucket[i & kBlockMask];
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
*matches++ = BackwardMatch(backward, len);
|
||||
}
|
||||
}
|
||||
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
|
||||
++num_[key];
|
||||
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
|
||||
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
|
||||
dict_matches[i] = kInvalidMatch;
|
||||
}
|
||||
size_t minlen = std::max<size_t>(4, best_len + 1);
|
||||
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
|
||||
&dict_matches[0])) {
|
||||
size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
|
||||
for (size_t l = minlen; l <= maxlen; ++l) {
|
||||
uint32_t dict_id = dict_matches[l];
|
||||
if (dict_id < kInvalidMatch) {
|
||||
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
|
||||
dict_id & 31);
|
||||
}
|
||||
}
|
||||
}
|
||||
return static_cast<size_t>(matches - orig_matches);
|
||||
}
|
||||
|
||||
enum { kHashLength = 4 };
|
||||
enum { kHashTypeLength = 4 };
|
||||
|
||||
// HashBytes is the function that chooses the bucket to place
|
||||
// the address in. The HashLongestMatch and HashLongestMatchQuickly
|
||||
// classes have separate, different implementations of hashing.
|
||||
static uint32_t HashBytes(const uint8_t *data) {
|
||||
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
||||
// The higher bits contain more mixture from the multiplication,
|
||||
// so we take our results from there.
|
||||
return h >> (32 - kBucketBits);
|
||||
}
|
||||
|
||||
enum { kHashMapSize = 2 << kBucketBits };
|
||||
|
||||
static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);
|
||||
|
||||
private:
|
||||
// Number of hash buckets.
|
||||
static const uint32_t kBucketSize = 1 << kBucketBits;
|
||||
|
||||
// Only kBlockSize newest backward references are kept,
|
||||
// and the older are forgotten.
|
||||
static const uint32_t kBlockSize = 1 << kBlockBits;
|
||||
|
||||
// Mask for accessing entries in a block (in a ringbuffer manner).
|
||||
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
|
||||
|
||||
// Number of entries in a particular bucket.
|
||||
uint16_t num_[kBucketSize];
|
||||
|
||||
// Buckets containing kBlockSize of backward references.
|
||||
uint32_t buckets_[kBucketSize][kBlockSize];
|
||||
|
||||
// True if num_ array needs to be initialized.
|
||||
bool need_init_;
|
||||
|
||||
size_t num_dict_lookups_;
|
||||
size_t num_dict_matches_;
|
||||
};
|
||||
|
||||
// A (forgetful) hash table where each hash bucket contains a binary tree of
|
||||
// sequences whose first 4 bytes share the same hash code.
|
||||
// Each sequence is kMaxTreeCompLength long and is identified by its starting
|
||||
// position in the input data. The binary tree is sorted by the lexicographic
|
||||
// order of the sequences, and it is also a max-heap with respect to the
|
||||
// starting positions.
|
||||
class HashToBinaryTree {
|
||||
public:
|
||||
HashToBinaryTree() : forest_(NULL) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
~HashToBinaryTree() {
|
||||
delete[] forest_;
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
need_init_ = true;
|
||||
}
|
||||
|
||||
void Init(int lgwin, size_t position, size_t bytes, bool is_last) {
|
||||
if (need_init_) {
|
||||
window_mask_ = (1u << lgwin) - 1u;
|
||||
invalid_pos_ = static_cast<uint32_t>(-window_mask_);
|
||||
for (uint32_t i = 0; i < kBucketSize; i++) {
|
||||
buckets_[i] = invalid_pos_;
|
||||
}
|
||||
size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;
|
||||
forest_ = new uint32_t[2 * num_nodes];
|
||||
need_init_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
|
||||
// length of max_length and stores the position cur_ix in the hash table.
|
||||
//
|
||||
// Sets *num_matches to the number of matches found, and stores the found
|
||||
// matches in matches[0] to matches[*num_matches - 1]. The matches will be
|
||||
// sorted by strictly increasing length and (non-strictly) increasing
|
||||
// distance.
|
||||
size_t FindAllMatches(const uint8_t* data,
|
||||
const size_t ring_buffer_mask,
|
||||
const size_t cur_ix,
|
||||
const size_t max_length,
|
||||
const size_t max_backward,
|
||||
BackwardMatch* matches) {
|
||||
BackwardMatch* const orig_matches = matches;
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
size_t best_len = 1;
|
||||
size_t stop = cur_ix - 64;
|
||||
if (cur_ix < 64) { stop = 0; }
|
||||
for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
|
||||
size_t prev_ix = i;
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (data[cur_ix_masked] != data[prev_ix] ||
|
||||
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
*matches++ = BackwardMatch(backward, len);
|
||||
}
|
||||
}
|
||||
if (best_len < max_length) {
|
||||
matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
|
||||
max_length, &best_len, matches);
|
||||
}
|
||||
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
|
||||
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
|
||||
dict_matches[i] = kInvalidMatch;
|
||||
}
|
||||
size_t minlen = std::max<size_t>(4, best_len + 1);
|
||||
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
|
||||
&dict_matches[0])) {
|
||||
size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
|
||||
for (size_t l = minlen; l <= maxlen; ++l) {
|
||||
uint32_t dict_id = dict_matches[l];
|
||||
if (dict_id < kInvalidMatch) {
|
||||
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
|
||||
dict_id & 31);
|
||||
}
|
||||
}
|
||||
}
|
||||
return static_cast<size_t>(matches - orig_matches);
|
||||
}
|
||||
|
||||
// Stores the hash of the next 4 bytes and re-roots the binary tree at the
|
||||
// current sequence, without returning any matches.
|
||||
// REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
|
||||
void Store(const uint8_t* data,
|
||||
const size_t ring_buffer_mask,
|
||||
const size_t cur_ix) {
|
||||
size_t best_len = 0;
|
||||
StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
|
||||
&best_len, NULL);
|
||||
}
|
||||
|
||||
void StitchToPreviousBlock(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask) {
|
||||
if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
|
||||
// Store the last `kMaxTreeCompLength - 1` positions in the hasher.
|
||||
// These could not be calculated before, since they require knowledge
|
||||
// of both the previous and the current block.
|
||||
const size_t i_start = position - kMaxTreeCompLength + 1;
|
||||
const size_t i_end = std::min(position, i_start + num_bytes);
|
||||
for (size_t i = i_start; i < i_end; ++i) {
|
||||
// We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
|
||||
// end of the current block and that we have at least
|
||||
// kMaxTreeCompLength tail in the ringbuffer.
|
||||
Store(ringbuffer, ringbuffer_mask, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
|
||||
|
||||
private:
|
||||
// Stores the hash of the next 4 bytes and in a single tree-traversal, the
|
||||
// hash bucket's binary tree is searched for matches and is re-rooted at the
|
||||
// current position.
|
||||
//
|
||||
// If less than kMaxTreeCompLength data is available, the hash bucket of the
|
||||
// current position is searched for matches, but the state of the hash table
|
||||
// is not changed, since we can not know the final sorting order of the
|
||||
// current (incomplete) sequence.
|
||||
//
|
||||
// This function must be called with increasing cur_ix positions.
|
||||
BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,
|
||||
const size_t cur_ix,
|
||||
const size_t ring_buffer_mask,
|
||||
const size_t max_length,
|
||||
size_t* const __restrict best_len,
|
||||
BackwardMatch* __restrict matches) {
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
const size_t max_backward = window_mask_ - 15;
|
||||
const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);
|
||||
const bool reroot_tree = max_length >= kMaxTreeCompLength;
|
||||
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
||||
size_t prev_ix = buckets_[key];
|
||||
// The forest index of the rightmost node of the left subtree of the new
|
||||
// root, updated as we traverse and reroot the tree of the hash bucket.
|
||||
size_t node_left = LeftChildIndex(cur_ix);
|
||||
// The forest index of the leftmost node of the right subtree of the new
|
||||
// root, updated as we traverse and reroot the tree of the hash bucket.
|
||||
size_t node_right = RightChildIndex(cur_ix);
|
||||
// The match length of the rightmost node of the left subtree of the new
|
||||
// root, updated as we traverse and reroot the tree of the hash bucket.
|
||||
size_t best_len_left = 0;
|
||||
// The match length of the leftmost node of the right subtree of the new
|
||||
// root, updated as we traverse and reroot the tree of the hash bucket.
|
||||
size_t best_len_right = 0;
|
||||
if (reroot_tree) {
|
||||
buckets_[key] = static_cast<uint32_t>(cur_ix);
|
||||
}
|
||||
for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
|
||||
if (backward == 0 || backward > max_backward || depth_remaining == 0) {
|
||||
if (reroot_tree) {
|
||||
forest_[node_left] = invalid_pos_;
|
||||
forest_[node_right] = invalid_pos_;
|
||||
}
|
||||
break;
|
||||
}
|
||||
const size_t cur_len = std::min(best_len_left, best_len_right);
|
||||
const size_t len = cur_len +
|
||||
FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
|
||||
&data[prev_ix_masked + cur_len],
|
||||
max_length - cur_len);
|
||||
if (len > *best_len) {
|
||||
*best_len = len;
|
||||
if (matches) {
|
||||
*matches++ = BackwardMatch(backward, len);
|
||||
}
|
||||
if (len >= max_comp_len) {
|
||||
if (reroot_tree) {
|
||||
forest_[node_left] = forest_[LeftChildIndex(prev_ix)];
|
||||
forest_[node_right] = forest_[RightChildIndex(prev_ix)];
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
|
||||
best_len_left = len;
|
||||
if (reroot_tree) {
|
||||
forest_[node_left] = static_cast<uint32_t>(prev_ix);
|
||||
}
|
||||
node_left = RightChildIndex(prev_ix);
|
||||
prev_ix = forest_[node_left];
|
||||
} else {
|
||||
best_len_right = len;
|
||||
if (reroot_tree) {
|
||||
forest_[node_right] = static_cast<uint32_t>(prev_ix);
|
||||
}
|
||||
node_right = LeftChildIndex(prev_ix);
|
||||
prev_ix = forest_[node_right];
|
||||
}
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
inline size_t LeftChildIndex(const size_t pos) {
|
||||
return 2 * (pos & window_mask_);
|
||||
}
|
||||
|
||||
inline size_t RightChildIndex(const size_t pos) {
|
||||
return 2 * (pos & window_mask_) + 1;
|
||||
}
|
||||
|
||||
static uint32_t HashBytes(const uint8_t *data) {
|
||||
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
||||
// The higher bits contain more mixture from the multiplication,
|
||||
// so we take our results from there.
|
||||
return h >> (32 - kBucketBits);
|
||||
}
|
||||
|
||||
static const int kBucketBits = 17;
|
||||
static const size_t kBucketSize = 1 << kBucketBits;
|
||||
|
||||
// The window size minus 1
|
||||
size_t window_mask_;
|
||||
|
||||
// Hash table that maps the 4-byte hashes of the sequence to the last
|
||||
// position where this hash was found, which is the root of the binary
|
||||
// tree of sequences that share this hash bucket.
|
||||
uint32_t buckets_[kBucketSize];
|
||||
|
||||
// The union of the binary trees of each hash bucket. The root of the tree
|
||||
// corresponding to a hash is a sequence starting at buckets_[hash] and
|
||||
// the left and right children of a sequence starting at pos are
|
||||
// forest_[2 * pos] and forest_[2 * pos + 1].
|
||||
uint32_t* forest_;
|
||||
|
||||
// A position used to mark a non-existent sequence, i.e. a tree is empty if
|
||||
// its root is at invalid_pos_ and a node is a leaf if both its children
|
||||
// are at invalid_pos_.
|
||||
uint32_t invalid_pos_;
|
||||
|
||||
bool need_init_;
|
||||
};
|
||||
|
||||
struct Hashers {
|
||||
// For kBucketSweep == 1, enabling the dictionary lookup makes compression
|
||||
// a little faster (0.5% - 1%) and it compresses 0.15% better on small text
|
||||
// and html inputs.
|
||||
typedef HashLongestMatchQuickly<16, 1, true> H2;
|
||||
typedef HashLongestMatchQuickly<16, 2, false> H3;
|
||||
typedef HashLongestMatchQuickly<17, 4, true> H4;
|
||||
typedef HashLongestMatch<14, 4, 4> H5;
|
||||
typedef HashLongestMatch<14, 5, 4> H6;
|
||||
typedef HashLongestMatch<15, 6, 10> H7;
|
||||
typedef HashLongestMatch<15, 7, 10> H8;
|
||||
typedef HashLongestMatch<15, 8, 16> H9;
|
||||
typedef HashToBinaryTree H10;
|
||||
|
||||
Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
|
||||
hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}
|
||||
|
||||
~Hashers(void) {
|
||||
delete hash_h2;
|
||||
delete hash_h3;
|
||||
delete hash_h4;
|
||||
delete hash_h5;
|
||||
delete hash_h6;
|
||||
delete hash_h7;
|
||||
delete hash_h8;
|
||||
delete hash_h9;
|
||||
delete hash_h10;
|
||||
}
|
||||
|
||||
void Init(int type) {
|
||||
switch (type) {
|
||||
case 2: hash_h2 = new H2; break;
|
||||
case 3: hash_h3 = new H3; break;
|
||||
case 4: hash_h4 = new H4; break;
|
||||
case 5: hash_h5 = new H5; break;
|
||||
case 6: hash_h6 = new H6; break;
|
||||
case 7: hash_h7 = new H7; break;
|
||||
case 8: hash_h8 = new H8; break;
|
||||
case 9: hash_h9 = new H9; break;
|
||||
case 10: hash_h10 = new H10; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Hasher>
|
||||
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
|
||||
hasher->Init();
|
||||
for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
|
||||
hasher->Store(&dict[i], static_cast<uint32_t>(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Custom LZ77 window.
|
||||
void PrependCustomDictionary(
|
||||
int type, int lgwin, const size_t size, const uint8_t* dict) {
|
||||
switch (type) {
|
||||
case 2: WarmupHash(size, dict, hash_h2); break;
|
||||
case 3: WarmupHash(size, dict, hash_h3); break;
|
||||
case 4: WarmupHash(size, dict, hash_h4); break;
|
||||
case 5: WarmupHash(size, dict, hash_h5); break;
|
||||
case 6: WarmupHash(size, dict, hash_h6); break;
|
||||
case 7: WarmupHash(size, dict, hash_h7); break;
|
||||
case 8: WarmupHash(size, dict, hash_h8); break;
|
||||
case 9: WarmupHash(size, dict, hash_h9); break;
|
||||
case 10:
|
||||
hash_h10->Init(lgwin, 0, size, false);
|
||||
for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
|
||||
hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
|
||||
}
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
H2* hash_h2;
|
||||
H3* hash_h3;
|
||||
H4* hash_h4;
|
||||
H5* hash_h5;
|
||||
H6* hash_h6;
|
||||
H7* hash_h7;
|
||||
H8* hash_h8;
|
||||
H9* hash_h9;
|
||||
H10* hash_h10;
|
||||
};
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_HASH_H_
|
|
@ -0,0 +1,67 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Build per-context histograms of literals, commands and distance codes.
|
||||
|
||||
#include "./histogram.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "./block_splitter.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
#include "./prefix.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void BuildHistograms(
|
||||
const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const BlockSplit& literal_split,
|
||||
const BlockSplit& insert_and_copy_split,
|
||||
const BlockSplit& dist_split,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t start_pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const std::vector<ContextType>& context_modes,
|
||||
std::vector<HistogramLiteral>* literal_histograms,
|
||||
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
||||
std::vector<HistogramDistance>* copy_dist_histograms) {
|
||||
size_t pos = start_pos;
|
||||
BlockSplitIterator literal_it(literal_split);
|
||||
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
|
||||
BlockSplitIterator dist_it(dist_split);
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
const Command &cmd = cmds[i];
|
||||
insert_and_copy_it.Next();
|
||||
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
||||
cmd.cmd_prefix_);
|
||||
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
||||
literal_it.Next();
|
||||
size_t context = (literal_it.type_ << kLiteralContextBits) +
|
||||
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
|
||||
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
||||
prev_byte2 = prev_byte;
|
||||
prev_byte = ringbuffer[pos & mask];
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len()) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd.cmd_prefix_ >= 128) {
|
||||
dist_it.Next();
|
||||
size_t context = (dist_it.type_ << kDistanceContextBits) +
|
||||
cmd.DistanceContext();
|
||||
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,94 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Models the histograms of literals, commands and distance codes.
|
||||
|
||||
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
||||
#define BROTLI_ENC_HISTOGRAM_H_
|
||||
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include "./context.h"
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./prefix.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
struct BlockSplit;
|
||||
|
||||
// Fixed-size histogram over the symbols 0 .. kDataSize - 1.
// Besides the per-symbol counts it keeps the total number of counted symbols
// and a bit_cost_ field, which Clear() sets to infinity.
// None of the methods check that a symbol is inside the valid range.
template<int kDataSize>
struct Histogram {
  // A new histogram starts out cleared.
  Histogram(void) {
    Clear();
  }

  // Zeroes all counts and the total, and sets bit_cost_ to infinity.
  void Clear(void) {
    bit_cost_ = std::numeric_limits<double>::infinity();
    total_count_ = 0;
    memset(data_, 0, sizeof(data_));
  }

  // Counts one occurrence of `val`.
  void Add(size_t val) {
    ++total_count_;
    ++data_[val];
  }

  // Takes back one occurrence of `val`.
  void Remove(size_t val) {
    --total_count_;
    --data_[val];
  }

  // Counts the n symbols p[0] .. p[n - 1].
  template<typename DataType>
  void Add(const DataType *p, size_t n) {
    total_count_ += n;
    for (size_t i = 0; i < n; ++i) {
      ++data_[p[i]];
    }
  }

  // Adds all counts (and the total) of `v` to this histogram.
  void AddHistogram(const Histogram& v) {
    for (size_t i = 0; i < kDataSize; ++i) {
      data_[i] += v.data_[i];
    }
    total_count_ += v.total_count_;
  }

  uint32_t data_[kDataSize];
  size_t total_count_;
  double bit_cost_;
};
|
||||
|
||||
// Literal histogram.
|
||||
typedef Histogram<256> HistogramLiteral;
|
||||
// Prefix histograms.
|
||||
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
|
||||
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
|
||||
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
|
||||
// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
|
||||
typedef Histogram<272> HistogramContextMap;
|
||||
// Block type histogram, 256 block types + 2 special symbols.
|
||||
typedef Histogram<258> HistogramBlockType;
|
||||
|
||||
static const size_t kLiteralContextBits = 6;
|
||||
static const size_t kDistanceContextBits = 2;
|
||||
|
||||
void BuildHistograms(
|
||||
const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const BlockSplit& literal_split,
|
||||
const BlockSplit& insert_and_copy_split,
|
||||
const BlockSplit& dist_split,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const std::vector<ContextType>& context_modes,
|
||||
std::vector<HistogramLiteral>* literal_histograms,
|
||||
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
||||
std::vector<HistogramDistance>* copy_dist_histograms);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_HISTOGRAM_H_
|
|
@ -0,0 +1,165 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Literal cost model to allow backward reference replacement to be efficient.
|
||||
|
||||
#include "./literal_cost.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "./fast_log.h"
|
||||
#include "./types.h"
|
||||
#include "./utf8_util.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Returns the UTF-8 "position" class of the byte that follows `c`, given
// that `last` is the byte before `c`.  The result is 0, 1 or 2 and never
// exceeds `clamp`:
//   0 - the next byte starts a new character ('Byte 1'),
//   1 - the next byte is the 'Byte 2' of a multi-byte sequence,
//   2 - the next byte is the 'Byte 3' of a multi-byte sequence.
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
  if (c < 128) {
    // Plain ASCII: next one is the 'Byte 1' again.
    return 0;
  }
  if (c >= 192) {
    // Lead byte: next one is the 'Byte 2' of utf-8 encoding.
    return clamp < 1 ? clamp : 1;
  }
  // c is in [128, 192): decide over the last byte if this ends the sequence.
  if (last < 0xe0) {
    return 0;  // Completed two or three byte coding.
  }
  // Next one is the 'Byte 3' of utf-8 encoding.
  return clamp < 2 ? clamp : 2;
}
|
||||
|
||||
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data) {
|
||||
size_t counts[3] = { 0 };
|
||||
size_t max_utf8 = 1; // should be 2, but 1 compresses better.
|
||||
size_t last_c = 0;
|
||||
size_t utf8_pos = 0;
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
size_t c = data[(pos + i) & mask];
|
||||
utf8_pos = UTF8Position(last_c, c, 2);
|
||||
++counts[utf8_pos];
|
||||
last_c = c;
|
||||
}
|
||||
if (counts[2] < 500) {
|
||||
max_utf8 = 1;
|
||||
}
|
||||
if (counts[1] + counts[2] < 25) {
|
||||
max_utf8 = 0;
|
||||
}
|
||||
return max_utf8;
|
||||
}
|
||||
|
||||
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost) {
|
||||
|
||||
// max_utf8 is 0 (normal ascii single byte modeling),
|
||||
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
|
||||
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
||||
size_t histogram[3][256] = { { 0 } };
|
||||
size_t window_half = 495;
|
||||
size_t in_window = std::min(window_half, len);
|
||||
size_t in_window_utf8[3] = { 0 };
|
||||
|
||||
// Bootstrap histograms.
|
||||
size_t last_c = 0;
|
||||
size_t utf8_pos = 0;
|
||||
for (size_t i = 0; i < in_window; ++i) {
|
||||
size_t c = data[(pos + i) & mask];
|
||||
++histogram[utf8_pos][c];
|
||||
++in_window_utf8[utf8_pos];
|
||||
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
last_c = c;
|
||||
}
|
||||
|
||||
// Compute bit costs with sliding window.
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
if (i >= window_half) {
|
||||
// Remove a byte in the past.
|
||||
size_t c = i < window_half + 1 ?
|
||||
0 : data[(pos + i - window_half - 1) & mask];
|
||||
size_t last_c = i < window_half + 2 ?
|
||||
0 : data[(pos + i - window_half - 2) & mask];
|
||||
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
||||
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
|
||||
--in_window_utf8[utf8_pos2];
|
||||
}
|
||||
if (i + window_half < len) {
|
||||
// Add a byte in the future.
|
||||
size_t c = data[(pos + i + window_half - 1) & mask];
|
||||
size_t last_c = data[(pos + i + window_half - 2) & mask];
|
||||
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
||||
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
|
||||
++in_window_utf8[utf8_pos2];
|
||||
}
|
||||
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
||||
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
||||
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
size_t masked_pos = (pos + i) & mask;
|
||||
size_t histo = histogram[utf8_pos][data[masked_pos]];
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
double lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
||||
lit_cost += 0.02905;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
// Make the first bytes more expensive -- seems to help, not sure why.
|
||||
// Perhaps because the entropy source is changing its properties
|
||||
// rapidly in the beginning of the file, perhaps because the beginning
|
||||
// of the data is a statistical "anomaly".
|
||||
if (i < 2000) {
|
||||
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
|
||||
}
|
||||
cost[i] = static_cast<float>(lit_cost);
|
||||
}
|
||||
}
|
||||
|
||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost) {
|
||||
if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
|
||||
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
|
||||
return;
|
||||
}
|
||||
size_t histogram[256] = { 0 };
|
||||
size_t window_half = 2000;
|
||||
size_t in_window = std::min(window_half, len);
|
||||
|
||||
// Bootstrap histogram.
|
||||
for (size_t i = 0; i < in_window; ++i) {
|
||||
++histogram[data[(pos + i) & mask]];
|
||||
}
|
||||
|
||||
// Compute bit costs with sliding window.
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
if (i >= window_half) {
|
||||
// Remove a byte in the past.
|
||||
--histogram[data[(pos + i - window_half) & mask]];
|
||||
--in_window;
|
||||
}
|
||||
if (i + window_half < len) {
|
||||
// Add a byte in the future.
|
||||
++histogram[data[(pos + i + window_half) & mask]];
|
||||
++in_window;
|
||||
}
|
||||
size_t histo = histogram[data[(pos + i) & mask]];
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
double lit_cost = FastLog2(in_window) - FastLog2(histo);
|
||||
lit_cost += 0.029;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
cost[i] = static_cast<float>(lit_cost);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,24 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Literal cost model to allow backward reference replacement to be efficient.
|
||||
|
||||
#ifndef BROTLI_ENC_LITERAL_COST_H_
|
||||
#define BROTLI_ENC_LITERAL_COST_H_
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Estimates how many bits the literals in the interval [pos, pos + len) in the
|
||||
// ringbuffer (data, mask) will take entropy coded and writes these estimates
|
||||
// to the cost[0..len) array.
|
||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_LITERAL_COST_H_
|
|
@ -0,0 +1,539 @@
|
|||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Algorithms for distributing the literals and commands of a metablock between
|
||||
// block types and contexts.
|
||||
|
||||
#include "./metablock.h"
|
||||
|
||||
#include "./block_splitter.h"
|
||||
#include "./context.h"
|
||||
#include "./cluster.h"
|
||||
#include "./histogram.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void BuildMetaBlock(const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const Command* cmds,
|
||||
size_t num_commands,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb) {
|
||||
SplitBlock(cmds, num_commands,
|
||||
ringbuffer, pos, mask,
|
||||
&mb->literal_split,
|
||||
&mb->command_split,
|
||||
&mb->distance_split);
|
||||
|
||||
std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
|
||||
literal_context_mode);
|
||||
|
||||
size_t num_literal_contexts =
|
||||
mb->literal_split.num_types << kLiteralContextBits;
|
||||
size_t num_distance_contexts =
|
||||
mb->distance_split.num_types << kDistanceContextBits;
|
||||
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
||||
mb->command_histograms.resize(mb->command_split.num_types);
|
||||
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
||||
BuildHistograms(cmds, num_commands,
|
||||
mb->literal_split,
|
||||
mb->command_split,
|
||||
mb->distance_split,
|
||||
ringbuffer,
|
||||
pos,
|
||||
mask,
|
||||
prev_byte,
|
||||
prev_byte2,
|
||||
literal_context_modes,
|
||||
&literal_histograms,
|
||||
&mb->command_histograms,
|
||||
&distance_histograms);
|
||||
|
||||
// Histogram ids need to fit in one byte.
|
||||
static const size_t kMaxNumberOfHistograms = 256;
|
||||
|
||||
ClusterHistograms(literal_histograms,
|
||||
1u << kLiteralContextBits,
|
||||
mb->literal_split.num_types,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->literal_histograms,
|
||||
&mb->literal_context_map);
|
||||
|
||||
ClusterHistograms(distance_histograms,
|
||||
1u << kDistanceContextBits,
|
||||
mb->distance_split.num_types,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->distance_histograms,
|
||||
&mb->distance_context_map);
|
||||
}
|
||||
|
||||
// Greedy block splitter for one block category (literal, command or distance).
|
||||
template<typename HistogramType>
|
||||
class BlockSplitter {
|
||||
public:
|
||||
BlockSplitter(size_t alphabet_size,
|
||||
size_t min_block_size,
|
||||
double split_threshold,
|
||||
size_t num_symbols,
|
||||
BlockSplit* split,
|
||||
std::vector<HistogramType>* histograms)
|
||||
: alphabet_size_(alphabet_size),
|
||||
min_block_size_(min_block_size),
|
||||
split_threshold_(split_threshold),
|
||||
num_blocks_(0),
|
||||
split_(split),
|
||||
histograms_(histograms),
|
||||
target_block_size_(min_block_size),
|
||||
block_size_(0),
|
||||
curr_histogram_ix_(0),
|
||||
merge_last_count_(0) {
|
||||
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
||||
// We have to allocate one more histogram than the maximum number of block
|
||||
// types for the current histogram when the meta-block is too big.
|
||||
size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
|
||||
split_->lengths.resize(max_num_blocks);
|
||||
split_->types.resize(max_num_blocks);
|
||||
histograms_->resize(max_num_types);
|
||||
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
||||
}
|
||||
|
||||
// Adds the next symbol to the current histogram. When the current histogram
|
||||
// reaches the target size, decides on merging the block.
|
||||
void AddSymbol(size_t symbol) {
|
||||
(*histograms_)[curr_histogram_ix_].Add(symbol);
|
||||
++block_size_;
|
||||
if (block_size_ == target_block_size_) {
|
||||
FinishBlock(/* is_final = */ false);
|
||||
}
|
||||
}
|
||||
|
||||
// Does either of three things:
|
||||
// (1) emits the current block with a new block type;
|
||||
// (2) emits the current block with the type of the second last block;
|
||||
// (3) merges the current block with the last block.
|
||||
void FinishBlock(bool is_final) {
|
||||
if (block_size_ < min_block_size_) {
|
||||
block_size_ = min_block_size_;
|
||||
}
|
||||
if (num_blocks_ == 0) {
|
||||
// Create first block.
|
||||
split_->lengths[0] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[0] = 0;
|
||||
last_entropy_[0] =
|
||||
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
|
||||
last_entropy_[1] = last_entropy_[0];
|
||||
++num_blocks_;
|
||||
++split_->num_types;
|
||||
++curr_histogram_ix_;
|
||||
block_size_ = 0;
|
||||
} else if (block_size_ > 0) {
|
||||
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
|
||||
alphabet_size_);
|
||||
HistogramType combined_histo[2];
|
||||
double combined_entropy[2];
|
||||
double diff[2];
|
||||
for (size_t j = 0; j < 2; ++j) {
|
||||
size_t last_histogram_ix = last_histogram_ix_[j];
|
||||
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
|
||||
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
|
||||
combined_entropy[j] = BitsEntropy(
|
||||
&combined_histo[j].data_[0], alphabet_size_);
|
||||
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
|
||||
}
|
||||
|
||||
if (split_->num_types < kMaxBlockTypes &&
|
||||
diff[0] > split_threshold_ &&
|
||||
diff[1] > split_threshold_) {
|
||||
// Create new block.
|
||||
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
|
||||
last_histogram_ix_[1] = last_histogram_ix_[0];
|
||||
last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
|
||||
last_entropy_[1] = last_entropy_[0];
|
||||
last_entropy_[0] = entropy;
|
||||
++num_blocks_;
|
||||
++split_->num_types;
|
||||
++curr_histogram_ix_;
|
||||
block_size_ = 0;
|
||||
merge_last_count_ = 0;
|
||||
target_block_size_ = min_block_size_;
|
||||
} else if (diff[1] < diff[0] - 20.0) {
|
||||
// Combine this block with second last block.
|
||||
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
||||
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
||||
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
|
||||
last_entropy_[1] = last_entropy_[0];
|
||||
last_entropy_[0] = combined_entropy[1];
|
||||
++num_blocks_;
|
||||
block_size_ = 0;
|
||||
(*histograms_)[curr_histogram_ix_].Clear();
|
||||
merge_last_count_ = 0;
|
||||
target_block_size_ = min_block_size_;
|
||||
} else {
|
||||
// Combine this block with last block.
|
||||
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
|
||||
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
|
||||
last_entropy_[0] = combined_entropy[0];
|
||||
if (split_->num_types == 1) {
|
||||
last_entropy_[1] = last_entropy_[0];
|
||||
}
|
||||
block_size_ = 0;
|
||||
(*histograms_)[curr_histogram_ix_].Clear();
|
||||
if (++merge_last_count_ > 1) {
|
||||
target_block_size_ += min_block_size_;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_final) {
|
||||
(*histograms_).resize(split_->num_types);
|
||||
split_->types.resize(num_blocks_);
|
||||
split_->lengths.resize(num_blocks_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static const uint16_t kMaxBlockTypes = 256;
|
||||
|
||||
// Alphabet size of particular block category.
|
||||
const size_t alphabet_size_;
|
||||
// We collect at least this many symbols for each block.
|
||||
const size_t min_block_size_;
|
||||
// We merge histograms A and B if
|
||||
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
||||
// where A is the current histogram and B is the histogram of the last or the
|
||||
// second last block type.
|
||||
const double split_threshold_;
|
||||
|
||||
size_t num_blocks_;
|
||||
BlockSplit* split_; // not owned
|
||||
std::vector<HistogramType>* histograms_; // not owned
|
||||
|
||||
// The number of symbols that we want to collect before deciding on whether
|
||||
// or not to merge the block with a previous one or emit a new block.
|
||||
size_t target_block_size_;
|
||||
// The number of symbols in the current histogram.
|
||||
size_t block_size_;
|
||||
// Offset of the current histogram.
|
||||
size_t curr_histogram_ix_;
|
||||
// Offset of the histograms of the previous two block types.
|
||||
size_t last_histogram_ix_[2];
|
||||
// Entropy of the previous two block types.
|
||||
double last_entropy_[2];
|
||||
// The number of times we merged the current block with the last one.
|
||||
size_t merge_last_count_;
|
||||
};
|
||||
|
||||
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb) {
|
||||
size_t num_literals = 0;
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
num_literals += commands[i].insert_len_;
|
||||
}
|
||||
|
||||
BlockSplitter<HistogramLiteral> lit_blocks(
|
||||
256, 512, 400.0, num_literals,
|
||||
&mb->literal_split, &mb->literal_histograms);
|
||||
BlockSplitter<HistogramCommand> cmd_blocks(
|
||||
kNumCommandPrefixes, 1024, 500.0, n_commands,
|
||||
&mb->command_split, &mb->command_histograms);
|
||||
BlockSplitter<HistogramDistance> dist_blocks(
|
||||
64, 512, 100.0, n_commands,
|
||||
&mb->distance_split, &mb->distance_histograms);
|
||||
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
const Command cmd = commands[i];
|
||||
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
||||
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
||||
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
||||
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
|
||||
lit_blocks.FinishBlock(/* is_final = */ true);
|
||||
cmd_blocks.FinishBlock(/* is_final = */ true);
|
||||
dist_blocks.FinishBlock(/* is_final = */ true);
|
||||
}
|
||||
|
||||
// Greedy block splitter for one block category (literal, command or distance).
|
||||
// Gathers histograms for all context buckets.
|
||||
template<typename HistogramType>
|
||||
class ContextBlockSplitter {
|
||||
public:
|
||||
ContextBlockSplitter(size_t alphabet_size,
|
||||
size_t num_contexts,
|
||||
size_t min_block_size,
|
||||
double split_threshold,
|
||||
size_t num_symbols,
|
||||
BlockSplit* split,
|
||||
std::vector<HistogramType>* histograms)
|
||||
: alphabet_size_(alphabet_size),
|
||||
num_contexts_(num_contexts),
|
||||
max_block_types_(kMaxBlockTypes / num_contexts),
|
||||
min_block_size_(min_block_size),
|
||||
split_threshold_(split_threshold),
|
||||
num_blocks_(0),
|
||||
split_(split),
|
||||
histograms_(histograms),
|
||||
target_block_size_(min_block_size),
|
||||
block_size_(0),
|
||||
curr_histogram_ix_(0),
|
||||
last_entropy_(2 * num_contexts),
|
||||
merge_last_count_(0) {
|
||||
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
||||
// We have to allocate one more histogram than the maximum number of block
|
||||
// types for the current histogram when the meta-block is too big.
|
||||
size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
|
||||
split_->lengths.resize(max_num_blocks);
|
||||
split_->types.resize(max_num_blocks);
|
||||
histograms_->resize(max_num_types * num_contexts);
|
||||
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
||||
}
|
||||
|
||||
// Adds the next symbol to the current block type and context. When the
|
||||
// current block reaches the target size, decides on merging the block.
|
||||
void AddSymbol(size_t symbol, size_t context) {
|
||||
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
|
||||
++block_size_;
|
||||
if (block_size_ == target_block_size_) {
|
||||
FinishBlock(/* is_final = */ false);
|
||||
}
|
||||
}
|
||||
|
||||
// Does either of three things:
|
||||
// (1) emits the current block with a new block type;
|
||||
// (2) emits the current block with the type of the second last block;
|
||||
// (3) merges the current block with the last block.
|
||||
void FinishBlock(bool is_final) {
|
||||
if (block_size_ < min_block_size_) {
|
||||
block_size_ = min_block_size_;
|
||||
}
|
||||
if (num_blocks_ == 0) {
|
||||
// Create first block.
|
||||
split_->lengths[0] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[0] = 0;
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
last_entropy_[i] =
|
||||
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
|
||||
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
||||
}
|
||||
++num_blocks_;
|
||||
++split_->num_types;
|
||||
curr_histogram_ix_ += num_contexts_;
|
||||
block_size_ = 0;
|
||||
} else if (block_size_ > 0) {
|
||||
// Try merging the set of histograms for the current block type with the
|
||||
// respective set of histograms for the last and second last block types.
|
||||
// Decide over the split based on the total reduction of entropy across
|
||||
// all contexts.
|
||||
std::vector<double> entropy(num_contexts_);
|
||||
std::vector<HistogramType> combined_histo(2 * num_contexts_);
|
||||
std::vector<double> combined_entropy(2 * num_contexts_);
|
||||
double diff[2] = { 0.0 };
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
size_t curr_histo_ix = curr_histogram_ix_ + i;
|
||||
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
|
||||
alphabet_size_);
|
||||
for (size_t j = 0; j < 2; ++j) {
|
||||
size_t jx = j * num_contexts_ + i;
|
||||
size_t last_histogram_ix = last_histogram_ix_[j] + i;
|
||||
combined_histo[jx] = (*histograms_)[curr_histo_ix];
|
||||
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
|
||||
combined_entropy[jx] = BitsEntropy(
|
||||
&combined_histo[jx].data_[0], alphabet_size_);
|
||||
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
|
||||
}
|
||||
}
|
||||
|
||||
if (split_->num_types < max_block_types_ &&
|
||||
diff[0] > split_threshold_ &&
|
||||
diff[1] > split_threshold_) {
|
||||
// Create new block.
|
||||
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
|
||||
last_histogram_ix_[1] = last_histogram_ix_[0];
|
||||
last_histogram_ix_[0] = split_->num_types * num_contexts_;
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
||||
last_entropy_[i] = entropy[i];
|
||||
}
|
||||
++num_blocks_;
|
||||
++split_->num_types;
|
||||
curr_histogram_ix_ += num_contexts_;
|
||||
block_size_ = 0;
|
||||
merge_last_count_ = 0;
|
||||
target_block_size_ = min_block_size_;
|
||||
} else if (diff[1] < diff[0] - 20.0) {
|
||||
// Combine this block with second last block.
|
||||
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
||||
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
(*histograms_)[last_histogram_ix_[0] + i] =
|
||||
combined_histo[num_contexts_ + i];
|
||||
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
||||
last_entropy_[i] = combined_entropy[num_contexts_ + i];
|
||||
(*histograms_)[curr_histogram_ix_ + i].Clear();
|
||||
}
|
||||
++num_blocks_;
|
||||
block_size_ = 0;
|
||||
merge_last_count_ = 0;
|
||||
target_block_size_ = min_block_size_;
|
||||
} else {
|
||||
// Combine this block with last block.
|
||||
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
|
||||
last_entropy_[i] = combined_entropy[i];
|
||||
if (split_->num_types == 1) {
|
||||
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
||||
}
|
||||
(*histograms_)[curr_histogram_ix_ + i].Clear();
|
||||
}
|
||||
block_size_ = 0;
|
||||
if (++merge_last_count_ > 1) {
|
||||
target_block_size_ += min_block_size_;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_final) {
|
||||
(*histograms_).resize(split_->num_types * num_contexts_);
|
||||
split_->types.resize(num_blocks_);
|
||||
split_->lengths.resize(num_blocks_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static const int kMaxBlockTypes = 256;
|
||||
|
||||
// Alphabet size of particular block category.
|
||||
const size_t alphabet_size_;
|
||||
const size_t num_contexts_;
|
||||
const size_t max_block_types_;
|
||||
// We collect at least this many symbols for each block.
|
||||
const size_t min_block_size_;
|
||||
// We merge histograms A and B if
|
||||
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
||||
// where A is the current histogram and B is the histogram of the last or the
|
||||
// second last block type.
|
||||
const double split_threshold_;
|
||||
|
||||
size_t num_blocks_;
|
||||
BlockSplit* split_; // not owned
|
||||
std::vector<HistogramType>* histograms_; // not owned
|
||||
|
||||
// The number of symbols that we want to collect before deciding on whether
|
||||
// or not to merge the block with a previous one or emit a new block.
|
||||
size_t target_block_size_;
|
||||
// The number of symbols in the current histogram.
|
||||
size_t block_size_;
|
||||
// Offset of the current histogram.
|
||||
size_t curr_histogram_ix_;
|
||||
// Offset of the histograms of the previous two block types.
|
||||
size_t last_histogram_ix_[2];
|
||||
// Entropy of the previous two block types.
|
||||
std::vector<double> last_entropy_;
|
||||
// The number of times we merged the current block with the last one.
|
||||
size_t merge_last_count_;
|
||||
};
|
||||
|
||||
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
ContextType literal_context_mode,
|
||||
size_t num_contexts,
|
||||
const uint32_t* static_context_map,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb) {
|
||||
size_t num_literals = 0;
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
num_literals += commands[i].insert_len_;
|
||||
}
|
||||
|
||||
ContextBlockSplitter<HistogramLiteral> lit_blocks(
|
||||
256, num_contexts, 512, 400.0, num_literals,
|
||||
&mb->literal_split, &mb->literal_histograms);
|
||||
BlockSplitter<HistogramCommand> cmd_blocks(
|
||||
kNumCommandPrefixes, 1024, 500.0, n_commands,
|
||||
&mb->command_split, &mb->command_histograms);
|
||||
BlockSplitter<HistogramDistance> dist_blocks(
|
||||
64, 512, 100.0, n_commands,
|
||||
&mb->distance_split, &mb->distance_histograms);
|
||||
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
const Command cmd = commands[i];
|
||||
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
||||
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
||||
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
|
||||
uint8_t literal = ringbuffer[pos & mask];
|
||||
lit_blocks.AddSymbol(literal, static_context_map[context]);
|
||||
prev_byte2 = prev_byte;
|
||||
prev_byte = literal;
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len()) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd.cmd_prefix_ >= 128) {
|
||||
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lit_blocks.FinishBlock(/* is_final = */ true);
|
||||
cmd_blocks.FinishBlock(/* is_final = */ true);
|
||||
dist_blocks.FinishBlock(/* is_final = */ true);
|
||||
|
||||
mb->literal_context_map.resize(
|
||||
mb->literal_split.num_types << kLiteralContextBits);
|
||||
for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
|
||||
for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
|
||||
mb->literal_context_map[(i << kLiteralContextBits) + j] =
|
||||
static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OptimizeHistograms(size_t num_direct_distance_codes,
|
||||
size_t distance_postfix_bits,
|
||||
MetaBlockSplit* mb) {
|
||||
uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
|
||||
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
}
|
||||
for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
|
||||
&mb->command_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
}
|
||||
size_t num_distance_codes =
|
||||
kNumDistanceShortCodes + num_direct_distance_codes +
|
||||
(48u << distance_postfix_bits);
|
||||
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(num_distance_codes,
|
||||
&mb->distance_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
}
|
||||
delete[] good_for_rle;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,80 @@
|
|||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Algorithms for distributing the literals and commands of a metablock between
|
||||
// block types and contexts.
|
||||
|
||||
#ifndef BROTLI_ENC_METABLOCK_H_
|
||||
#define BROTLI_ENC_METABLOCK_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "./command.h"
|
||||
#include "./histogram.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Describes how the symbols of one category are divided into blocks.
struct BlockSplit {
  // Starts out as an empty split: no block types and no blocks.
  BlockSplit() : num_types(0), types(), lengths() {}

  // Number of distinct block types in use.
  size_t num_types;
  // Block type of each block.
  std::vector<uint8_t> types;
  // Length of each block.
  std::vector<uint32_t> lengths;
};
|
||||
|
||||
struct MetaBlockSplit {
|
||||
BlockSplit literal_split;
|
||||
BlockSplit command_split;
|
||||
BlockSplit distance_split;
|
||||
std::vector<uint32_t> literal_context_map;
|
||||
std::vector<uint32_t> distance_context_map;
|
||||
std::vector<HistogramLiteral> literal_histograms;
|
||||
std::vector<HistogramCommand> command_histograms;
|
||||
std::vector<HistogramDistance> distance_histograms;
|
||||
};
|
||||
|
||||
// Uses the slow shortest-path block splitter and does context clustering.
|
||||
void BuildMetaBlock(const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const Command* cmds,
|
||||
size_t num_commands,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
// Uses a fast greedy block splitter that tries to merge current block with the
|
||||
// last or the second last block and does not do any context modeling.
|
||||
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
// Uses a fast greedy block splitter that tries to merge current block with the
|
||||
// last or the second last block and uses a static context clustering which
|
||||
// is the same for all block types.
|
||||
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
ContextType literal_context_mode,
|
||||
size_t num_contexts,
|
||||
const uint32_t* static_context_map,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
void OptimizeHistograms(size_t num_direct_distance_codes,
|
||||
size_t distance_postfix_bits,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_METABLOCK_H_
|
|
@ -0,0 +1,142 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Macros for endianness, branch prediction and unaligned loads and stores.
|
||||
|
||||
#ifndef BROTLI_ENC_PORT_H_
|
||||
#define BROTLI_ENC_PORT_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "./types.h"
|
||||
|
||||
#if defined OS_LINUX || defined OS_CYGWIN
|
||||
#include <endian.h>
|
||||
#elif defined OS_FREEBSD
|
||||
#include <machine/endian.h>
|
||||
#elif defined OS_MACOSX
|
||||
#include <machine/endian.h>
|
||||
/* Let's try and follow the Linux convention */
|
||||
#define __BYTE_ORDER BYTE_ORDER
|
||||
#define __LITTLE_ENDIAN LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
// define the macro IS_LITTLE_ENDIAN
|
||||
// using the above endian definitions from endian.h if
|
||||
// endian.h was included
|
||||
#ifdef __BYTE_ORDER
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__LITTLE_ENDIAN__)
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
#endif // __BYTE_ORDER
|
||||
|
||||
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
// Enable little-endian optimization for x64 architecture on Windows.
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Branch prediction hints. PREDICT_FALSE(x) marks a condition that is
// expected to be false, PREDICT_TRUE(x) one that is expected to be true.
//
// Both macros normalise their argument with !!(x) and therefore evaluate to
// 0 or 1. This keeps the two macros consistent, makes them usable with
// pointers, and avoids losing the high bits of a wide integer when the value
// is converted to the `long` parameter of __builtin_expect.
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
    (defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
// No compiler support: the macros only normalise the condition.
#define PREDICT_FALSE(x) (!!(x))
#define PREDICT_TRUE(x) (!!(x))
#endif
|
||||
|
||||
// Portable handling of unaligned loads, stores, and copies.
|
||||
// On some platforms, like ARM, the copy functions can be more efficient
|
||||
// than a load and a store.
|
||||
|
||||
#if defined(ARCH_PIII) || \
|
||||
defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
|
||||
|
||||
// x86 and x86-64 can perform unaligned loads/stores directly;
|
||||
// modern PowerPC hardware can also do unaligned integer loads and stores;
|
||||
// but note: the FPU still sends unaligned loads and stores to a trap handler!
|
||||
|
||||
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
|
||||
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
|
||||
|
||||
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
|
||||
(*reinterpret_cast<uint32_t *>(_p) = (_val))
|
||||
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
|
||||
(*reinterpret_cast<uint64_t *>(_p) = (_val))
|
||||
|
||||
#elif defined(__arm__) && \
|
||||
!defined(__ARM_ARCH_5__) && \
|
||||
!defined(__ARM_ARCH_5T__) && \
|
||||
!defined(__ARM_ARCH_5TE__) && \
|
||||
!defined(__ARM_ARCH_5TEJ__) && \
|
||||
!defined(__ARM_ARCH_6__) && \
|
||||
!defined(__ARM_ARCH_6J__) && \
|
||||
!defined(__ARM_ARCH_6K__) && \
|
||||
!defined(__ARM_ARCH_6Z__) && \
|
||||
!defined(__ARM_ARCH_6ZK__) && \
|
||||
!defined(__ARM_ARCH_6T2__)
|
||||
|
||||
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
|
||||
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
|
||||
// do an unaligned read and rotate the words around a bit, or do the reads very
|
||||
// slowly (trip through kernel mode).
|
||||
|
||||
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
|
||||
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
|
||||
(*reinterpret_cast<uint32_t *>(_p) = (_val))
|
||||
|
||||
// Reads a 64-bit value from p, which does not have to be aligned. The bytes
// are copied with memcpy, so the result is in host byte order.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(uint64_t));
  return value;
}
|
||||
|
||||
// Writes the 64-bit value v to p, which does not have to be aligned. The
// bytes are copied with memcpy, so they are stored in host byte order.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(uint64_t));
}
|
||||
|
||||
#else
|
||||
|
||||
// These functions are provided for architectures that don't support
|
||||
// unaligned loads and stores.
|
||||
|
||||
// Reads a 32-bit value from p, which does not have to be aligned. The bytes
// are copied with memcpy, so the result is in host byte order.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
  uint32_t value;
  memcpy(&value, p, sizeof(uint32_t));
  return value;
}
|
||||
|
||||
// Reads a 64-bit value from p, which does not have to be aligned. The bytes
// are copied with memcpy, so the result is in host byte order.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(uint64_t));
  return value;
}
|
||||
|
||||
// Writes the 32-bit value v to p, which does not have to be aligned. The
// bytes are copied with memcpy, so they are stored in host byte order.
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
  memcpy(p, &v, sizeof(uint32_t));
}
|
||||
|
||||
// Writes the 64-bit value v to p, which does not have to be aligned. The
// bytes are copied with memcpy, so they are stored in host byte order.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(uint64_t));
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // BROTLI_ENC_PORT_H_
|
|
@ -0,0 +1,79 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Functions for encoding integers into prefix codes, the amount of extra
|
||||
// bits, and the actual values of the extra bits.
|
||||
|
||||
#ifndef BROTLI_ENC_PREFIX_H_
|
||||
#define BROTLI_ENC_PREFIX_H_
|
||||
|
||||
#include "./fast_log.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Alphabet sizes of the prefix codes used by the encoder.
static const uint32_t kNumInsertLenPrefixes = 24;
static const uint32_t kNumCopyLenPrefixes = 24;
static const uint32_t kNumCommandPrefixes = 704;
static const uint32_t kNumBlockLenPrefixes = 26;
static const uint32_t kNumDistanceShortCodes = 16;
static const uint32_t kNumDistancePrefixes = 520;

// The range of values that belong to one prefix code:
//   [offset, offset + 2^nbits)
struct PrefixCodeRange {
  uint32_t offset;  // smallest value covered by the code
  uint32_t nbits;   // number of extra bits that select a value in the range
};

// Block length prefix codes, ordered by increasing offset.
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
  {    1,  2}, {    5,  2}, {    9,  2}, {   13,  2},
  {   17,  3}, {   25,  3}, {   33,  3}, {   41,  3},
  {   49,  4}, {   65,  4}, {   81,  4}, {   97,  4},
  {  113,  5}, {  145,  5}, {  177,  5}, {  209,  5},
  {  241,  6}, {  305,  6}, {  369,  7}, {  497,  8},
  {  753,  9}, { 1265, 10}, { 2289, 11}, { 4337, 12},
  { 8433, 13}, {16625, 24}
};

// Finds the block length prefix code for len: the last table entry whose
// offset does not exceed len (entry 0 if there is none).
//   *code    receives the index of that entry,
//   *n_extra receives its number of extra bits,
//   *extra   receives len minus the entry's offset.
inline void GetBlockLengthPrefixCode(uint32_t len, uint32_t* code,
                                     uint32_t* n_extra, uint32_t* extra) {
  const uint32_t last_code = kNumBlockLenPrefixes - 1;
  uint32_t index = 0;
  // Offsets increase along the table, so step forward while the next entry
  // still starts at or below len.
  while (index < last_code &&
         len >= kBlockLengthPrefixCode[index + 1].offset) {
    ++index;
  }
  const PrefixCodeRange& range = kBlockLengthPrefixCode[index];
  *code = index;
  *n_extra = range.nbits;
  *extra = len - range.offset;
}
|
||||
|
||||
inline void PrefixEncodeCopyDistance(size_t distance_code,
|
||||
size_t num_direct_codes,
|
||||
size_t postfix_bits,
|
||||
uint16_t* code,
|
||||
uint32_t* extra_bits) {
|
||||
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
|
||||
*code = static_cast<uint16_t>(distance_code);
|
||||
*extra_bits = 0;
|
||||
return;
|
||||
}
|
||||
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
|
||||
distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
|
||||
size_t bucket = Log2FloorNonZero(distance_code) - 1;
|
||||
size_t postfix_mask = (1 << postfix_bits) - 1;
|
||||
size_t postfix = distance_code & postfix_mask;
|
||||
size_t prefix = (distance_code >> bucket) & 1;
|
||||
size_t offset = (2 + prefix) << bucket;
|
||||
size_t nbits = bucket - postfix_bits;
|
||||
*code = static_cast<uint16_t>(
|
||||
(kNumDistanceShortCodes + num_direct_codes +
|
||||
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
|
||||
*extra_bits = static_cast<uint32_t>(
|
||||
(nbits << 24) | ((distance_code - offset) >> postfix_bits));
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_PREFIX_H_
|
|
@ -0,0 +1,145 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Sliding window over the input data.
|
||||
|
||||
#ifndef BROTLI_ENC_RINGBUFFER_H_
|
||||
#define BROTLI_ENC_RINGBUFFER_H_
|
||||
|
||||
#include <cstdlib> /* free, realloc */
|
||||
|
||||
#include "./port.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
|
||||
// data in a circular manner: writing a byte writes it to:
|
||||
// `position() % (1 << window_bits)'.
|
||||
// For convenience, the RingBuffer array contains another copy of the
|
||||
// first `1 << tail_bits' bytes:
|
||||
// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
|
||||
// and another copy of the last two bytes:
|
||||
// buffer_[-1] == buffer_[(1 << window_bits) - 1] and
|
||||
// buffer_[-2] == buffer_[(1 << window_bits) - 2].
|
||||
class RingBuffer {
|
||||
public:
|
||||
RingBuffer(int window_bits, int tail_bits)
|
||||
: size_(1u << window_bits),
|
||||
mask_((1u << window_bits) - 1),
|
||||
tail_size_(1u << tail_bits),
|
||||
total_size_(size_ + tail_size_),
|
||||
cur_size_(0),
|
||||
pos_(0),
|
||||
data_(0),
|
||||
buffer_(0) {}
|
||||
|
||||
~RingBuffer(void) {
|
||||
free(data_);
|
||||
}
|
||||
|
||||
// Allocates or re-allocates data_ to the given length + plus some slack
|
||||
// region before and after. Fills the slack regions with zeros.
|
||||
inline void InitBuffer(const uint32_t buflen) {
|
||||
static const size_t kSlackForEightByteHashingEverywhere = 7;
|
||||
cur_size_ = buflen;
|
||||
data_ = static_cast<uint8_t*>(realloc(
|
||||
data_, 2 + buflen + kSlackForEightByteHashingEverywhere));
|
||||
buffer_ = data_ + 2;
|
||||
buffer_[-2] = buffer_[-1] = 0;
|
||||
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
||||
buffer_[cur_size_ + i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Push bytes into the ring buffer.
|
||||
void Write(const uint8_t *bytes, size_t n) {
|
||||
if (pos_ == 0 && n < tail_size_) {
|
||||
// Special case for the first write: to process the first block, we don't
|
||||
// need to allocate the whole ringbuffer and we don't need the tail
|
||||
// either. However, we do this memory usage optimization only if the
|
||||
// first write is less than the tail size, which is also the input block
|
||||
// size, otherwise it is likely that other blocks will follow and we
|
||||
// will need to reallocate to the full size anyway.
|
||||
pos_ = static_cast<uint32_t>(n);
|
||||
InitBuffer(pos_);
|
||||
memcpy(buffer_, bytes, n);
|
||||
return;
|
||||
}
|
||||
if (cur_size_ < total_size_) {
|
||||
// Lazily allocate the full buffer.
|
||||
InitBuffer(total_size_);
|
||||
// Initialize the last two bytes to zero, so that we don't have to worry
|
||||
// later when we copy the last two bytes to the first two positions.
|
||||
buffer_[size_ - 2] = 0;
|
||||
buffer_[size_ - 1] = 0;
|
||||
}
|
||||
const size_t masked_pos = pos_ & mask_;
|
||||
// The length of the writes is limited so that we do not need to worry
|
||||
// about a write
|
||||
WriteTail(bytes, n);
|
||||
if (PREDICT_TRUE(masked_pos + n <= size_)) {
|
||||
// A single write fits.
|
||||
memcpy(&buffer_[masked_pos], bytes, n);
|
||||
} else {
|
||||
// Split into two writes.
|
||||
// Copy into the end of the buffer, including the tail buffer.
|
||||
memcpy(&buffer_[masked_pos], bytes,
|
||||
std::min(n, total_size_ - masked_pos));
|
||||
// Copy into the beginning of the buffer
|
||||
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
|
||||
n - (size_ - masked_pos));
|
||||
}
|
||||
buffer_[-2] = buffer_[size_ - 2];
|
||||
buffer_[-1] = buffer_[size_ - 1];
|
||||
pos_ += static_cast<uint32_t>(n);
|
||||
if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
|
||||
pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
|
||||
}
|
||||
}
|
||||
|
||||
void Reset(void) {
|
||||
pos_ = 0;
|
||||
}
|
||||
|
||||
// Logical cursor position in the ring buffer.
|
||||
uint32_t position(void) const { return pos_; }
|
||||
|
||||
// Bit mask for getting the physical position for a logical position.
|
||||
uint32_t mask(void) const { return mask_; }
|
||||
|
||||
uint8_t *start(void) { return &buffer_[0]; }
|
||||
const uint8_t *start(void) const { return &buffer_[0]; }
|
||||
|
||||
private:
|
||||
void WriteTail(const uint8_t *bytes, size_t n) {
|
||||
const size_t masked_pos = pos_ & mask_;
|
||||
if (PREDICT_FALSE(masked_pos < tail_size_)) {
|
||||
// Just fill the tail buffer with the beginning data.
|
||||
const size_t p = size_ + masked_pos;
|
||||
memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
|
||||
}
|
||||
}
|
||||
|
||||
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
|
||||
const uint32_t size_;
|
||||
const uint32_t mask_;
|
||||
const uint32_t tail_size_;
|
||||
const uint32_t total_size_;
|
||||
|
||||
uint32_t cur_size_;
|
||||
// Position to write in the ring buffer.
|
||||
uint32_t pos_;
|
||||
// The actual ring buffer containing the copy of the last two bytes, the data,
|
||||
// and the copy of the beginning as a tail.
|
||||
uint8_t *data_;
|
||||
// The start of the ringbuffer.
|
||||
uint8_t *buffer_;
|
||||
};
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_RINGBUFFER_H_
|
|
@ -0,0 +1,455 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#include "./static_dict.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "./dictionary.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./static_dict_lut.h"
|
||||
#include "./transform.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
inline uint32_t Hash(const uint8_t *data) {
|
||||
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
|
||||
// The higher bits contain more mixture from the multiplication,
|
||||
// so we take our results from there.
|
||||
return h >> (32 - kDictNumBits);
|
||||
}
|
||||
|
||||
// Records a match of length `len`: matches[len] is lowered to
// (distance << 5) + len_code when that value is smaller than the one already
// stored there.
inline void AddMatch(size_t distance, size_t len, size_t len_code,
                     uint32_t* matches) {
  const uint32_t candidate =
      static_cast<uint32_t>((distance << 5) + len_code);
  uint32_t& slot = matches[len];
  if (candidate < slot) {
    slot = candidate;
  }
}
|
||||
|
||||
inline size_t DictMatchLength(const uint8_t* data,
|
||||
size_t id,
|
||||
size_t len,
|
||||
size_t maxlen) {
|
||||
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
|
||||
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
|
||||
std::min(len, maxlen));
|
||||
}
|
||||
|
||||
// Returns true when `data` starts with dictionary word `w` after applying the
// word's transform (identity, uppercase-first, or otherwise uppercase-all).
// Words longer than max_length never match.
inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
  if (w.len > max_length) return false;
  const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
  const uint8_t* dict = &kBrotliDictionary[offset];
  if (w.transform == 0) {
    // Match against base dictionary word.
    return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
  } else if (w.transform == kUppercaseFirst) {
    // Match against uppercase first transform.
    // Note that there are only ASCII uppercase words in the lookup table.
    // XOR with 32 flips the ASCII case bit of the first letter.
    return (dict[0] >= 'a' && dict[0] <= 'z' &&
            (dict[0] ^ 32) == data[0] &&
            FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
            w.len - 1u);
  } else {
    // Match against uppercase all transform.
    // Note that there are only ASCII uppercase words in the lookup table.
    for (size_t i = 0; i < w.len; ++i) {
      if (dict[i] >= 'a' && dict[i] <= 'z') {
        // Lowercase ASCII letters must appear uppercased in the data.
        if ((dict[i] ^ 32) != data[i]) return false;
      } else {
        if (dict[i] != data[i]) return false;
      }
    }
    return true;
  }
}
|
||||
|
||||
bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
size_t min_length,
|
||||
size_t max_length,
|
||||
uint32_t* matches) {
|
||||
bool found_match = false;
|
||||
size_t key = Hash(data);
|
||||
size_t bucket = kStaticDictionaryBuckets[key];
|
||||
if (bucket != 0) {
|
||||
size_t num = bucket & 0xff;
|
||||
size_t offset = bucket >> 8;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
const DictWord w = kStaticDictionaryWords[offset + i];
|
||||
const size_t l = w.len;
|
||||
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t id = w.idx;
|
||||
if (w.transform == 0) {
|
||||
const size_t matchlen = DictMatchLength(data, id, l, max_length);
|
||||
// Transform "" + kIdentity + ""
|
||||
if (matchlen == l) {
|
||||
AddMatch(id, l, l, matches);
|
||||
found_match = true;
|
||||
}
|
||||
// Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "
|
||||
if (matchlen >= l - 1) {
|
||||
AddMatch(id + 12 * n, l - 1, l, matches);
|
||||
if (l + 2 < max_length &&
|
||||
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
|
||||
data[l + 2] == ' ') {
|
||||
AddMatch(id + 49 * n, l + 3, l, matches);
|
||||
}
|
||||
found_match = true;
|
||||
}
|
||||
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
|
||||
size_t minlen = min_length;
|
||||
if (l > 9) minlen = std::max(minlen, l - 9);
|
||||
size_t maxlen = std::min(matchlen, l - 2);
|
||||
for (size_t len = minlen; len <= maxlen; ++len) {
|
||||
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
|
||||
found_match = true;
|
||||
}
|
||||
if (matchlen < l || l + 6 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
const uint8_t* s = &data[l];
|
||||
// Transforms "" + kIdentity + <suffix>
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + n, l + 1, l, matches);
|
||||
if (s[1] == 'a') {
|
||||
if (s[2] == ' ') {
|
||||
AddMatch(id + 28 * n, l + 3, l, matches);
|
||||
} else if (s[2] == 's') {
|
||||
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 't') {
|
||||
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 'n') {
|
||||
if (s[3] == 'd' && s[4] == ' ') {
|
||||
AddMatch(id + 10 * n, l + 5, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[1] == 'b') {
|
||||
if (s[2] == 'y' && s[3] == ' ') {
|
||||
AddMatch(id + 38 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'i') {
|
||||
if (s[2] == 'n') {
|
||||
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 's') {
|
||||
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'f') {
|
||||
if (s[2] == 'o') {
|
||||
if (s[3] == 'r' && s[4] == ' ') {
|
||||
AddMatch(id + 25 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[2] == 'r') {
|
||||
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
|
||||
AddMatch(id + 37 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[1] == 'o') {
|
||||
if (s[2] == 'f') {
|
||||
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 'n') {
|
||||
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'n') {
|
||||
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
|
||||
AddMatch(id + 80 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[1] == 't') {
|
||||
if (s[2] == 'h') {
|
||||
if (s[3] == 'e') {
|
||||
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
|
||||
} else if (s[3] == 'a') {
|
||||
if (s[4] == 't' && s[5] == ' ') {
|
||||
AddMatch(id + 29 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[2] == 'o') {
|
||||
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'w') {
|
||||
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
|
||||
AddMatch(id + 35 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == '"') {
|
||||
AddMatch(id + 19 * n, l + 1, l, matches);
|
||||
if (s[1] == '>') {
|
||||
AddMatch(id + 21 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + 20 * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 31 * n, l + 2, l, matches);
|
||||
if (s[2] == 'T' && s[3] == 'h') {
|
||||
if (s[4] == 'e') {
|
||||
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
|
||||
} else if (s[4] == 'i') {
|
||||
if (s[5] == 's' && s[6] == ' ') {
|
||||
AddMatch(id + 75 * n, l + 7, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (s[0] == ',') {
|
||||
AddMatch(id + 76 * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 14 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '\n') {
|
||||
AddMatch(id + 22 * n, l + 1, l, matches);
|
||||
if (s[1] == '\t') {
|
||||
AddMatch(id + 50 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == ']') {
|
||||
AddMatch(id + 24 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '\'') {
|
||||
AddMatch(id + 36 * n, l + 1, l, matches);
|
||||
} else if (s[0] == ':') {
|
||||
AddMatch(id + 51 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + 57 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + 70 * n, l + 2, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + 86 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'a') {
|
||||
if (s[1] == 'l' && s[2] == ' ') {
|
||||
AddMatch(id + 84 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'e') {
|
||||
if (s[1] == 'd') {
|
||||
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
|
||||
} else if (s[1] == 'r') {
|
||||
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
|
||||
} else if (s[1] == 's') {
|
||||
if (s[2] == 't' && s[3] == ' ') {
|
||||
AddMatch(id + 95 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == 'f') {
|
||||
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
|
||||
AddMatch(id + 90 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'i') {
|
||||
if (s[1] == 'v') {
|
||||
if (s[2] == 'e' && s[3] == ' ') {
|
||||
AddMatch(id + 92 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'z') {
|
||||
if (s[2] == 'e' && s[3] == ' ') {
|
||||
AddMatch(id + 100 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == 'l') {
|
||||
if (s[1] == 'e') {
|
||||
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
|
||||
AddMatch(id + 93 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'y') {
|
||||
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'o') {
|
||||
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
|
||||
AddMatch(id + 106 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Set t=false for kUppercaseFirst and
|
||||
// t=true otherwise (kUppercaseAll) transform.
|
||||
const bool t = w.transform != kUppercaseFirst;
|
||||
if (!IsMatch(w, data, max_length)) {
|
||||
continue;
|
||||
}
|
||||
// Transform "" + kUppercase{First,All} + ""
|
||||
AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
|
||||
found_match = true;
|
||||
if (l + 1 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
// Transforms "" + kUppercase{First,All} + <suffix>
|
||||
const uint8_t* s = &data[l];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '"') {
|
||||
AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
|
||||
if (s[1] == '>') {
|
||||
AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == ',') {
|
||||
AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '\'') {
|
||||
AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Transforms with prefixes " " and "."
|
||||
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
||||
bool is_space = (data[0] == ' ');
|
||||
key = Hash(&data[1]);
|
||||
bucket = kStaticDictionaryBuckets[key];
|
||||
size_t num = bucket & 0xff;
|
||||
size_t offset = bucket >> 8;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
const DictWord w = kStaticDictionaryWords[offset + i];
|
||||
const size_t l = w.len;
|
||||
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t id = w.idx;
|
||||
if (w.transform == 0) {
|
||||
if (!IsMatch(w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
// Transforms " " + kIdentity + "" and "." + kIdentity + ""
|
||||
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
||||
found_match = true;
|
||||
if (l + 2 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
// Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
|
||||
const uint8_t* s = &data[l + 1];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
|
||||
} else if (is_space) {
|
||||
if (s[0] == ',') {
|
||||
AddMatch(id + 103 * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 33 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + 71 * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 52 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + 81 * n, l + 3, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + 98 * n, l + 3, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (is_space) {
|
||||
// Set t=false for kUppercaseFirst and
|
||||
// t=true otherwise (kUppercaseAll) transform.
|
||||
const bool t = w.transform != kUppercaseFirst;
|
||||
if (!IsMatch(w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
// Transforms " " + kUppercase{First,All} + ""
|
||||
AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
|
||||
found_match = true;
|
||||
if (l + 2 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
// Transforms " " + kUppercase{First,All} + <suffix>
|
||||
const uint8_t* s = &data[l + 1];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
|
||||
} else if (s[0] == ',') {
|
||||
if (!t) {
|
||||
AddMatch(id + 109 * n, l + 2, l, matches);
|
||||
}
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (max_length >= 6) {
|
||||
// Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
|
||||
if ((data[1] == ' ' &&
|
||||
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
||||
(data[0] == 0xc2 && data[1] == 0xa0)) {
|
||||
key = Hash(&data[2]);
|
||||
bucket = kStaticDictionaryBuckets[key];
|
||||
size_t num = bucket & 0xff;
|
||||
size_t offset = bucket >> 8;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
const DictWord w = kStaticDictionaryWords[offset + i];
|
||||
const size_t l = w.len;
|
||||
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t id = w.idx;
|
||||
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
|
||||
if (data[0] == 0xc2) {
|
||||
AddMatch(id + 102 * n, l + 2, l, matches);
|
||||
found_match = true;
|
||||
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
||||
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
||||
AddMatch(id + t * n, l + 3, l, matches);
|
||||
found_match = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (max_length >= 9) {
|
||||
// Transforms with prefixes " the " and ".com/"
|
||||
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
|
||||
data[3] == 'e' && data[4] == ' ') ||
|
||||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
||||
data[3] == 'm' && data[4] == '/')) {
|
||||
key = Hash(&data[5]);
|
||||
bucket = kStaticDictionaryBuckets[key];
|
||||
size_t num = bucket & 0xff;
|
||||
size_t offset = bucket >> 8;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
const DictWord w = kStaticDictionaryWords[offset + i];
|
||||
const size_t l = w.len;
|
||||
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t id = w.idx;
|
||||
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
|
||||
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
||||
found_match = true;
|
||||
if (l + 5 < max_length) {
|
||||
const uint8_t* s = &data[l + 5];
|
||||
if (data[0] == ' ') {
|
||||
if (l + 8 < max_length &&
|
||||
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
|
||||
AddMatch(id + 62 * n, l + 9, l, matches);
|
||||
if (l + 12 < max_length &&
|
||||
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
|
||||
AddMatch(id + 73 * n, l + 13, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return found_match;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,32 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Class to model the static dictionary.
|
||||
|
||||
#ifndef BROTLI_ENC_STATIC_DICT_H_
|
||||
#define BROTLI_ENC_STATIC_DICT_H_
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// The matches array passed to FindAllStaticDictionaryMatches must hold at
// least kMaxDictionaryMatchLen + 1 entries.
static const size_t kMaxDictionaryMatchLen = 37;
|
||||
// Value every entry of the matches array must hold before the call.
static const uint32_t kInvalidMatch = 0xfffffff;
|
||||
|
||||
// Matches data against static dictionary words, and for each length l,
|
||||
// for which a match is found, updates matches[l] to be the minimum possible
|
||||
// (distance << 5) + len_code.
|
||||
// Prerequisites:
|
||||
// matches array is at least kMaxDictionaryMatchLen + 1 long
|
||||
// all elements are initialized to kInvalidMatch
|
||||
bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
size_t min_length,
|
||||
size_t max_length,
|
||||
uint32_t* matches);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_STATIC_DICT_H_
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,114 @@
|
|||
/* Copyright 2009 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Convenience routines to make Brotli I/O classes from some memory containers and
|
||||
// files.
|
||||
|
||||
#include "./streams.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Creates a writer over the caller-provided buffer `buf` of `len` bytes,
// starting at write position zero.
BrotliMemOut::BrotliMemOut(void* buf, size_t len)
    : buf_(buf), len_(len), pos_(0) {
}
|
||||
|
||||
// Points the writer at a new buffer of `len` bytes and rewinds the write
// position to zero.
void BrotliMemOut::Reset(void* buf, size_t len) {
  pos_ = 0;
  len_ = len;
  buf_ = buf;
}
|
||||
|
||||
// Brotli output routine: copy n bytes to the output buffer.
|
||||
bool BrotliMemOut::Write(const void *buf, size_t n) {
|
||||
if (n + pos_ > len_)
|
||||
return false;
|
||||
char* p = reinterpret_cast<char*>(buf_) + pos_;
|
||||
memcpy(p, buf, n);
|
||||
pos_ += n;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates a writer that appends to `*buf` until it holds `max_size` bytes.
// `*buf` is expected to be empty (checked with assert).
BrotliStringOut::BrotliStringOut(std::string* buf, size_t max_size)
    : buf_(buf), max_size_(max_size) {
  assert(buf->empty());
}
|
||||
|
||||
// Redirects the writer to `*buf` with a new size limit. Unlike the
// constructor, this does not check that `*buf` is empty.
void BrotliStringOut::Reset(std::string* buf, size_t max_size) {
  max_size_ = max_size;
  buf_ = buf;
}
|
||||
|
||||
// Brotli output routine: add n bytes to a string.
|
||||
bool BrotliStringOut::Write(const void *buf, size_t n) {
|
||||
if (buf_->size() + n > max_size_)
|
||||
return false;
|
||||
buf_->append(static_cast<const char*>(buf), n);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Creates a reader over the `len` bytes at `buf`, starting at read position
// zero. The bytes are not copied.
BrotliMemIn::BrotliMemIn(const void* buf, size_t len)
    : buf_(buf), len_(len), pos_(0) {
}
|
||||
|
||||
// Points the reader at a new input of `len` bytes and rewinds the read
// position to zero.
void BrotliMemIn::Reset(const void* buf, size_t len) {
  pos_ = 0;
  len_ = len;
  buf_ = buf;
}
|
||||
|
||||
// Brotli input routine: read the next chunk of memory.
|
||||
const void* BrotliMemIn::Read(size_t n, size_t* output) {
|
||||
if (pos_ == len_) {
|
||||
return NULL;
|
||||
}
|
||||
if (n > len_ - pos_)
|
||||
n = len_ - pos_;
|
||||
const char* p = reinterpret_cast<const char*>(buf_) + pos_;
|
||||
pos_ += n;
|
||||
*output = n;
|
||||
return p;
|
||||
}
|
||||
|
||||
// Creates a reader over `f` with a private read buffer of `max_read_size`
// bytes. The buffer is released by the destructor.
BrotliFileIn::BrotliFileIn(FILE* f, size_t max_read_size)
    : f_(f), buf_(new char[max_read_size]), buf_size_(max_read_size) {
}
|
||||
|
||||
// Releases the read buffer allocated by the constructor. The FILE is left
// untouched.
BrotliFileIn::~BrotliFileIn() {
  delete[] buf_;
}
|
||||
|
||||
const void* BrotliFileIn::Read(size_t n, size_t* bytes_read) {
|
||||
if (n > buf_size_) {
|
||||
n = buf_size_;
|
||||
} else if (n == 0) {
|
||||
return feof(f_) ? NULL : buf_;
|
||||
}
|
||||
*bytes_read = fread(buf_, 1, n, f_);
|
||||
if (*bytes_read == 0) {
|
||||
return NULL;
|
||||
} else {
|
||||
return buf_;
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a writer over `f`.
BrotliFileOut::BrotliFileOut(FILE* f)
    : f_(f) {
}
|
||||
|
||||
bool BrotliFileOut::Write(const void* buf, size_t n) {
|
||||
if (fwrite(buf, n, 1, f_) != 1) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,121 @@
|
|||
/* Copyright 2009 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Input and output classes for streaming brotli compression.
|
||||
|
||||
#ifndef BROTLI_ENC_STREAMS_H_
|
||||
#define BROTLI_ENC_STREAMS_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include "./port.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Input interface for the compression routines.
|
||||
// Abstract source of input bytes for the compression routines.
class BrotliIn {
 public:
  virtual ~BrotliIn() {}

  // Hands out the next block of input, at most n bytes long, and stores the
  // block's actual length in *nread.
  // Returns NULL once the data is exhausted. While more data remains the
  // result must not be NULL, not even for n == 0.
  // Read is only called when some of the returned bytes are needed.
  virtual const void* Read(size_t n, size_t* nread) = 0;
};
|
||||
|
||||
// Output interface for the compression routines.
|
||||
// Abstract sink for the bytes produced by the compression routines.
class BrotliOut {
 public:
  virtual ~BrotliOut() {}

  // Consumes the n bytes at buf.
  // Returns true when every byte was written and false otherwise.
  virtual bool Write(const void *buf, size_t n) = 0;
};
|
||||
|
||||
// Adapter class to make BrotliIn objects from raw memory.
|
||||
// BrotliIn implementation that reads from a caller-owned block of memory.
class BrotliMemIn : public BrotliIn {
 public:
  BrotliMemIn(const void* buf, size_t len);

  // Switches to a new input block and starts reading from its beginning.
  void Reset(const void* buf, size_t len);

  // Number of input bytes handed out so far.
  size_t position() const { return pos_; }

  virtual const void* Read(size_t n, size_t* OUTPUT);

 private:
  const void* buf_;  // first byte of the input
  size_t len_;       // total number of input bytes
  size_t pos_;       // offset of the next byte to hand out
};
|
||||
|
||||
// Adapter class to make BrotliOut objects from raw memory.
|
||||
// BrotliOut implementation that writes into a caller-owned block of memory.
class BrotliMemOut : public BrotliOut {
 public:
  BrotliMemOut(void* buf, size_t len);

  // Switches to a new output block and starts writing at its beginning.
  void Reset(void* buf, size_t len);

  // Number of bytes written so far.
  size_t position() const { return pos_; }

  virtual bool Write(const void* buf, size_t n);

 private:
  void* buf_;   // first byte of the output block
  size_t len_;  // capacity of the output block in bytes
  size_t pos_;  // offset at which the next write lands
};
|
||||
|
||||
// Adapter class to make BrotliOut objects from a string.
|
||||
// BrotliOut implementation that appends to a caller-owned std::string.
class BrotliStringOut : public BrotliOut {
 public:
  // Builds a writer that appends to *buf, letting buf->size() grow to at most
  // max_size. *buf is expected to be empty at construction time.
  BrotliStringOut(std::string* buf, size_t max_size);

  // Switches to a different target string and size limit.
  void Reset(std::string* buf, size_t max_len);

  virtual bool Write(const void* buf, size_t n);

 private:
  std::string* buf_;  // string receiving the output
  size_t max_size_;   // upper bound for buf_->size()
};
|
||||
|
||||
// Adapter class to make BrotliIn object from a file.
|
||||
// Adapter class to make BrotliIn object from a file.
// Owns an internal read buffer of max_read_size bytes; the FILE itself stays
// owned by the caller.
class BrotliFileIn : public BrotliIn {
 public:
  BrotliFileIn(FILE* f, size_t max_read_size);
  ~BrotliFileIn(void);

  // Reads at most n bytes (capped at max_read_size) into the internal buffer
  // and returns it; the data is overwritten by the next call.
  const void* Read(size_t n, size_t* bytes_read);

 private:
  // Not copyable: buf_ is released with delete[] in the destructor, so a
  // copy would delete the same buffer twice. Declared, never defined.
  BrotliFileIn(const BrotliFileIn&);
  BrotliFileIn& operator=(const BrotliFileIn&);

  FILE* f_;          // input stream, not owned
  char* buf_;        // owned read buffer
  size_t buf_size_;  // size of buf_ in bytes
};
|
||||
|
||||
// Adapter class to make BrotliOut object from a file.
|
||||
// BrotliOut implementation that writes to a caller-provided FILE.
class BrotliFileOut : public BrotliOut {
 public:
  explicit BrotliFileOut(FILE* f);

  virtual bool Write(const void* buf, size_t n);

 private:
  FILE* f_;  // output stream
};
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_STREAMS_H_
|
|
@ -0,0 +1,248 @@
|
|||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Transformations on dictionary words.
|
||||
|
||||
#ifndef BROTLI_ENC_TRANSFORM_H_
|
||||
#define BROTLI_ENC_TRANSFORM_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "./dictionary.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Kinds of modification applied to a dictionary word.  The numeric values
// matter: TransformWord() uses kOmitLastN directly as a byte count and
// derives the kOmitFirstN count from the distance to kOmitFirst1.
enum WordTransformType {
  // 0: the word is used unchanged (equivalently, "omit last 0").
  kIdentity = 0,

  // 1..9: drop that many bytes from the end of the word.
  kOmitLast1,
  kOmitLast2,
  kOmitLast3,
  kOmitLast4,
  kOmitLast5,
  kOmitLast6,
  kOmitLast7,
  kOmitLast8,
  kOmitLast9,

  // 10, 11: upper-case the first character / every character.
  kUppercaseFirst,
  kUppercaseAll,

  // 12..20: drop 1..9 bytes from the start of the word.
  kOmitFirst1,
  kOmitFirst2,
  kOmitFirst3,
  kOmitFirst4,
  kOmitFirst5,
  kOmitFirst6,
  kOmitFirst7,
  kOmitFirst8,
  kOmitFirst9
};
|
||||
|
||||
struct Transform {
|
||||
const char* prefix;
|
||||
WordTransformType word_transform;
|
||||
const char* suffix;
|
||||
};
|
||||
|
||||
// The transform table.  The position of an entry is significant:
// GetTransformedDictionaryWord() selects a transform by index and
// kOmitLastNTransforms stores indices into this table.  The trailing
// comments give each entry's index.
static const Transform kTransforms[] = {
  { "", kIdentity, "" },                   //   0
  { "", kIdentity, " " },                  //   1
  { " ", kIdentity, " " },                 //   2
  { "", kOmitFirst1, "" },                 //   3
  { "", kUppercaseFirst, " " },            //   4
  { "", kIdentity, " the " },              //   5
  { " ", kIdentity, "" },                  //   6
  { "s ", kIdentity, " " },                //   7
  { "", kIdentity, " of " },               //   8
  { "", kUppercaseFirst, "" },             //   9
  { "", kIdentity, " and " },              //  10
  { "", kOmitFirst2, "" },                 //  11
  { "", kOmitLast1, "" },                  //  12
  { ", ", kIdentity, " " },                //  13
  { "", kIdentity, ", " },                 //  14
  { " ", kUppercaseFirst, " " },           //  15
  { "", kIdentity, " in " },               //  16
  { "", kIdentity, " to " },               //  17
  { "e ", kIdentity, " " },                //  18
  { "", kIdentity, "\"" },                 //  19
  { "", kIdentity, "." },                  //  20
  { "", kIdentity, "\">" },                //  21
  { "", kIdentity, "\n" },                 //  22
  { "", kOmitLast3, "" },                  //  23
  { "", kIdentity, "]" },                  //  24
  { "", kIdentity, " for " },              //  25
  { "", kOmitFirst3, "" },                 //  26
  { "", kOmitLast2, "" },                  //  27
  { "", kIdentity, " a " },                //  28
  { "", kIdentity, " that " },             //  29
  { " ", kUppercaseFirst, "" },            //  30
  { "", kIdentity, ". " },                 //  31
  { ".", kIdentity, "" },                  //  32
  { " ", kIdentity, ", " },                //  33
  { "", kOmitFirst4, "" },                 //  34
  { "", kIdentity, " with " },             //  35
  { "", kIdentity, "'" },                  //  36
  { "", kIdentity, " from " },             //  37
  { "", kIdentity, " by " },               //  38
  { "", kOmitFirst5, "" },                 //  39
  { "", kOmitFirst6, "" },                 //  40
  { " the ", kIdentity, "" },              //  41
  { "", kOmitLast4, "" },                  //  42
  { "", kIdentity, ". The " },             //  43
  { "", kUppercaseAll, "" },               //  44
  { "", kIdentity, " on " },               //  45
  { "", kIdentity, " as " },               //  46
  { "", kIdentity, " is " },               //  47
  { "", kOmitLast7, "" },                  //  48
  { "", kOmitLast1, "ing " },              //  49
  { "", kIdentity, "\n\t" },               //  50
  { "", kIdentity, ":" },                  //  51
  { " ", kIdentity, ". " },                //  52
  { "", kIdentity, "ed " },                //  53
  { "", kOmitFirst9, "" },                 //  54
  { "", kOmitFirst7, "" },                 //  55
  { "", kOmitLast6, "" },                  //  56
  { "", kIdentity, "(" },                  //  57
  { "", kUppercaseFirst, ", " },           //  58
  { "", kOmitLast8, "" },                  //  59
  { "", kIdentity, " at " },               //  60
  { "", kIdentity, "ly " },                //  61
  { " the ", kIdentity, " of " },          //  62
  { "", kOmitLast5, "" },                  //  63
  { "", kOmitLast9, "" },                  //  64
  { " ", kUppercaseFirst, ", " },          //  65
  { "", kUppercaseFirst, "\"" },           //  66
  { ".", kIdentity, "(" },                 //  67
  { "", kUppercaseAll, " " },              //  68
  { "", kUppercaseFirst, "\">" },          //  69
  { "", kIdentity, "=\"" },                //  70
  { " ", kIdentity, "." },                 //  71
  { ".com/", kIdentity, "" },              //  72
  { " the ", kIdentity, " of the " },      //  73
  { "", kUppercaseFirst, "'" },            //  74
  { "", kIdentity, ". This " },            //  75
  { "", kIdentity, "," },                  //  76
  { ".", kIdentity, " " },                 //  77
  { "", kUppercaseFirst, "(" },            //  78
  { "", kUppercaseFirst, "." },            //  79
  { "", kIdentity, " not " },              //  80
  { " ", kIdentity, "=\"" },               //  81
  { "", kIdentity, "er " },                //  82
  { " ", kUppercaseAll, " " },             //  83
  { "", kIdentity, "al " },                //  84
  { " ", kUppercaseAll, "" },              //  85
  { "", kIdentity, "='" },                 //  86
  { "", kUppercaseAll, "\"" },             //  87
  { "", kUppercaseFirst, ". " },           //  88
  { " ", kIdentity, "(" },                 //  89
  { "", kIdentity, "ful " },               //  90
  { " ", kUppercaseFirst, ". " },          //  91
  { "", kIdentity, "ive " },               //  92
  { "", kIdentity, "less " },              //  93
  { "", kUppercaseAll, "'" },              //  94
  { "", kIdentity, "est " },               //  95
  { " ", kUppercaseFirst, "." },           //  96
  { "", kUppercaseAll, "\">" },            //  97
  { " ", kIdentity, "='" },                //  98
  { "", kUppercaseFirst, "," },            //  99
  { "", kIdentity, "ize " },               // 100
  { "", kUppercaseAll, "." },              // 101
  { "\xc2\xa0", kIdentity, "" },           // 102
  { " ", kIdentity, "," },                 // 103
  { "", kUppercaseFirst, "=\"" },          // 104
  { "", kUppercaseAll, "=\"" },            // 105
  { "", kIdentity, "ous " },               // 106
  { "", kUppercaseAll, ", " },             // 107
  { "", kUppercaseFirst, "='" },           // 108
  { " ", kUppercaseFirst, "," },           // 109
  { " ", kUppercaseAll, "=\"" },           // 110
  { " ", kUppercaseAll, ", " },            // 111
  { "", kUppercaseAll, "," },              // 112
  { "", kUppercaseAll, "(" },              // 113
  { "", kUppercaseAll, ". " },             // 114
  { " ", kUppercaseAll, "." },             // 115
  { "", kUppercaseAll, "='" },             // 116
  { " ", kUppercaseAll, ". " },            // 117
  { " ", kUppercaseFirst, "=\"" },         // 118
  { " ", kUppercaseAll, "='" },            // 119
  { " ", kUppercaseFirst, "='" },          // 120
};
|
||||
|
||||
// Number of entries in kTransforms.
static const size_t kNumTransforms = sizeof(kTransforms) / sizeof(*kTransforms);

// kOmitLastNTransforms[n] is the index in kTransforms of the entry with an
// empty prefix, an empty suffix and the "omit last n" word transform
// (n == 0 is the identity transform at index 0).
static const size_t kOmitLastNTransforms[10] = {
  0,   // kIdentity
  12,  // kOmitLast1
  27,  // kOmitLast2
  23,  // kOmitLast3
  42,  // kOmitLast4
  63,  // kOmitLast5
  56,  // kOmitLast6
  48,  // kOmitLast7
  59,  // kOmitLast8
  64,  // kOmitLast9
};
|
||||
|
||||
// Upper-cases, in place, the single character that starts at |p| and returns
// the number of bytes that character occupies (1, 2 or 3).  |len| is the
// number of bytes available at |p|; p[0] is always read.
//
//  - lead byte below 0xc0, or only one byte available: an ASCII 'a'..'z' has
//    bit 5 flipped, anything else is left alone; 1 byte consumed.
//  - lead byte in [0xc0, 0xe0): bit 5 of the second byte is flipped; 2 bytes.
//  - lead byte 0xe0 or above: the third byte is XOR-ed with 5 and 3 bytes are
//    consumed, unless only 2 bytes are available, in which case nothing is
//    modified and 2 is returned.
static size_t ToUpperCase(uint8_t *p, size_t len) {
  const uint8_t lead = p[0];

  if (len == 1 || lead < 0xc0) {
    const bool is_ascii_lower = !(lead < 'a' || lead > 'z');
    if (is_ascii_lower) {
      p[0] = static_cast<uint8_t>(lead ^ 32);
    }
    return 1;
  }

  if (lead >= 0xe0) {
    if (len != 2) {
      p[2] ^= 5;
      return 3;
    }
    return 2;
  }

  p[1] ^= 32;
  return 2;
}
|
||||
|
||||
// Returns a copy of the |len| bytes at |word| modified according to
// |transform_type|:
//  - kIdentity / kOmitLastN: the word without its last N bytes (N is the
//    enum value itself); empty when the word has N bytes or fewer.
//  - kOmitFirstN: the word without its first N bytes; empty when the word
//    has N bytes or fewer.
//  - kUppercaseFirst: ToUpperCase() applied once at the start of the word.
//  - kUppercaseAll: ToUpperCase() applied repeatedly until the whole word
//    has been covered.
inline std::string TransformWord(
    WordTransformType transform_type, const uint8_t* word, size_t len) {
  const uint8_t* const word_end = word + len;

  if (transform_type <= kOmitLast9) {
    const size_t drop = static_cast<size_t>(transform_type);
    return (len > drop) ? std::string(word, word_end - drop) : std::string();
  }

  if (transform_type >= kOmitFirst1) {
    const size_t skip = transform_type - (kOmitFirst1 - 1);
    return (len > skip) ? std::string(word + skip, word_end) : std::string();
  }

  // Only the two upper-casing transforms are left.
  std::string result(word, word_end);
  uint8_t* cursor = reinterpret_cast<uint8_t*>(&result[0]);
  switch (transform_type) {
    case kUppercaseFirst:
      ToUpperCase(cursor, len);
      break;
    case kUppercaseAll:
      for (size_t done = 0; done < len; ) {
        const size_t step = ToUpperCase(cursor, len - done);
        cursor += step;
        done += step;
      }
      break;
    default:
      break;
  }
  return result;
}
|
||||
|
||||
inline std::string ApplyTransform(
|
||||
const Transform& t, const uint8_t* word, size_t len) {
|
||||
return std::string(t.prefix) +
|
||||
TransformWord(t.word_transform, word, len) + std::string(t.suffix);
|
||||
}
|
||||
|
||||
inline std::string GetTransformedDictionaryWord(size_t len_code,
|
||||
size_t word_id) {
|
||||
size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
|
||||
size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
|
||||
size_t t = word_id / num_words;
|
||||
size_t word_idx = word_id % num_words;
|
||||
offset += len_code * word_idx;
|
||||
const uint8_t* word = &kBrotliDictionary[offset];
|
||||
return ApplyTransform(kTransforms[t], word, len_code);
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_TRANSFORM_H_
|
|
@ -0,0 +1,29 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Common types */
|
||||
|
||||
#ifndef BROTLI_ENC_TYPES_H_
|
||||
#define BROTLI_ENC_TYPES_H_
|
||||
|
||||
#include <stddef.h> /* for size_t */
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1600)
|
||||
typedef __int8 int8_t;
|
||||
typedef unsigned __int8 uint8_t;
|
||||
typedef __int16 int16_t;
|
||||
typedef unsigned __int16 uint16_t;
|
||||
typedef __int32 int32_t;
|
||||
typedef unsigned __int32 uint32_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
typedef __int64 int64_t;
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
|
||||
|
||||
/* Builds a 64-bit value: |high| is converted to uint64_t and shifted into
   bits 32..63, then |low| is OR-ed in.  Both arguments are parenthesized so
   that an expression argument (e.g. a conditional expression) is evaluated
   as a unit rather than being re-associated with the surrounding `|`. */
#define MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | (low))
|
||||
|
||||
#endif /* BROTLI_ENC_TYPES_H_ */
|
|
@ -0,0 +1,83 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Heuristics for deciding about the UTF8-ness of strings.
|
||||
|
||||
#include "./utf8_util.h"
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
namespace {
|
||||
|
||||
// True when |b| has the form 10xxxxxx, i.e. is a UTF-8 continuation byte.
inline bool IsUTF8TrailByte(uint8_t b) {
  return (b & 0xc0) == 0x80;
}

// Tries to decode one UTF-8 sequence from the start of |input|, of which
// |size| bytes may be read (input[0] is always read).
//
// On success stores the code point in |*symbol| and returns the sequence
// length (1..4).  Over-long encodings, code points above 0x10ffff, truncated
// sequences and the NUL byte are not accepted.  In every rejected case
// |*symbol| is set to 0x110000 | input[0] - a value above the Unicode code
// space - and 1 is returned so the caller can step over the offending byte.
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
  const int lead = input[0];

  // ASCII
  if (lead < 0x80) {
    *symbol = lead;
    if (lead != 0) {
      return 1;
    }
  }

  // 2-byte UTF8
  if (size > 1u && (lead & 0xe0) == 0xc0 && IsUTF8TrailByte(input[1])) {
    const int code = ((lead & 0x1f) << 6) | (input[1] & 0x3f);
    *symbol = code;
    if (code > 0x7f) {
      return 2;
    }
  }

  // 3-byte UTF8
  if (size > 2u && (lead & 0xf0) == 0xe0 &&
      IsUTF8TrailByte(input[1]) && IsUTF8TrailByte(input[2])) {
    const int code = ((lead & 0x0f) << 12) |
                     ((input[1] & 0x3f) << 6) |
                     (input[2] & 0x3f);
    *symbol = code;
    if (code > 0x7ff) {
      return 3;
    }
  }

  // 4-byte UTF8
  if (size > 3u && (lead & 0xf8) == 0xf0 &&
      IsUTF8TrailByte(input[1]) && IsUTF8TrailByte(input[2]) &&
      IsUTF8TrailByte(input[3])) {
    const int code = ((lead & 0x07) << 18) |
                     ((input[1] & 0x3f) << 12) |
                     ((input[2] & 0x3f) << 6) |
                     (input[3] & 0x3f);
    *symbol = code;
    if (code > 0xffff && code <= 0x10ffff) {
      return 4;
    }
  }

  // Not UTF8, emit a special symbol above the UTF8-code space
  *symbol = 0x110000 | lead;
  return 1;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Returns true if at least min_fraction of the data is UTF8-encoded.
|
||||
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction) {
|
||||
size_t size_utf8 = 0;
|
||||
size_t i = 0;
|
||||
while (i < length) {
|
||||
int symbol;
|
||||
size_t bytes_read = ParseAsUTF8(
|
||||
&symbol, &data[(pos + i) & mask], length - i);
|
||||
i += bytes_read;
|
||||
if (symbol < 0x110000) size_utf8 += bytes_read;
|
||||
}
|
||||
return size_utf8 > min_fraction * static_cast<double>(length);
|
||||
}
|
||||
|
||||
} // namespace brotli
|
|
@ -0,0 +1,25 @@
|
|||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Heuristics for deciding about the UTF8-ness of strings.
|
||||
|
||||
#ifndef BROTLI_ENC_UTF8_UTIL_H_
|
||||
#define BROTLI_ENC_UTF8_UTIL_H_
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// A UTF-8 fraction threshold, suitable as the |min_fraction| argument of
// IsMostlyUTF8().
static const double kMinUTF8Ratio = 0.75;

// Examines the |length| bytes starting at |pos| in the (data, mask)
// ringbuffer and returns true if the UTF8-encoded part of them exceeds
// min_fraction * length.
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
                  const size_t length, const double min_fraction);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_UTF8_UTIL_H_
|
|
@ -0,0 +1,84 @@
|
|||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Write bits into a byte array.
|
||||
|
||||
#ifndef BROTLI_ENC_WRITE_BITS_H_
|
||||
#define BROTLI_ENC_WRITE_BITS_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./port.h"
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
//#define BIT_WRITER_DEBUG
|
||||
|
||||
// This function writes bits into bytes in increasing addresses, and within
// a byte least-significant-bit first.
//
// The function can write up to 56 bits in one go with WriteBits
// Example: let's assume that 3 bits (Rs below) have been written already:
//
// BYTE-0     BYTE+1       BYTE+2
//
// 0000 0RRR    0000 0000    0000 0000
//
// Now, we could write 5 or less bits in MSB by just shifting by 3
// and OR'ing to BYTE-0.
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
//
// Parameters:
//   n_bits  number of bits to write; asserted to be at most 56.
//   bits    the value to write; asserted to have no bits set at or above
//           position |n_bits|.
//   pos     in/out bit position in |array|; advanced by |n_bits|.
//   array   destination.  New bits are OR-ed into the byte that contains
//           |*pos|, so the not-yet-written bits of that byte must already
//           be zero (see WriteBitsPrepareStorage()).
inline void WriteBits(size_t n_bits,
                      uint64_t bits,
                      size_t * __restrict pos,
                      uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
  // The arguments are cast to the exact types the conversion specifiers
  // expect; passing size_t / uint64_t directly for %d / %llx is undefined
  // behavior on platforms where the widths differ.
  printf("WriteBits %2d 0x%016llx %10lu\n",
         static_cast<int>(n_bits),
         static_cast<unsigned long long>(bits),
         static_cast<unsigned long>(*pos));
#endif
  assert((bits >> n_bits) == 0);
  assert(n_bits <= 56);
#ifdef IS_LITTLE_ENDIAN
  // This branch of the code can write up to 56 bits at a time,
  // 7 bits are lost by being perhaps already in *p and at least
  // 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
  // bits are in *p and we write 57 bits, then the next write will
  // access a byte that was never initialized).
  uint8_t *p = &array[*pos >> 3];
  uint64_t v = *p;
  v |= bits << (*pos & 7);
  BROTLI_UNALIGNED_STORE64(p, v);  // Set some bits.
  *pos += n_bits;
#else
  // implicit & 0xff is assumed for uint8_t arithmetics
  uint8_t *array_pos = &array[*pos >> 3];
  const size_t bits_reserved_in_first_byte = (*pos & 7);
  bits <<= bits_reserved_in_first_byte;
  // The first byte may already hold earlier bits, so OR into it ...
  *array_pos++ |= static_cast<uint8_t>(bits);
  // ... while every following byte is overwritten outright.
  for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
       bits_left_to_write >= 9;
       bits_left_to_write -= 8) {
    bits >>= 8;
    *array_pos++ = static_cast<uint8_t>(bits);
  }
  // Clear the byte after the last one written so that the next call can
  // OR into it.
  *array_pos = 0;
  *pos += n_bits;
#endif
}
|
||||
|
||||
// Zeroes the byte of |array| that holds bit position |pos|, which is
// asserted to be byte-aligned.  WriteBits() ORs new bits into the byte at
// its write position, so that byte has to start out cleared.
inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
  // Cast so the argument type matches the conversion specifier; passing a
  // size_t for %d is undefined behavior where the widths differ.
  printf("WriteBitsPrepareStorage %10lu\n", static_cast<unsigned long>(pos));
#endif
  assert((pos & 7) == 0);
  array[pos >> 3] = 0;
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_WRITE_BITS_H_
|
|
@ -0,0 +1,25 @@
|
|||
#brotli/tools
|
||||
|
||||
include ../shared.mk
|
||||
|
||||
BROTLI = ..
ENCOBJ = $(BROTLI)/enc/*.o
DECOBJ = $(BROTLI)/dec/*.o

EXECUTABLES=bro

EXE_OBJS=$(patsubst %, %.o, $(EXECUTABLES))

# These targets name actions, not files; declaring them phony keeps a stray
# file called "all", "deps" or "clean" from suppressing the rule.
.PHONY : all deps clean

all : $(EXECUTABLES)

# Link the tool against every encoder and decoder object.
$(EXECUTABLES) : $(EXE_OBJS) deps
	$(CXX) $(LDFLAGS) $(ENCOBJ) $(DECOBJ) $@.o -o $@

# Build the decoder and the encoder ("nodict" target) in their own directories.
deps :
	$(MAKE) -C $(BROTLI)/dec
	$(MAKE) -C $(BROTLI)/enc nodict

clean :
	rm -f $(OBJS) $(EXE_OBJS) $(EXECUTABLES)
	$(MAKE) -C $(BROTLI)/dec clean
	$(MAKE) -C $(BROTLI)/enc clean
|
|
@ -0,0 +1,335 @@
|
|||
/* Copyright 2014 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Example main() function for Brotli library. */
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <string>
|
||||
|
||||
#include "../dec/decode.h"
|
||||
#include "../enc/compressor.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#else
|
||||
#include <io.h>
|
||||
|
||||
#define STDIN_FILENO _fileno(stdin)
|
||||
#define STDOUT_FILENO _fileno(stdout)
|
||||
#define S_IRUSR S_IREAD
|
||||
#define S_IWUSR S_IWRITE
|
||||
#define fdopen _fdopen
|
||||
#define unlink _unlink
|
||||
|
||||
#define fopen ms_fopen
|
||||
#define open ms_open
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
|
||||
#define fseek _fseeki64
|
||||
#define ftell _ftelli64
|
||||
#endif
|
||||
|
||||
/* fopen() replacement for Windows builds, implemented with fopen_s().
   Returns the opened stream; the result stays 0 unless fopen_s() stores a
   stream. */
static inline FILE* ms_fopen(const char *filename, const char *mode) {
  FILE* stream = 0;
  fopen_s(&stream, filename, mode);
  return stream;
}
|
||||
|
||||
/* open() replacement for Windows builds, implemented with _sopen_s().
   O_BINARY is always added to |oflag| and the file is opened with
   _SH_DENYNO sharing.  Returns the descriptor; the result stays -1 unless
   _sopen_s() stores a descriptor. */
static inline int ms_open(const char *filename, int oflag, int pmode) {
  int descriptor = -1;
  const int binary_flags = oflag | O_BINARY;
  _sopen_s(&descriptor, filename, binary_flags, _SH_DENYNO, pmode);
  return descriptor;
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
|
||||
/* Parses |s| as a decimal number of exactly one or two digits and stores it
   in |*quality|.  Returns true only if the whole string was such a number.
   On a false return |*quality| may still have been overwritten with the
   value of the leading digit(s). */
static bool ParseQuality(const char* s, int* quality) {
  const bool first_is_digit = !(s[0] < '0' || s[0] > '9');
  if (!first_is_digit) {
    return false;
  }
  *quality = s[0] - '0';

  const bool second_is_digit = !(s[1] < '0' || s[1] > '9');
  if (!second_is_digit) {
    /* One digit: valid only if the string ends here. */
    return s[1] == 0;
  }
  *quality = *quality * 10 + s[1] - '0';
  return s[2] == 0;
}
|
||||
|
||||
static void ParseArgv(int argc, char **argv,
|
||||
char **input_path,
|
||||
char **output_path,
|
||||
int *force,
|
||||
int *quality,
|
||||
int *decompress,
|
||||
int *repeat,
|
||||
int *verbose,
|
||||
int *lgwin) {
|
||||
*force = 0;
|
||||
*input_path = 0;
|
||||
*output_path = 0;
|
||||
*repeat = 1;
|
||||
*verbose = 0;
|
||||
*lgwin = 22;
|
||||
{
|
||||
size_t argv0_len = strlen(argv[0]);
|
||||
*decompress =
|
||||
argv0_len >= 5 && strcmp(&argv[0][argv0_len - 5], "unbro") == 0;
|
||||
}
|
||||
for (int k = 1; k < argc; ++k) {
|
||||
if (!strcmp("--force", argv[k]) ||
|
||||
!strcmp("-f", argv[k])) {
|
||||
if (*force != 0) {
|
||||
goto error;
|
||||
}
|
||||
*force = 1;
|
||||
continue;
|
||||
} else if (!strcmp("--decompress", argv[k]) ||
|
||||
!strcmp("--uncompress", argv[k]) ||
|
||||
!strcmp("-d", argv[k])) {
|
||||
*decompress = 1;
|
||||
continue;
|
||||
} else if (!strcmp("--verbose", argv[k]) ||
|
||||
!strcmp("-v", argv[k])) {
|
||||
if (*verbose != 0) {
|
||||
goto error;
|
||||
}
|
||||
*verbose = 1;
|
||||
continue;
|
||||
}
|
||||
if (k < argc - 1) {
|
||||
if (!strcmp("--input", argv[k]) ||
|
||||
!strcmp("--in", argv[k]) ||
|
||||
!strcmp("-i", argv[k])) {
|
||||
if (*input_path != 0) {
|
||||
goto error;
|
||||
}
|
||||
*input_path = argv[k + 1];
|
||||
++k;
|
||||
continue;
|
||||
} else if (!strcmp("--output", argv[k]) ||
|
||||
!strcmp("--out", argv[k]) ||
|
||||
!strcmp("-o", argv[k])) {
|
||||
if (*output_path != 0) {
|
||||
goto error;
|
||||
}
|
||||
*output_path = argv[k + 1];
|
||||
++k;
|
||||
continue;
|
||||
} else if (!strcmp("--quality", argv[k]) ||
|
||||
!strcmp("-q", argv[k])) {
|
||||
if (!ParseQuality(argv[k + 1], quality)) {
|
||||
goto error;
|
||||
}
|
||||
++k;
|
||||
continue;
|
||||
} else if (!strcmp("--repeat", argv[k]) ||
|
||||
!strcmp("-r", argv[k])) {
|
||||
if (!ParseQuality(argv[k + 1], repeat)) {
|
||||
goto error;
|
||||
}
|
||||
++k;
|
||||
continue;
|
||||
} else if (!strcmp("--window", argv[k]) ||
|
||||
!strcmp("-w", argv[k])) {
|
||||
if (!ParseQuality(argv[k + 1], lgwin)) {
|
||||
goto error;
|
||||
}
|
||||
if (*lgwin < 10 || *lgwin >= 25) {
|
||||
goto error;
|
||||
}
|
||||
++k;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
goto error;
|
||||
}
|
||||
return;
|
||||
error:
|
||||
fprintf(stderr,
|
||||
"Usage: %s [--force] [--quality n] [--decompress]"
|
||||
" [--input filename] [--output filename] [--repeat iters]"
|
||||
" [--verbose] [--window n]\n",
|
||||
argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* Returns a binary read stream for |input_path|, or one wrapped around
   standard input when |input_path| is null.  If the named file cannot be
   opened, reports the error with perror() and exits with status 1. */
static FILE* OpenInputFile(const char* input_path) {
  if (input_path != 0) {
    FILE* stream = fopen(input_path, "rb");
    if (stream == 0) {
      perror("fopen");
      exit(1);
    }
    return stream;
  }
  return fdopen(STDIN_FILENO, "rb");
}
|
||||
|
||||
/* Returns a binary write stream for |output_path|, or one wrapped around
   standard output when |output_path| is null.

   The file is created (owner read/write) and truncated.  Unless |force| is
   non-zero, O_EXCL is used so that an existing file is not overwritten; in
   that case "output file exists" is printed when the path can be stat()-ed.
   Any failure to open ends the process with status 1. */
static FILE *OpenOutputFile(const char *output_path, const int force) {
  if (output_path == 0) {
    return fdopen(STDOUT_FILENO, "wb");
  }

  int flags = O_CREAT | O_WRONLY | O_TRUNC;
  if (!force) {
    flags |= O_EXCL;
  }
  const int fd = open(output_path, flags, S_IRUSR | S_IWUSR);
  if (fd < 0) {
    struct stat statbuf;
    if (!force && stat(output_path, &statbuf) == 0) {
      fprintf(stderr, "output file exists\n");
      exit(1);
    }
    perror("open");
    exit(1);
  }
  return fdopen(fd, "wb");
}
|
||||
|
||||
/* Returns the size in bytes of the file at |path|, or -1 if it cannot be
   determined.

   A null |path| yields -1: main() passes the input/output path here, and
   that pointer is null when the data comes from stdin or goes to stdout.
   Handing a null pointer to fopen() would be undefined behavior. */
static int64_t FileSize(char *path) {
  if (path == NULL) {
    return -1;
  }
  FILE *f = fopen(path, "rb");
  if (f == NULL) {
    return -1;
  }
  if (fseek(f, 0L, SEEK_END) != 0) {
    fclose(f);
    return -1;
  }
  /* ftell() itself reports failure as -1, which is passed through. */
  int64_t retval = ftell(f);
  if (fclose(f) != 0) {
    return -1;
  }
  return retval;
}
|
||||
|
||||
static const size_t kFileBufferSize = 65536;
|
||||
|
||||
static void Decompresss(FILE* fin, FILE* fout) {
|
||||
BrotliState* s = BrotliCreateState(NULL, NULL, NULL);
|
||||
if (!s) {
|
||||
fprintf(stderr, "out of memory\n");
|
||||
exit(1);
|
||||
}
|
||||
uint8_t* input = new uint8_t[kFileBufferSize];
|
||||
uint8_t* output = new uint8_t[kFileBufferSize];
|
||||
size_t total_out;
|
||||
size_t available_in;
|
||||
const uint8_t* next_in;
|
||||
size_t available_out = kFileBufferSize;
|
||||
uint8_t* next_out = output;
|
||||
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_INPUT;
|
||||
while (1) {
|
||||
if (result == BROTLI_RESULT_NEEDS_MORE_INPUT) {
|
||||
if (feof(fin)) {
|
||||
break;
|
||||
}
|
||||
available_in = fread(input, 1, kFileBufferSize, fin);
|
||||
next_in = input;
|
||||
if (ferror(fin)) {
|
||||
break;
|
||||
}
|
||||
} else if (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
|
||||
fwrite(output, 1, kFileBufferSize, fout);
|
||||
if (ferror(fout)) {
|
||||
break;
|
||||
}
|
||||
available_out = kFileBufferSize;
|
||||
next_out = output;
|
||||
} else {
|
||||
break; /* Error or success. */
|
||||
}
|
||||
result = BrotliDecompressStream(&available_in, &next_in,
|
||||
&available_out, &next_out, &total_out, s);
|
||||
}
|
||||
if (next_out != output) {
|
||||
fwrite(output, 1, static_cast<size_t>(next_out - output), fout);
|
||||
}
|
||||
delete[] input;
|
||||
delete[] output;
|
||||
BrotliDestroyState(s);
|
||||
if ((result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) || ferror(fout)) {
|
||||
fprintf(stderr, "failed to write output\n");
|
||||
exit(1);
|
||||
} else if (result != BROTLI_RESULT_SUCCESS) { /* Error or needs more input. */
|
||||
fprintf(stderr, "corrupt input\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
char *input_path = 0;
|
||||
char *output_path = 0;
|
||||
int force = 0;
|
||||
int quality = 11;
|
||||
int decompress = 0;
|
||||
int repeat = 1;
|
||||
int verbose = 0;
|
||||
int lgwin = 0;
|
||||
ParseArgv(argc, argv, &input_path, &output_path, &force,
|
||||
&quality, &decompress, &repeat, &verbose, &lgwin);
|
||||
const clock_t clock_start = clock();
|
||||
for (int i = 0; i < repeat; ++i) {
|
||||
FILE* fin = OpenInputFile(input_path);
|
||||
FILE* fout = OpenOutputFile(output_path, force);
|
||||
if (decompress) {
|
||||
Decompresss(fin, fout);
|
||||
} else {
|
||||
brotli::BrotliParams params;
|
||||
params.lgwin = lgwin;
|
||||
params.quality = quality;
|
||||
try {
|
||||
brotli::BrotliFileIn in(fin, 1 << 16);
|
||||
brotli::BrotliFileOut out(fout);
|
||||
if (!BrotliCompress(params, &in, &out)) {
|
||||
fprintf(stderr, "compression failed\n");
|
||||
unlink(output_path);
|
||||
exit(1);
|
||||
}
|
||||
} catch (std::bad_alloc&) {
|
||||
fprintf(stderr, "not enough memory\n");
|
||||
unlink(output_path);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (fclose(fin) != 0) {
|
||||
perror("fclose");
|
||||
exit(1);
|
||||
}
|
||||
if (fclose(fout) != 0) {
|
||||
perror("fclose");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (verbose) {
|
||||
const clock_t clock_end = clock();
|
||||
double duration =
|
||||
static_cast<double>(clock_end - clock_start) / CLOCKS_PER_SEC;
|
||||
if (duration < 1e-9) {
|
||||
duration = 1e-9;
|
||||
}
|
||||
int64_t uncompressed_size = FileSize(decompress ? output_path : input_path);
|
||||
if (uncompressed_size == -1) {
|
||||
fprintf(stderr, "failed to determine uncompressed file size\n");
|
||||
exit(1);
|
||||
}
|
||||
double uncompressed_bytes_in_MB =
|
||||
static_cast<double>(repeat * uncompressed_size) / (1024.0 * 1024.0);
|
||||
if (decompress) {
|
||||
printf("Brotli decompression speed: ");
|
||||
} else {
|
||||
printf("Brotli compression speed: ");
|
||||
}
|
||||
printf("%g MB/s\n", uncompressed_bytes_in_MB / duration);
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
#!/usr/bin/python
|
||||
#
|
||||
# Takes an .nroff source file and prints a text file in RFC format.
|
||||
#
|
||||
# Usage: rfc-format.py <source file>
|
||||
|
||||
import re
|
||||
import sys
|
||||
from subprocess import Popen, PIPE
|
||||
|
||||
|
||||
def Readfile(fn):
    """Returns the whole content of the file named fn (opened in "r" mode).

    The file is closed before returning, including when reading raises.
    """
    with open(fn, "r") as f:
        return f.read()
|
||||
|
||||
|
||||
def FixNroffOutput(buf):
    """Cleans up nroff output and returns the result as one string.

    For every line of buf:
      * the byte sequences "\\xe2\\x80\\x99" and "\\xe2\\x80\\x90" are
        replaced by "'" and "-";
      * any remaining character with a code above 127 (i.e. non-ASCII) is
        reported on stderr;
      * a line ending in "FORMFEED[Page N]" is rewritten as the text before
        the marker, a space, "[Page N]", a newline and a form feed, and the
        empty lines that directly follow it are dropped.
    Trailing newlines are stripped from the result.
    """
    p = re.compile(r'(.*)FORMFEED(\[Page\s+\d+\])$')
    strip_empty = False
    out = ""
    for line in buf.split("\n"):
        line = line.replace("\xe2\x80\x99", "'")
        line = line.replace("\xe2\x80\x90", "-")
        for ch in line:
            # ASCII ends at 127, so 128 itself must be reported as well.
            if ord(ch) > 127:
                # Same text the print statement produced: the message plus
                # the newline that print appends.
                sys.stderr.write("Invalid character %d\n\n" % ord(ch))
        m = p.search(line)
        if strip_empty and len(line) == 0:
            continue
        if m:
            out += p.sub(r'\1 \2\n\f', line)
            out += "\n"
            strip_empty = True
        else:
            out += "%s\n" % line
            strip_empty = False
    return out.rstrip("\n")
|
||||
|
||||
|
||||
def Nroff(buf):
    """Runs `nroff -ms` with buf on its stdin and returns the cleaned output.

    The captured stdout is passed through FixNroffOutput(); stderr is not
    captured.
    """
    proc = Popen(["nroff", "-ms"], stdin=PIPE, stdout=PIPE)
    stdout_data = proc.communicate(input=buf)[0]
    return FixNroffOutput(stdout_data)
|
||||
|
||||
|
||||
def FormatTocLine(section, title, page):
|
||||
line = ""
|
||||
level = 1
|
||||
if section:
|
||||
level = section.count(".")
|
||||
for i in range(level):
|
||||
line += " "
|
||||
if section:
|
||||
line += "%s " % section
|
||||
line += "%s " % title
|
||||
pagenum = "%d" % page
|
||||
nspace = 72 - len(line) - len(pagenum)
|
||||
if nspace % 2:
|
||||
line += " "
|
||||
for i in range(nspace / 2):
|
||||
line += ". "
|
||||
line += "%d\n" % page
|
||||
return line
|
||||
|
||||
|
||||
def CreateToc(buf):
    """Builds the table of contents for formatted text buf.

    Each line of buf is checked, in this order:
      1. a numbered heading ("1.", "1.2.", ... followed by a title),
      2. an appendix heading ("Appendix X." followed by a title),
      3. a line starting with "Authors",
      4. a page footer ending in "[Page N]".
    Cases 1-3 add a FormatTocLine() entry for the current page; case 4 sets
    the current page to N + 1.  The concatenated entries are returned.
    """
    numbered_re = re.compile(r'^((\d+\.)+)\s+(.*)$')
    appendix_re = re.compile(r'^(Appendix [A-Z].)\s+(.*)$')
    footer_re = re.compile(r'\[Page (\d+)\]$')
    page = 1
    entries = []
    for line in buf.split("\n"):
        numbered = numbered_re.search(line)
        if numbered:
            entries.append(
                FormatTocLine(numbered.group(1), numbered.group(3), page))
            continue
        appendix = appendix_re.search(line)
        if appendix:
            entries.append(
                FormatTocLine(appendix.group(1), appendix.group(2), page))
            continue
        if line.startswith("Authors"):
            entries.append(FormatTocLine(None, line, page))
            continue
        footer = footer_re.search(line)
        if footer:
            page = int(footer.group(1)) + 1
    return "".join(entries)
|
||||
|
||||
|
||||
# Two passes over the source named on the command line: the first nroff run
# only serves to find out on which page each heading lands, the second
# formats the source with the generated table of contents substituted for
# the INSERT_TOC_HERE placeholder.
src = Readfile(sys.argv[1])
out = Nroff(src)
toc = CreateToc(out)
src = src.replace("INSERT_TOC_HERE", toc)
sys.stdout.write(Nroff(src) + "\n")
|
|
@ -0,0 +1,14 @@
|
|||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Defines a common version string used by all of the brotli tools. */
|
||||
|
||||
#ifndef BROTLI_TOOLS_VERSION_H_
|
||||
#define BROTLI_TOOLS_VERSION_H_
|
||||
|
||||
#define BROTLI_VERSION "0.4.0"
|
||||
|
||||
#endif /* BROTLI_TOOLS_VERSION_H_ */
|
|
@ -6,14 +6,19 @@
|
|||
MY_TEMP_DIR=`mktemp -d -t brotli_update.XXXXXX` || exit 1
|
||||
|
||||
git clone https://github.com/google/brotli ${MY_TEMP_DIR}/brotli
|
||||
git -C ${MY_TEMP_DIR}/brotli checkout v0.4.0
|
||||
|
||||
COMMIT=`(cd ${MY_TEMP_DIR}/brotli && git log | head -n 1)`
|
||||
perl -p -i -e "s/\[commit [0-9a-f]{40}\]/[${COMMIT}]/" README.mozilla;
|
||||
COMMIT=$(git -C ${MY_TEMP_DIR}/brotli rev-parse HEAD)
|
||||
perl -p -i -e "s/\[commit [0-9a-f]{40}\]/[commit ${COMMIT}]/" README.mozilla;
|
||||
|
||||
rm -rf dec
|
||||
mv ${MY_TEMP_DIR}/brotli/dec dec
|
||||
DIRS="dec enc tools"
|
||||
|
||||
for d in $DIRS; do
|
||||
rm -rf $d
|
||||
mv ${MY_TEMP_DIR}/brotli/$d $d
|
||||
done
|
||||
rm -rf ${MY_TEMP_DIR}
|
||||
hg add dec
|
||||
hg addremove $DIRS
|
||||
|
||||
echo "###"
|
||||
echo "### Updated brotli/dec to $COMMIT."
|
||||
|
|
Загрузка…
Ссылка в новой задаче