Bug 1355671 - Import brotli encoder and command line tool code. r=gps

--HG--
extra : rebase_source : f7a1326a7d39a34828b596140da8482718828352
This commit is contained in:
Mike Hommey 2017-04-07 15:57:31 +09:00
Parent 8f667a0d22
Commit 057318a71f
53 changed files with 37789 additions and 5 deletions

View file

@@ -0,0 +1,14 @@
#brotli/enc
include ../shared.mk
OBJS_NODICT = backward_references.o block_splitter.o brotli_bit_stream.o compress_fragment.o compress_fragment_two_pass.o encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o metablock.o static_dict.o streams.o utf8_util.o
OBJS = $(OBJS_NODICT) dictionary.o
nodict : $(OBJS_NODICT)
all : $(OBJS)
clean :
rm -f $(OBJS) $(SO)

View file

@@ -0,0 +1,858 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find backward reference copies.
#include "./backward_references.h"
#include <algorithm>
#include <limits>
#include <vector>
#include "./command.h"
#include "./fast_log.h"
#include "./literal_cost.h"
namespace brotli {
// The maximum length for which the zopflification uses distinct distances.
static const uint16_t kMaxZopfliLen = 325;
// Histogram based cost model for zopflification.
class ZopfliCostModel {
public:
ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {}
void SetFromCommands(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const Command* commands,
size_t num_commands,
size_t last_insert_len) {
std::vector<uint32_t> histogram_literal(256, 0);
std::vector<uint32_t> histogram_cmd(kNumCommandPrefixes, 0);
std::vector<uint32_t> histogram_dist(kNumDistancePrefixes, 0);
size_t pos = position - last_insert_len;
for (size_t i = 0; i < num_commands; i++) {
size_t inslength = commands[i].insert_len_;
size_t copylength = commands[i].copy_len();
size_t distcode = commands[i].dist_prefix_;
size_t cmdcode = commands[i].cmd_prefix_;
histogram_cmd[cmdcode]++;
if (cmdcode >= 128) histogram_dist[distcode]++;
for (size_t j = 0; j < inslength; j++) {
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
}
pos += inslength + copylength;
}
std::vector<float> cost_literal;
Set(histogram_literal, &cost_literal);
Set(histogram_cmd, &cost_cmd_);
Set(histogram_dist, &cost_dist_);
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
}
literal_costs_.resize(num_bytes + 1);
literal_costs_[0] = 0.0;
for (size_t i = 0; i < num_bytes; ++i) {
literal_costs_[i + 1] = literal_costs_[i] +
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
}
}
void SetFromLiteralCosts(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
literal_costs_.resize(num_bytes + 2);
EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
ringbuffer, &literal_costs_[1]);
literal_costs_[0] = 0.0;
for (size_t i = 0; i < num_bytes; ++i) {
literal_costs_[i + 1] += literal_costs_[i];
}
cost_cmd_.resize(kNumCommandPrefixes);
cost_dist_.resize(kNumDistancePrefixes);
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
cost_cmd_[i] = static_cast<float>(FastLog2(11 + i));
}
for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
cost_dist_[i] = static_cast<float>(FastLog2(20 + i));
}
min_cost_cmd_ = static_cast<float>(FastLog2(11));
}
float GetCommandCost(
size_t dist_code, size_t length_code, size_t insert_length) const {
uint16_t inscode = GetInsertLengthCode(insert_length);
uint16_t copycode = GetCopyLengthCode(length_code);
uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code == 0);
uint16_t dist_symbol;
uint32_t distextra;
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
uint32_t distnumextra = distextra >> 24;
float result = static_cast<float>(
GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra);
result += cost_cmd_[cmdcode];
if (cmdcode >= 128) result += cost_dist_[dist_symbol];
return result;
}
float GetLiteralCosts(size_t from, size_t to) const {
return literal_costs_[to] - literal_costs_[from];
}
float GetMinCostCmd(void) const {
return min_cost_cmd_;
}
private:
void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) {
cost->resize(histogram.size());
size_t sum = 0;
for (size_t i = 0; i < histogram.size(); i++) {
sum += histogram[i];
}
float log2sum = static_cast<float>(FastLog2(sum));
for (size_t i = 0; i < histogram.size(); i++) {
if (histogram[i] == 0) {
(*cost)[i] = log2sum + 2;
continue;
}
// Shannon bits for this symbol.
(*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i]));
// Cannot be coded with less than 1 bit
if ((*cost)[i] < 1) (*cost)[i] = 1;
}
}
std::vector<float> cost_cmd_; // The insert and copy length symbols.
std::vector<float> cost_dist_;
// Cumulative costs of literals per position in the stream.
std::vector<float> literal_costs_;
float min_cost_cmd_;
};
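// Editorial worked example: for a literal histogram {8, 4, 4, 0} the sum is
// 16 and FastLog2(16) = 4, so Set() yields costs {1.0, 2.0, 2.0, 6.0}:
// 4 - log2(8) = 1 bit, 4 - log2(4) = 2 bits, the absent symbol is charged
// log2(sum) + 2 = 6 bits, and anything below 1 bit would be clamped to 1.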
inline size_t ComputeDistanceCode(size_t distance,
size_t max_distance,
int quality,
const int* dist_cache) {
if (distance <= max_distance) {
if (distance == static_cast<size_t>(dist_cache[0])) {
return 0;
} else if (distance == static_cast<size_t>(dist_cache[1])) {
return 1;
} else if (distance == static_cast<size_t>(dist_cache[2])) {
return 2;
} else if (distance == static_cast<size_t>(dist_cache[3])) {
return 3;
} else if (quality > 3 && distance >= 6) {
for (size_t k = 4; k < kNumDistanceShortCodes; ++k) {
size_t idx = kDistanceCacheIndex[k];
size_t candidate =
static_cast<size_t>(dist_cache[idx] + kDistanceCacheOffset[k]);
static const size_t kLimits[16] = { 0, 0, 0, 0,
6, 6, 11, 11,
11, 11, 11, 11,
12, 12, 12, 12 };
if (distance == candidate && distance >= kLimits[k]) {
return k;
}
}
}
}
return distance + 15;
}
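// Editorial example: with dist_cache = {11, 4, 25, 16}, a match at distance
// 11 yields short code 0 and one at distance 4 yields short code 1. At
// quality > 3, small offsets from the cached distances (e.g. 11 +/- 1) can
// match one of the derived short codes 4..15, subject to the kLimits floor.
// Any other distance is emitted as distance + 15, past the 16 short codes.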
// REQUIRES: len >= 2, start_pos <= pos
// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
// Maintains the "ZopfliNode array invariant".
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
size_t len, size_t len_code, size_t dist,
size_t short_code, float cost) {
ZopfliNode& next = nodes[pos + len];
next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
next.distance = static_cast<uint32_t>(dist | (short_code << 25));
next.insert_length = static_cast<uint32_t>(pos - start_pos);
next.cost = cost;
}
// Maintains the smallest 2^k cost difference together with their positions
class StartPosQueue {
public:
struct PosData {
size_t pos;
int distance_cache[4];
float costdiff;
};
explicit StartPosQueue(int bits)
: mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}
void Clear(void) {
idx_ = 0;
}
void Push(const StartPosQueue::PosData& posdata) {
size_t offset = ~idx_ & mask_;
++idx_;
size_t len = size();
q_[offset] = posdata;
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
for (size_t i = 1; i < len; ++i) {
if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
}
++offset;
}
}
size_t size(void) const { return std::min(idx_, mask_ + 1); }
const StartPosQueue::PosData& GetStartPosData(size_t k) const {
return q_[(k - idx_) & mask_];
}
private:
const size_t mask_;
std::vector<PosData> q_;
size_t idx_;
};
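// Editorial usage sketch (values illustrative): the queue retains the last
// 2^bits pushed candidates ordered by costdiff, so index 0 is the cheapest.
//   StartPosQueue queue(3);                      // room for 8 candidates
//   StartPosQueue::PosData d = { pos, {0, 0, 0, 0}, costdiff };
//   queue.Push(d);                               // one bubble pass keeps order
//   const StartPosQueue::PosData& best = queue.GetStartPosData(0);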
// Returns the minimum possible copy length that can improve the cost of any
// future position.
static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
const ZopfliNode* nodes,
const ZopfliCostModel& model,
const size_t num_bytes,
const size_t pos) {
// Compute the minimum possible cost of reaching any future position.
const size_t start0 = queue.GetStartPosData(0).pos;
float min_cost = (nodes[start0].cost +
model.GetLiteralCosts(start0, pos) +
model.GetMinCostCmd());
size_t len = 2;
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
// We already reached (pos + len) with no more cost than the minimum
// possible cost of reaching anything from this pos, so there is no point in
// looking for lengths <= len.
++len;
if (len == next_len_offset) {
// We reached the next copy length code bucket, so we add one more
// extra bit to the minimum cost.
min_cost += static_cast<float>(1.0);
next_len_offset += next_len_bucket;
next_len_bucket *= 2;
}
}
return len;
}
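// Editorial note: the one-bit steps above mirror the copy length code
// buckets; with initial offset 10 and bucket width 4, the extra-bit count
// grows at lengths 10, 14, 22, 38, ... as each bucket doubles in width.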
// Fills in dist_cache[0..3] with the last four distances (as defined by
// Section 4. of the Spec) that would be used at (block_start + pos) if we
// used the shortest path of commands from block_start, computed from
// nodes[0..pos]. The last four distances at block_start are in
// starting_dist_cache[0..3].
// REQUIRES: nodes[pos].cost < kInfinity
// REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant".
static void ComputeDistanceCache(const size_t block_start,
const size_t pos,
const size_t max_backward,
const int* starting_dist_cache,
const ZopfliNode* nodes,
int* dist_cache) {
int idx = 0;
size_t p = pos;
// Because of the prerequisite, this loop does at most (pos + 1) / 2 iterations.
while (idx < 4 && p > 0) {
const size_t clen = nodes[p].copy_length();
const size_t ilen = nodes[p].insert_length;
const size_t dist = nodes[p].copy_distance();
// Since block_start + p is the end position of the command, the copy part
// starts from block_start + p - clen. Distances that are greater than this
// or greater than max_backward are static dictionary references, and do
// not update the last distances. Also distance code 0 (last distance)
// does not update the last distances.
if (dist + clen <= block_start + p && dist <= max_backward &&
nodes[p].distance_code() > 0) {
dist_cache[idx++] = static_cast<int>(dist);
}
// Because of the prerequisite, p >= clen + ilen >= 2.
p -= clen + ilen;
}
for (; idx < 4; ++idx) {
dist_cache[idx] = *starting_dist_cache++;
}
}
static void UpdateNodes(const size_t num_bytes,
const size_t block_start,
const size_t pos,
const uint8_t* ringbuffer,
const size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* starting_dist_cache,
const size_t num_matches,
const BackwardMatch* matches,
const ZopfliCostModel* model,
StartPosQueue* queue,
ZopfliNode* nodes) {
size_t cur_ix = block_start + pos;
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
size_t max_distance = std::min(cur_ix, max_backward_limit);
if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) {
StartPosQueue::PosData posdata;
posdata.pos = pos;
posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos);
ComputeDistanceCache(block_start, pos, max_backward_limit,
starting_dist_cache, nodes, posdata.distance_cache);
queue->Push(posdata);
}
const size_t min_len = ComputeMinimumCopyLength(
*queue, nodes, *model, num_bytes, pos);
// Go over the command starting positions in order of increasing cost
// difference.
for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
const size_t start = posdata.pos;
const float start_costdiff = posdata.costdiff;
// Look for last distance matches using the distance cache from this
// starting position.
size_t best_len = min_len - 1;
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] +
kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > max_distance)) {
continue;
}
prev_ix &= ringbuffer_mask;
if (cur_ix_masked + best_len > ringbuffer_mask ||
prev_ix + best_len > ringbuffer_mask ||
ringbuffer[cur_ix_masked + best_len] !=
ringbuffer[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
num_bytes - pos);
for (size_t l = best_len + 1; l <= len; ++l) {
const size_t inslen = pos - start;
float cmd_cost = model->GetCommandCost(j, l, inslen);
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
if (cost < nodes[pos + l].cost) {
UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost);
}
best_len = l;
}
}
// At higher iterations look only for new last distance matches, since
// looking only for new command start positions with the same distances
// does not help much.
if (k >= 2) continue;
// Loop through all possible copy lengths at this position.
size_t len = min_len;
for (size_t j = 0; j < num_matches; ++j) {
BackwardMatch match = matches[j];
size_t dist = match.distance;
bool is_dictionary_match = dist > max_distance;
// We already tried all possible last distance matches, so we can use
// normal distance code here.
size_t dist_code = dist + 15;
// Try all copy lengths up until the maximum copy length corresponding
// to this distance. If the distance refers to the static dictionary, or
// the maximum length is long enough, try only one maximum length.
size_t max_len = match.length();
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
len = max_len;
}
for (; len <= max_len; ++len) {
size_t len_code = is_dictionary_match ? match.length_code() : len;
const size_t inslen = pos - start;
float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen);
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
if (cost < nodes[pos + len].cost) {
UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost);
}
}
}
}
}
static void ComputeShortestPathFromNodes(size_t num_bytes,
const ZopfliNode* nodes,
std::vector<uint32_t>* path) {
std::vector<uint32_t> backwards(num_bytes / 2 + 1);
size_t index = num_bytes;
while (nodes[index].cost == kInfinity) --index;
size_t num_commands = 0;
while (index != 0) {
size_t len = nodes[index].command_length();
backwards[num_commands++] = static_cast<uint32_t>(len);
index -= len;
}
path->resize(num_commands);
for (size_t i = num_commands, j = 0; i > 0; --i, ++j) {
(*path)[j] = backwards[i - 1];
}
}
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals) {
size_t pos = 0;
for (size_t i = 0; i < path.size(); i++) {
const ZopfliNode& next = nodes[pos + path[i]];
size_t copy_length = next.copy_length();
size_t insert_length = next.insert_length;
pos += insert_length;
if (i == 0) {
insert_length += *last_insert_len;
*last_insert_len = 0;
}
size_t distance = next.copy_distance();
size_t len_code = next.length_code();
size_t max_distance = std::min(block_start + pos, max_backward_limit);
bool is_dictionary = (distance > max_distance);
size_t dist_code = next.distance_code();
Command cmd(insert_length, copy_length, len_code, dist_code);
commands[i] = cmd;
if (!is_dictionary && dist_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = static_cast<int>(distance);
}
*num_literals += insert_length;
pos += copy_length;
}
*last_insert_len += num_bytes - pos;
}
static void ZopfliIterate(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
const ZopfliCostModel& model,
const std::vector<uint32_t>& num_matches,
const std::vector<BackwardMatch>& matches,
ZopfliNode* nodes,
std::vector<uint32_t>* path) {
nodes[0].length = 0;
nodes[0].cost = 0;
StartPosQueue queue(3);
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; i++) {
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, num_matches[i],
&matches[cur_match_pos], &model, &queue, &nodes[0]);
cur_match_pos += num_matches[i];
// The zopflification can be too slow for very long matches, so in that case
// we skip it entirely; the cost in compression ratio is small.
if (num_matches[i] == 1 &&
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
i += matches[cur_match_pos - 1].length() - 1;
queue.Clear();
}
}
ComputeShortestPathFromNodes(num_bytes, &nodes[0], path);
}
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
Hashers::H10* hasher,
ZopfliNode* nodes,
std::vector<uint32_t>* path) {
nodes[0].length = 0;
nodes[0].cost = 0;
ZopfliCostModel* model = new ZopfliCostModel;
model->SetFromLiteralCosts(num_bytes, position,
ringbuffer, ringbuffer_mask);
StartPosQueue queue(3);
BackwardMatch matches[Hashers::H10::kMaxNumMatches];
for (size_t i = 0; i + 3 < num_bytes; i++) {
const size_t max_distance = std::min(position + i, max_backward_limit);
size_t num_matches = hasher->FindAllMatches(
ringbuffer, ringbuffer_mask, position + i, num_bytes - i, max_distance,
matches);
if (num_matches > 0 &&
matches[num_matches - 1].length() > kMaxZopfliLen) {
matches[0] = matches[num_matches - 1];
num_matches = 1;
}
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, num_matches, matches,
model, &queue, nodes);
if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) {
for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) {
++i;
if (matches[0].length() - j < 64 &&
num_bytes - i >= kMaxTreeCompLength) {
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
}
}
queue.Clear();
}
}
delete model;
ComputeShortestPathFromNodes(num_bytes, nodes, path);
}
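// Editorial note: ZopfliComputeShortestPath (the quality 10 path) finds
// matches on the fly with the H10 hasher under a fixed literal-cost model,
// while ZopfliIterate (quality 11) replays precomputed matches so the cost
// model can be refit from the previous pass's commands between iterations.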
template<typename Hasher>
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hasher* hasher,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals) {
// Set maximum distance, see section 9.1. of the spec.
const size_t max_backward_limit = (1 << lgwin) - 16;
// Choose which init method is faster.
// memset is about 100 times faster than hasher->InitForData().
const size_t kMaxBytesForPartialHashInit = Hasher::kHashMapSize >> 7;
if (position == 0 && is_last && num_bytes <= kMaxBytesForPartialHashInit) {
hasher->InitForData(ringbuffer, num_bytes);
} else {
hasher->Init();
}
if (num_bytes >= 3 && position >= 3) {
// Prepare the hashes for the last three bytes of the previous write.
// These could not be calculated before, since they require knowledge
// of both the previous and the current block.
hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
static_cast<uint32_t>(position - 3));
hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
static_cast<uint32_t>(position - 2));
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
static_cast<uint32_t>(position - 1));
}
const Command * const orig_commands = commands;
size_t insert_length = *last_insert_len;
size_t i = position & ringbuffer_mask;
const size_t i_diff = position - i;
const size_t i_end = i + num_bytes;
// Window size for the heuristics that speed up handling of random data.
const size_t random_heuristics_window_size = quality < 9 ? 64 : 512;
size_t apply_random_heuristics = i + random_heuristics_window_size;
// Minimum score to accept a backward reference.
const double kMinScore = 4.0;
while (i + Hasher::kHashTypeLength - 1 < i_end) {
size_t max_length = i_end - i;
size_t max_distance = std::min(i + i_diff, max_backward_limit);
size_t best_len = 0;
size_t best_len_code = 0;
size_t best_dist = 0;
double best_score = kMinScore;
bool match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
dist_cache, static_cast<uint32_t>(i + i_diff), max_length, max_distance,
&best_len, &best_len_code, &best_dist, &best_score);
if (match_found) {
// Found a match. Let's look for something even better ahead.
int delayed_backward_references_in_row = 0;
for (;;) {
--max_length;
size_t best_len_2 =
quality < 5 ? std::min(best_len - 1, max_length) : 0;
size_t best_len_code_2 = 0;
size_t best_dist_2 = 0;
double best_score_2 = kMinScore;
max_distance = std::min(i + i_diff + 1, max_backward_limit);
match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
dist_cache, static_cast<uint32_t>(i + i_diff + 1),
max_length, max_distance,
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
double cost_diff_lazy = 7.0;
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
// Ok, let's just write one byte for now and start a match from the
// next byte.
++i;
++insert_length;
best_len = best_len_2;
best_len_code = best_len_code_2;
best_dist = best_dist_2;
best_score = best_score_2;
if (++delayed_backward_references_in_row < 4) {
continue;
}
}
break;
}
apply_random_heuristics =
i + 2 * best_len + random_heuristics_window_size;
max_distance = std::min(i + i_diff, max_backward_limit);
// The first 16 codes are special short codes, and the minimum offset is 1.
size_t distance_code =
ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
if (best_dist <= max_distance && distance_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = static_cast<int>(best_dist);
}
Command cmd(insert_length, best_len, best_len_code, distance_code);
*commands++ = cmd;
*num_literals += insert_length;
insert_length = 0;
// Put the hash keys into the table, if there are enough
// bytes left.
for (size_t j = 2; j < best_len; ++j) {
hasher->Store(&ringbuffer[i + j],
static_cast<uint32_t>(i + i_diff + j));
}
i += best_len;
} else {
++insert_length;
++i;
// If we have not seen matches for a long time, we can skip some
// match lookups. Unsuccessful match lookups are very expensive, and
// this kind of heuristic speeds up compression quite a lot.
if (i > apply_random_heuristics) {
// Going through incompressible data: jump ahead.
if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
// It has been quite a long time since we saw a copy, so we assume
// this data is not compressible and store hashes less often.
// Hashes of incompressible data are less likely to turn out to be
// useful in the future, too, so we store fewer of them to avoid
// flooding out the hash table of good, compressible data.
size_t i_jump = std::min(i + 16, i_end - 4);
for (; i < i_jump; i += 4) {
hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
insert_length += 4;
}
} else {
size_t i_jump = std::min(i + 8, i_end - 3);
for (; i < i_jump; i += 2) {
hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
insert_length += 2;
}
}
}
}
}
insert_length += i_end - i;
*last_insert_len = insert_length;
*num_commands += static_cast<size_t>(commands - orig_commands);
}
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hashers* hashers,
int hash_type,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals) {
bool zopflify = quality > 9;
if (zopflify) {
Hashers::H10* hasher = hashers->hash_h10;
hasher->Init(lgwin, position, num_bytes, is_last);
hasher->StitchToPreviousBlock(num_bytes, position,
ringbuffer, ringbuffer_mask);
// Set maximum distance, see section 9.1. of the spec.
const size_t max_backward_limit = (1 << lgwin) - 16;
if (quality == 10) {
std::vector<ZopfliNode> nodes(num_bytes + 1);
std::vector<uint32_t> path;
ZopfliComputeShortestPath(num_bytes, position,
ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, hasher,
&nodes[0], &path);
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
&nodes[0], dist_cache, last_insert_len, commands,
num_literals);
*num_commands += path.size();
return;
}
std::vector<uint32_t> num_matches(num_bytes);
std::vector<BackwardMatch> matches(4 * num_bytes);
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; ++i) {
size_t max_distance = std::min(position + i, max_backward_limit);
size_t max_length = num_bytes - i;
// Ensure that we have enough free slots.
if (matches.size() < cur_match_pos + Hashers::H10::kMaxNumMatches) {
matches.resize(cur_match_pos + Hashers::H10::kMaxNumMatches);
}
size_t num_found_matches = hasher->FindAllMatches(
ringbuffer, ringbuffer_mask, position + i, max_length, max_distance,
&matches[cur_match_pos]);
const size_t cur_match_end = cur_match_pos + num_found_matches;
for (size_t j = cur_match_pos; j + 1 < cur_match_end; ++j) {
assert(matches[j].length() < matches[j + 1].length());
assert(matches[j].distance > max_distance ||
matches[j].distance <= matches[j + 1].distance);
}
num_matches[i] = static_cast<uint32_t>(num_found_matches);
if (num_found_matches > 0) {
const size_t match_len = matches[cur_match_end - 1].length();
if (match_len > kMaxZopfliLen) {
matches[cur_match_pos++] = matches[cur_match_end - 1];
num_matches[i] = 1;
for (size_t j = 1; j < match_len; ++j) {
++i;
if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) {
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
}
num_matches[i] = 0;
}
} else {
cur_match_pos = cur_match_end;
}
}
}
size_t orig_num_literals = *num_literals;
size_t orig_last_insert_len = *last_insert_len;
int orig_dist_cache[4] = {
dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
};
size_t orig_num_commands = *num_commands;
static const size_t kIterations = 2;
for (size_t i = 0; i < kIterations; i++) {
ZopfliCostModel model;
if (i == 0) {
model.SetFromLiteralCosts(num_bytes, position,
ringbuffer, ringbuffer_mask);
} else {
model.SetFromCommands(num_bytes, position,
ringbuffer, ringbuffer_mask,
commands, *num_commands - orig_num_commands,
orig_last_insert_len);
}
*num_commands = orig_num_commands;
*num_literals = orig_num_literals;
*last_insert_len = orig_last_insert_len;
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
std::vector<ZopfliNode> nodes(num_bytes + 1);
std::vector<uint32_t> path;
ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, model, num_matches, matches,
&nodes[0], &path);
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
&nodes[0], dist_cache, last_insert_len, commands,
num_literals);
*num_commands += path.size();
}
return;
}
switch (hash_type) {
case 2:
CreateBackwardReferences<Hashers::H2>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h2, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 3:
CreateBackwardReferences<Hashers::H3>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h3, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 4:
CreateBackwardReferences<Hashers::H4>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h4, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 5:
CreateBackwardReferences<Hashers::H5>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h5, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 6:
CreateBackwardReferences<Hashers::H6>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h6, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 7:
CreateBackwardReferences<Hashers::H7>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h7, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 8:
CreateBackwardReferences<Hashers::H8>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h8, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 9:
CreateBackwardReferences<Hashers::H9>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h9, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
default:
break;
}
}
} // namespace brotli
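As an editorial aside, both command emitters above update the four-slot
"last distances" cache inline; the sketch below is a minimal standalone
version of that update, assuming only the semantics visible in
ZopfliCreateCommands and CreateBackwardReferences (PushDistance is a
hypothetical helper, not part of the imported sources).

#include <cstring>

// Shift the previous three entries down and install the new distance at
// slot 0, matching the inline updates above.
static inline void PushDistance(int* dist_cache, int distance) {
  memmove(&dist_cache[1], &dist_cache[0], 3 * sizeof(dist_cache[0]));
  dist_cache[0] = distance;
}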

View file

@@ -0,0 +1,116 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find backward reference copies.
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <vector>
#include "./hash.h"
#include "./command.h"
#include "./types.h"
namespace brotli {
// "commands" points to the next output command to write to, "*num_commands" is
// initially the total amount of commands output by previous
// CreateBackwardReferences calls, and must be incremented by the amount written
// by this call.
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hashers* hashers,
int hash_type,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals);
static const float kInfinity = std::numeric_limits<float>::infinity();
struct ZopfliNode {
ZopfliNode(void) : length(1),
distance(0),
insert_length(0),
cost(kInfinity) {}
inline uint32_t copy_length() const {
return length & 0xffffff;
}
inline uint32_t length_code() const {
const uint32_t modifier = length >> 24;
return copy_length() + 9u - modifier;
}
inline uint32_t copy_distance() const {
return distance & 0x1ffffff;
}
inline uint32_t distance_code() const {
const uint32_t short_code = distance >> 25;
return short_code == 0 ? copy_distance() + 15 : short_code - 1;
}
inline uint32_t command_length() const {
return copy_length() + insert_length;
}
// best length to get up to this byte (not including this byte itself)
// the highest 8 bits are used to reconstruct the length code
uint32_t length;
// distance associated with the length
// the highest 7 bits contain the distance short code + 1 (or zero if no short code)
uint32_t distance;
// number of literal inserts before this copy
uint32_t insert_length;
// smallest cost to get to this byte from the beginning, as found so far
float cost;
};
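// Editorial worked example: a copy of length 12 recorded with length code 10
// stores length = 12 | ((12 + 9 - 10) << 24), so copy_length() returns 12
// and length_code() returns 10. A copy found via distance cache index 3
// stores (3 + 1) in the top 7 bits of distance, so distance_code() returns
// 3; with no short code those bits are zero and distance_code() falls back
// to copy_distance() + 15.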
// Computes the shortest path of commands from position to at most
// position + num_bytes.
//
// On return, path->size() is the number of commands found and path[i] is the
// length of the ith command (copy length plus insert length).
// Note that the sum of the lengths of all commands can be less than num_bytes.
//
// On return, the nodes[0..num_bytes] array will have the following
// "ZopfliNode array invariant":
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
// (1) nodes[i].copy_length() >= 2
// (2) nodes[i].command_length() <= i and
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
Hashers::H10* hasher,
ZopfliNode* nodes,
std::vector<uint32_t>* path);
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals);
} // namespace brotli
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_

View file

@@ -0,0 +1,161 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to estimate the bit cost of Huffman trees.
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./types.h"
namespace brotli {
static inline double ShannonEntropy(const uint32_t *population, size_t size,
size_t *total) {
size_t sum = 0;
double retval = 0;
const uint32_t *population_end = population + size;
size_t p;
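// The summation loop below is unrolled two elements per iteration; for an
// odd size we jump into the middle of the first iteration so that exactly
// `size` elements are consumed.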
if (size & 1) {
goto odd_number_of_elements_left;
}
while (population < population_end) {
p = *population++;
sum += p;
retval -= static_cast<double>(p) * FastLog2(p);
odd_number_of_elements_left:
p = *population++;
sum += p;
retval -= static_cast<double>(p) * FastLog2(p);
}
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
*total = sum;
return retval;
}
static inline double BitsEntropy(const uint32_t *population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
// At least one bit per literal is needed.
retval = static_cast<double>(sum);
}
return retval;
}
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
if (histogram.total_count_ == 0) {
return kOneSymbolHistogramCost;
}
int count = 0;
int s[5];
for (int i = 0; i < kSize; ++i) {
if (histogram.data_[i] > 0) {
s[count] = i;
++count;
if (count > 4) break;
}
}
if (count == 1) {
return kOneSymbolHistogramCost;
}
if (count == 2) {
return (kTwoSymbolHistogramCost +
static_cast<double>(histogram.total_count_));
}
if (count == 3) {
const uint32_t histo0 = histogram.data_[s[0]];
const uint32_t histo1 = histogram.data_[s[1]];
const uint32_t histo2 = histogram.data_[s[2]];
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
return (kThreeSymbolHistogramCost +
2 * (histo0 + histo1 + histo2) - histomax);
}
if (count == 4) {
uint32_t histo[4];
for (int i = 0; i < 4; ++i) {
histo[i] = histogram.data_[s[i]];
}
// Sort
for (int i = 0; i < 4; ++i) {
for (int j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
std::swap(histo[j], histo[i]);
}
}
}
const uint32_t h23 = histo[2] + histo[3];
const uint32_t histomax = std::max(h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
}
// In this loop we compute the entropy of the histogram and simultaneously
// build a simplified histogram of the code length codes where we use the
// zero repeat code 17, but we don't use the non-zero repeat code 16.
double bits = 0;
size_t max_depth = 1;
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
const double log2total = FastLog2(histogram.total_count_);
for (size_t i = 0; i < kSize;) {
if (histogram.data_[i] > 0) {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
double log2p = log2total - FastLog2(histogram.data_[i]);
// Approximate the bit depth by round(-log2(P(symbol)))
size_t depth = static_cast<size_t>(log2p + 0.5);
bits += histogram.data_[i] * log2p;
if (depth > 15) {
depth = 15;
}
if (depth > max_depth) {
max_depth = depth;
}
++depth_histo[depth];
++i;
} else {
// Compute the run length of zeros and add the appropriate number of 0 and
// 17 code length codes to the code length code histogram.
uint32_t reps = 1;
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
++reps;
}
i += reps;
if (i == kSize) {
// Don't add any cost for the last zero run, since these are encoded
// only implicitly.
break;
}
if (reps < 3) {
depth_histo[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++depth_histo[17];
// Add the 3 extra bits for the 17 code length code.
bits += 3;
reps >>= 3;
}
}
}
}
// Add the estimated encoding cost of the code length code histogram.
bits += static_cast<double>(18 + 2 * max_depth);
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
return bits;
}
} // namespace brotli
#endif // BROTLI_ENC_BIT_COST_H_
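A quick editorial sanity check of the entropy estimate above; the toy
histogram and the expected value are illustrative only, and FastLog2 is
assumed to be exact on powers of two.

#include <cstdint>
#include <cstdio>
#include "./bit_cost.h"

int main(void) {
  // 16 symbols with probabilities 1/2, 1/4, 1/8, 1/8.
  const uint32_t histogram[4] = {8, 4, 2, 2};
  // Shannon entropy is 1.75 bits/symbol, so we expect 16 * 1.75 = 28 bits.
  double bits = brotli::BitsEntropy(histogram, 4);
  std::printf("estimated cost: %.1f bits\n", bits);
  return 0;
}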

View file

@@ -0,0 +1,505 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Block split point selection utilities.
#include "./block_splitter.h"
#include <assert.h>
#include <math.h>
#include <algorithm>
#include <cstring>
#include <vector>
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
namespace brotli {
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
std::vector<uint8_t>* literals) {
// Count how many we have.
size_t total_length = 0;
for (size_t i = 0; i < num_commands; ++i) {
total_length += cmds[i].insert_len_;
}
if (total_length == 0) {
return;
}
// Allocate.
literals->resize(total_length);
// Loop again, and copy this time.
size_t pos = 0;
size_t from_pos = offset & mask;
for (size_t i = 0; i < num_commands && pos < total_length; ++i) {
size_t insert_len = cmds[i].insert_len_;
if (from_pos + insert_len > mask) {
size_t head_size = mask + 1 - from_pos;
memcpy(&(*literals)[pos], data + from_pos, head_size);
from_pos = 0;
pos += head_size;
insert_len -= head_size;
}
if (insert_len > 0) {
memcpy(&(*literals)[pos], data + from_pos, insert_len);
pos += insert_len;
}
from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
}
}
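// Multiplicative congruential generator with the classic 16807 multiplier,
// reduced mod 2^32 by unsigned wrap-around; statistical quality hardly
// matters here, since it only drives histogram sampling.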
inline static unsigned int MyRand(unsigned int* seed) {
*seed *= 16807U;
if (*seed == 0) {
*seed = 1;
}
return *seed;
}
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
size_t stride,
size_t num_histograms,
HistogramType* histograms) {
for (size_t i = 0; i < num_histograms; ++i) {
histograms[i].Clear();
}
unsigned int seed = 7;
size_t block_length = length / num_histograms;
for (size_t i = 0; i < num_histograms; ++i) {
size_t pos = length * i / num_histograms;
if (i != 0) {
pos += MyRand(&seed) % block_length;
}
if (pos + stride >= length) {
pos = length - stride - 1;
}
histograms[i].Add(data + pos, stride);
}
}
template<typename HistogramType, typename DataType>
void RandomSample(unsigned int* seed,
const DataType* data,
size_t length,
size_t stride,
HistogramType* sample) {
size_t pos = 0;
if (stride >= length) {
pos = 0;
stride = length;
} else {
pos = MyRand(seed) % (length - stride + 1);
}
sample->Add(data + pos, stride);
}
template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length,
size_t stride,
size_t num_histograms,
HistogramType* histograms) {
size_t iters =
kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7;
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
for (size_t iter = 0; iter < iters; ++iter) {
HistogramType sample;
RandomSample(&seed, data, length, stride, &sample);
size_t ix = iter % num_histograms;
histograms[ix].AddHistogram(sample);
}
}
inline static double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count);
}
// Assigns a block id from the range [0, num_histograms) to each data element
// in data[0..length) and fills in block_id[0..length) with the assigned values.
// Returns the number of blocks, i.e. one plus the number of block switches.
template<typename DataType, int kSize>
size_t FindBlocks(const DataType* data, const size_t length,
const double block_switch_bitcost,
const size_t num_histograms,
const Histogram<kSize>* histograms,
double* insert_cost,
double* cost,
uint8_t* switch_signal,
uint8_t *block_id) {
if (num_histograms <= 1) {
for (size_t i = 0; i < length; ++i) {
block_id[i] = 0;
}
return 1;
}
const size_t bitmaplen = (num_histograms + 7) >> 3;
assert(num_histograms <= 256);
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[j] = FastLog2(static_cast<uint32_t>(
histograms[j].total_count_));
}
for (size_t i = kSize; i != 0;) {
--i;
for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[i * num_histograms + j] =
insert_cost[j] - BitCost(histograms[j].data_[i]);
}
}
memset(cost, 0, sizeof(cost[0]) * num_histograms);
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
// After each iteration of this loop, cost[k] will contain the difference
// between the minimum cost of arriving at the current byte position using
// entropy code k, and the minimum cost of arriving at the current byte
// position. This difference is capped at the block switch cost, and if it
// reaches block switch cost, it means that when we trace back from the last
// position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
size_t ix = byte_ix * bitmaplen;
size_t insert_cost_ix = data[byte_ix] * num_histograms;
double min_cost = 1e99;
for (size_t k = 0; k < num_histograms; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
min_cost = cost[k];
block_id[byte_ix] = static_cast<uint8_t>(k);
}
}
double block_switch_cost = block_switch_bitcost;
// Use a lower switch cost near the beginning to produce more blocks.
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
}
for (size_t k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost;
const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
assert((k >> 3) < bitmaplen);
switch_signal[ix + (k >> 3)] |= mask;
}
}
}
// Now trace back from the last position and switch at the marked places.
size_t byte_ix = length - 1;
size_t ix = byte_ix * bitmaplen;
uint8_t cur_id = block_id[byte_ix];
size_t num_blocks = 1;
while (byte_ix > 0) {
--byte_ix;
ix -= bitmaplen;
const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
if (switch_signal[ix + (cur_id >> 3)] & mask) {
if (cur_id != block_id[byte_ix]) {
cur_id = block_id[byte_ix];
++num_blocks;
}
}
block_id[byte_ix] = cur_id;
}
return num_blocks;
}
static size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
uint16_t* new_id, const size_t num_histograms) {
static const uint16_t kInvalidId = 256;
for (size_t i = 0; i < num_histograms; ++i) {
new_id[i] = kInvalidId;
}
uint16_t next_id = 0;
for (size_t i = 0; i < length; ++i) {
assert(block_ids[i] < num_histograms);
if (new_id[block_ids[i]] == kInvalidId) {
new_id[block_ids[i]] = next_id++;
}
}
for (size_t i = 0; i < length; ++i) {
block_ids[i] = static_cast<uint8_t>(new_id[block_ids[i]]);
assert(block_ids[i] < num_histograms);
}
assert(next_id <= num_histograms);
return next_id;
}
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
const uint8_t* block_ids,
const size_t num_histograms,
HistogramType* histograms) {
for (size_t i = 0; i < num_histograms; ++i) {
histograms[i].Clear();
}
for (size_t i = 0; i < length; ++i) {
histograms[block_ids[i]].Add(data[i]);
}
}
template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
const size_t num_blocks,
uint8_t* block_ids,
BlockSplit* split) {
static const size_t kMaxNumberOfBlockTypes = 256;
static const size_t kHistogramsPerBatch = 64;
static const size_t kClustersPerBatch = 16;
std::vector<uint32_t> histogram_symbols(num_blocks);
std::vector<uint32_t> block_lengths(num_blocks);
size_t block_idx = 0;
for (size_t i = 0; i < length; ++i) {
assert(block_idx < num_blocks);
++block_lengths[block_idx];
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
++block_idx;
}
}
assert(block_idx == num_blocks);
const size_t expected_num_clusters =
kClustersPerBatch *
(num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
std::vector<HistogramType> all_histograms;
std::vector<uint32_t> cluster_size;
all_histograms.reserve(expected_num_clusters);
cluster_size.reserve(expected_num_clusters);
size_t num_clusters = 0;
std::vector<HistogramType> histograms(
std::min(num_blocks, kHistogramsPerBatch));
size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
size_t pos = 0;
for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
uint32_t sizes[kHistogramsPerBatch];
uint32_t clusters[kHistogramsPerBatch];
uint32_t symbols[kHistogramsPerBatch];
uint32_t remap[kHistogramsPerBatch];
for (size_t j = 0; j < num_to_combine; ++j) {
histograms[j].Clear();
for (size_t k = 0; k < block_lengths[i + j]; ++k) {
histograms[j].Add(data[pos++]);
}
histograms[j].bit_cost_ = PopulationCost(histograms[j]);
symbols[j] = clusters[j] = static_cast<uint32_t>(j);
sizes[j] = 1;
}
size_t num_new_clusters = HistogramCombine(
&histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
num_to_combine, kHistogramsPerBatch, max_num_pairs);
for (size_t j = 0; j < num_new_clusters; ++j) {
all_histograms.push_back(histograms[clusters[j]]);
cluster_size.push_back(sizes[clusters[j]]);
remap[clusters[j]] = static_cast<uint32_t>(j);
}
for (size_t j = 0; j < num_to_combine; ++j) {
histogram_symbols[i + j] =
static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
}
num_clusters += num_new_clusters;
assert(num_clusters == cluster_size.size());
assert(num_clusters == all_histograms.size());
}
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
std::vector<uint32_t> clusters(num_clusters);
for (size_t i = 0; i < num_clusters; ++i) {
clusters[i] = static_cast<uint32_t>(i);
}
size_t num_final_clusters =
HistogramCombine(&all_histograms[0], &cluster_size[0],
&histogram_symbols[0],
&clusters[0], &pairs[0], num_clusters,
num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
uint32_t next_index = 0;
pos = 0;
for (size_t i = 0; i < num_blocks; ++i) {
HistogramType histo;
for (size_t j = 0; j < block_lengths[i]; ++j) {
histo.Add(data[pos++]);
}
uint32_t best_out =
i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
double best_bits = HistogramBitCostDistance(
histo, all_histograms[best_out]);
for (size_t j = 0; j < num_final_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(
histo, all_histograms[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
histogram_symbols[i] = best_out;
if (new_index[best_out] == kInvalidIndex) {
new_index[best_out] = next_index++;
}
}
uint8_t max_type = 0;
uint32_t cur_length = 0;
block_idx = 0;
split->types.resize(num_blocks);
split->lengths.resize(num_blocks);
for (size_t i = 0; i < num_blocks; ++i) {
cur_length += block_lengths[i];
if (i + 1 == num_blocks ||
histogram_symbols[i] != histogram_symbols[i + 1]) {
const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
split->types[block_idx] = id;
split->lengths[block_idx] = cur_length;
max_type = std::max(max_type, id);
cur_length = 0;
++block_idx;
}
}
split->types.resize(block_idx);
split->lengths.resize(block_idx);
split->num_types = static_cast<size_t>(max_type) + 1;
}
template<int kSize, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
const size_t literals_per_histogram,
const size_t max_histograms,
const size_t sampling_stride_length,
const double block_switch_cost,
BlockSplit* split) {
if (data.empty()) {
split->num_types = 1;
return;
} else if (data.size() < kMinLengthForBlockSplitting) {
split->num_types = 1;
split->types.push_back(0);
split->lengths.push_back(static_cast<uint32_t>(data.size()));
return;
}
size_t num_histograms = data.size() / literals_per_histogram + 1;
if (num_histograms > max_histograms) {
num_histograms = max_histograms;
}
Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
// Find good entropy codes.
InitialEntropyCodes(&data[0], data.size(),
sampling_stride_length,
num_histograms, histograms);
RefineEntropyCodes(&data[0], data.size(),
sampling_stride_length,
num_histograms, histograms);
// Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size());
size_t num_blocks;
const size_t bitmaplen = (num_histograms + 7) >> 3;
double* insert_cost = new double[kSize * num_histograms];
double *cost = new double[num_histograms];
uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
uint16_t* new_id = new uint16_t[num_histograms];
for (size_t i = 0; i < 10; ++i) {
num_blocks = FindBlocks(&data[0], data.size(),
block_switch_cost,
num_histograms, histograms,
insert_cost, cost, switch_signal,
&block_ids[0]);
num_histograms = RemapBlockIds(&block_ids[0], data.size(),
new_id, num_histograms);
BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
num_histograms, histograms);
}
delete[] insert_cost;
delete[] cost;
delete[] switch_signal;
delete[] new_id;
delete[] histograms;
ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
&block_ids[0], split);
}
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t pos,
const size_t mask,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
{
// Create a continuous array of literals.
std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
// Create the block split on the array of literals.
// Literal histograms have alphabet size 256.
SplitByteVector<256>(
literals,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
kLiteralStrideLength, kLiteralBlockSwitchCost,
literal_split);
}
{
// Compute prefix codes for commands.
std::vector<uint16_t> insert_and_copy_codes(num_commands);
for (size_t i = 0; i < num_commands; ++i) {
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
}
// Create the block split on the array of command prefixes.
SplitByteVector<kNumCommandPrefixes>(
insert_and_copy_codes,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
kCommandStrideLength, kCommandBlockSwitchCost,
insert_and_copy_split);
}
{
// Create a continuous array of distance prefixes.
std::vector<uint16_t> distance_prefixes(num_commands);
size_t pos = 0;
for (size_t i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
distance_prefixes[pos++] = cmd.dist_prefix_;
}
}
distance_prefixes.resize(pos);
// Create the block split on the array of distance prefixes.
SplitByteVector<kNumDistancePrefixes>(
distance_prefixes,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
kCommandStrideLength, kDistanceBlockSwitchCost,
dist_split);
}
}
} // namespace brotli

View file

@@ -0,0 +1,61 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Block split point selection utilities.
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <vector>
#include "./command.h"
#include "./metablock.h"
#include "./types.h"
namespace brotli {
struct BlockSplitIterator {
explicit BlockSplitIterator(const BlockSplit& split)
: split_(split), idx_(0), type_(0), length_(0) {
if (!split.lengths.empty()) {
length_ = split.lengths[0];
}
}
void Next(void) {
if (length_ == 0) {
++idx_;
type_ = split_.types[idx_];
length_ = split_.lengths[idx_];
}
--length_;
}
const BlockSplit& split_;
size_t idx_;
size_t type_;
size_t length_;
};
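// Editorial usage sketch (num_symbols illustrative): advance once per
// symbol, then read type_.
//   BlockSplitIterator it(split);
//   for (size_t i = 0; i < num_symbols; ++i) {
//     it.Next();
//     const size_t block_type = it.type_;
//   }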
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
std::vector<uint8_t>* literals);
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
} // namespace brotli
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_

File diff suppressed because it is too large

View file

@@ -0,0 +1,179 @@
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to convert brotli-related data structures into the
// brotli bit stream. The functions here operate under
// assumption that there is enough space in the storage, i.e., there are
// no out-of-range checks anywhere.
//
// These functions do bit addressing into a byte array. The byte array
// is called "storage" and the index to the bit is called storage_ix
// in function arguments.
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
#include <vector>
#include "./entropy_encode.h"
#include "./metablock.h"
#include "./types.h"
namespace brotli {
// All Store functions here will use a storage_ix, which is always the bit
// position for the current storage.
// Stores a number between 0 and 255.
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
// Stores the compressed meta-block header.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
size_t* storage_ix,
uint8_t* storage);
// Stores the uncompressed meta-block header.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlockHeader(size_t length,
size_t* storage_ix,
uint8_t* storage);
// Stores a context map where the histogram type is always the block type.
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
HuffmanTree* tree,
size_t* storage_ix,
uint8_t* storage);
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
size_t *storage_ix,
uint8_t *storage);
void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
size_t *storage_ix, uint8_t *storage);
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
// bits[0:length] and stores the encoded tree to the bit stream.
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
HuffmanTree* tree,
uint8_t* depth,
uint16_t* bits,
size_t* storage_ix,
uint8_t* storage);
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
const size_t histogram_total,
const size_t max_bits,
uint8_t* depth,
uint16_t* bits,
size_t* storage_ix,
uint8_t* storage);
// Encodes the given context map to the bit stream. The number of different
// histogram ids is given by num_clusters.
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage);
// Data structure that stores everything that is needed to encode each block
// switch command.
struct BlockSplitCode {
std::vector<uint32_t> type_code;
std::vector<uint32_t> length_prefix;
std::vector<uint32_t> length_nextra;
std::vector<uint32_t> length_extra;
std::vector<uint8_t> type_depths;
std::vector<uint16_t> type_bits;
uint8_t length_depths[kNumBlockLenPrefixes];
uint16_t length_bits[kNumBlockLenPrefixes];
};
// Builds a BlockSplitCode data structure from the block split given by the
// vector of block types and block lengths and stores it to the bit stream.
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
BlockSplitCode* code,
size_t* storage_ix,
uint8_t* storage);
// Stores the block switch command with index block_ix to the bit stream.
void StoreBlockSwitch(const BlockSplitCode& code,
const size_t block_ix,
size_t* storage_ix,
uint8_t* storage);
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool final_block,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits,
ContextType literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
size_t *storage_ix,
uint8_t *storage);
// Stores the meta-block without doing any block splitting, just collects
// one histogram per block category and uses that for entropy coding.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlockTrivial(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
size_t *storage_ix,
uint8_t *storage);
// Same as above, but uses static prefix codes for histograms with only a few
// symbols, and static code length prefix codes for all other histograms.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlockFast(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
size_t *storage_ix,
uint8_t *storage);
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t* input,
size_t position, size_t mask,
size_t len,
size_t* storage_ix,
uint8_t* storage);
// Stores an empty metadata meta-block and syncs to a byte boundary.
void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
} // namespace brotli
#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_

View file

@@ -0,0 +1,330 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions for clustering similar histograms together.
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
#include <assert.h>
#include <math.h>
#include <string.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./port.h"
#include "./types.h"
namespace brotli {
struct HistogramPair {
uint32_t idx1;
uint32_t idx2;
double cost_combo;
double cost_diff;
};
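// Orders pairs so that the better merge candidate compares greater: the pair
// with the lower cost_diff wins, and ties go to the pair whose indices are
// closer together. HistogramCombine() below keeps the best pair at pairs[0].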
inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
}
// Returns entropy reduction of the context map when we combine two clusters.
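// For example, ClusterCostDiff(2, 2) = 2*1 + 2*1 - 4*2 = -4, i.e. merging two
// clusters of size two saves about four bits of context map encoding.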
inline double ClusterCostDiff(size_t size_a, size_t size_b) {
size_t size_c = size_a + size_b;
return static_cast<double>(size_a) * FastLog2(size_a) +
static_cast<double>(size_b) * FastLog2(size_b) -
static_cast<double>(size_c) * FastLog2(size_c);
}
// Computes the bit cost reduction from combining out[idx1] and out[idx2] and,
// if it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.
template<typename HistogramType>
void CompareAndPushToQueue(const HistogramType* out,
const uint32_t* cluster_size,
uint32_t idx1, uint32_t idx2,
size_t max_num_pairs,
HistogramPair* pairs,
size_t* num_pairs) {
if (idx1 == idx2) {
return;
}
if (idx2 < idx1) {
uint32_t t = idx2;
idx2 = idx1;
idx1 = t;
}
bool store_pair = false;
HistogramPair p;
p.idx1 = idx1;
p.idx2 = idx2;
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
p.cost_diff -= out[idx1].bit_cost_;
p.cost_diff -= out[idx2].bit_cost_;
if (out[idx1].total_count_ == 0) {
p.cost_combo = out[idx2].bit_cost_;
store_pair = true;
} else if (out[idx2].total_count_ == 0) {
p.cost_combo = out[idx1].bit_cost_;
store_pair = true;
} else {
double threshold = *num_pairs == 0 ? 1e99 :
std::max(0.0, pairs[0].cost_diff);
HistogramType combo = out[idx1];
combo.AddHistogram(out[idx2]);
double cost_combo = PopulationCost(combo);
if (cost_combo < threshold - p.cost_diff) {
p.cost_combo = cost_combo;
store_pair = true;
}
}
if (store_pair) {
p.cost_diff += p.cost_combo;
if (*num_pairs > 0 && pairs[0] < p) {
// Replace the top of the queue if needed.
if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = pairs[0];
++(*num_pairs);
}
pairs[0] = p;
} else if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = p;
++(*num_pairs);
}
}
}
template<typename HistogramType>
size_t HistogramCombine(HistogramType* out,
uint32_t* cluster_size,
uint32_t* symbols,
uint32_t* clusters,
HistogramPair* pairs,
size_t num_clusters,
size_t symbols_size,
size_t max_clusters,
size_t max_num_pairs) {
double cost_diff_threshold = 0.0;
size_t min_cluster_size = 1;
// We maintain a vector of histogram pairs, with the property that the pair
// with the maximum bit cost reduction is the first.
size_t num_pairs = 0;
for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
max_num_pairs, &pairs[0], &num_pairs);
}
}
while (num_clusters > min_cluster_size) {
if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99;
min_cluster_size = max_clusters;
continue;
}
// Take the best pair from the top of the heap.
uint32_t best_idx1 = pairs[0].idx1;
uint32_t best_idx2 = pairs[0].idx2;
out[best_idx1].AddHistogram(out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (size_t i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
for (size_t i = 0; i < num_clusters; ++i) {
if (clusters[i] == best_idx2) {
memmove(&clusters[i], &clusters[i + 1],
(num_clusters - i - 1) * sizeof(clusters[0]));
break;
}
}
--num_clusters;
// Remove pairs intersecting the just combined best pair.
size_t copy_to_idx = 0;
for (size_t i = 0; i < num_pairs; ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
// Remove invalid pair from the queue.
continue;
}
if (pairs[0] < p) {
// Replace the top of the queue if needed.
HistogramPair front = pairs[0];
pairs[0] = p;
pairs[copy_to_idx] = front;
} else {
pairs[copy_to_idx] = p;
}
++copy_to_idx;
}
num_pairs = copy_to_idx;
// Push new pairs formed with the combined histogram to the heap.
for (size_t i = 0; i < num_clusters; ++i) {
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
max_num_pairs, &pairs[0], &num_pairs);
}
}
return num_clusters;
}
// -----------------------------------------------------------------------------
// Histogram refinement
// Returns the bit cost of moving "histogram" from its current cluster to the
// "candidate" cluster.
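// I.e. the extra bits incurred when "histogram" is coded together with the
// candidate, relative to coding the candidate alone.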
template<typename HistogramType>
double HistogramBitCostDistance(const HistogramType& histogram,
const HistogramType& candidate) {
if (histogram.total_count_ == 0) {
return 0.0;
}
HistogramType tmp = histogram;
tmp.AddHistogram(candidate);
return PopulationCost(tmp) - candidate.bit_cost_;
}
// Find the best 'out' histogram for each of the 'in' histograms.
// When called, clusters[0..num_clusters) contains the unique values from
// symbols[0..in_size), but this property is not preserved in this function.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, size_t in_size,
const uint32_t* clusters, size_t num_clusters,
HistogramType* out, uint32_t* symbols) {
for (size_t i = 0; i < in_size; ++i) {
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (size_t j = 0; j < num_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
symbols[i] = best_out;
}
// Recompute each out histogram from the 'in' histograms and the new symbols.
for (size_t j = 0; j < num_clusters; ++j) {
out[clusters[j]].Clear();
}
for (size_t i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]);
}
}
// Reorders elements of the out[0..length) array and changes values in
// symbols[0..length) array in the following way:
// * when called, symbols[] contains indexes into out[], and has N unique
// values (possibly N < length)
// * on return, symbols'[i] = f(symbols[i]) and
// out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
// where f is a bijection between the range of symbols[] and [0..N), and
// the first occurrences of values in symbols'[i] come in consecutive
// increasing order.
// Returns N, the number of unique values in symbols[].
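// For example, symbols = {3, 1, 3, 2} becomes {0, 1, 0, 2}: the first
// occurrences 3, 1, 2 get indices 0, 1, 2, out[0..2] receives the old out[3],
// out[1], out[2], and 3 is returned.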
template<typename HistogramType>
size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(length, kInvalidIndex);
uint32_t next_index = 0;
for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == kInvalidIndex) {
new_index[symbols[i]] = next_index;
++next_index;
}
}
std::vector<HistogramType> tmp(next_index);
next_index = 0;
for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == next_index) {
tmp[next_index] = out[symbols[i]];
++next_index;
}
symbols[i] = new_index[symbols[i]];
}
for (size_t i = 0; i < next_index; ++i) {
out[i] = tmp[i];
}
return next_index;
}
// Clusters similar histograms in 'in' together, the selected histograms are
// placed in 'out', and for each index in 'in', *histogram_symbols will
// indicate which of the 'out' histograms is the best approximation.
template<typename HistogramType>
void ClusterHistograms(const std::vector<HistogramType>& in,
size_t num_contexts, size_t num_blocks,
size_t max_histograms,
std::vector<HistogramType>* out,
std::vector<uint32_t>* histogram_symbols) {
const size_t in_size = num_contexts * num_blocks;
assert(in_size == in.size());
std::vector<uint32_t> cluster_size(in_size, 1);
std::vector<uint32_t> clusters(in_size);
size_t num_clusters = 0;
out->resize(in_size);
histogram_symbols->resize(in_size);
for (size_t i = 0; i < in_size; ++i) {
(*out)[i] = in[i];
(*out)[i].bit_cost_ = PopulationCost(in[i]);
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
}
const size_t max_input_histograms = 64;
// For the first pass of clustering, we allow all pairs.
size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
for (size_t i = 0; i < in_size; i += max_input_histograms) {
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
for (size_t j = 0; j < num_to_combine; ++j) {
clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
}
size_t num_new_clusters =
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i],
&clusters[num_clusters], &pairs[0],
num_to_combine, num_to_combine,
max_histograms, max_num_pairs);
num_clusters += num_new_clusters;
}
// For the second pass, we limit the total number of histogram pairs.
// After this limit is reached, we only keep searching for the best pair.
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
// Collapse similar histograms.
num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[0], &clusters[0],
&pairs[0], num_clusters, in_size,
max_histograms, max_num_pairs);
// Find the optimal map from original histograms to the final ones.
HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
&(*out)[0], &(*histogram_symbols)[0]);
// Convert the context map to a canonical form.
size_t num_histograms =
HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
out->resize(num_histograms);
}
} // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_

View file

@@ -0,0 +1,156 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// This class models a sequence of literals and a backward reference copy.
#ifndef BROTLI_ENC_COMMAND_H_
#define BROTLI_ENC_COMMAND_H_
#include "./fast_log.h"
#include "./prefix.h"
#include "./types.h"
namespace brotli {
static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
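// Maps an insert length to its prefix code. For example,
// GetInsertLengthCode(10) = 8: 10 - 2 = 8, nbits = 2, so the code is
// (2 << 1) + (8 >> 2) + 2 = 8, matching kInsBase[8] = 10 and kInsExtra[8] = 2.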
static inline uint16_t GetInsertLengthCode(size_t insertlen) {
if (insertlen < 6) {
return static_cast<uint16_t>(insertlen);
} else if (insertlen < 130) {
insertlen -= 2;
uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
} else if (insertlen < 2114) {
return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
} else if (insertlen < 6210) {
return 21u;
} else if (insertlen < 22594) {
return 22u;
} else {
return 23u;
}
}
static inline uint16_t GetCopyLengthCode(size_t copylen) {
if (copylen < 10) {
return static_cast<uint16_t>(copylen - 2);
} else if (copylen < 134) {
copylen -= 6;
uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
} else if (copylen < 2118) {
return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
} else {
return 23u;
}
}
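// Packs copycode into the low 3 bits and inscode into the next 3 bits; the
// high bits come from cells[] (or encode use of the last distance). For
// example, inscode = 8, copycode = 8 without last distance gives
// cells[1 + 3 * 1] | 0 = 320.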
static inline uint16_t CombineLengthCodes(
uint16_t inscode, uint16_t copycode, bool use_last_distance) {
uint16_t bits64 =
static_cast<uint16_t>((copycode & 0x7u) | ((inscode & 0x7u) << 3));
if (use_last_distance && inscode < 8 && copycode < 16) {
return (copycode < 8) ? bits64 : (bits64 | 64);
} else {
// "To convert an insert-and-copy length code to an insert length code and
// a copy length code, the following table can be used"
static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
448u, 576u, 640u };
return cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64;
}
}
static inline void GetLengthCode(size_t insertlen, size_t copylen,
bool use_last_distance,
uint16_t* code) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
}
static inline uint32_t GetInsertBase(uint16_t inscode) {
return kInsBase[inscode];
}
static inline uint32_t GetInsertExtra(uint16_t inscode) {
return kInsExtra[inscode];
}
static inline uint32_t GetCopyBase(uint16_t copycode) {
return kCopyBase[copycode];
}
static inline uint32_t GetCopyExtra(uint16_t copycode) {
return kCopyExtra[copycode];
}
struct Command {
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
Command(size_t insertlen, size_t copylen, size_t copylen_code,
size_t distance_code)
: insert_len_(static_cast<uint32_t>(insertlen)) {
copy_len_ = static_cast<uint32_t>(
copylen | ((copylen_code ^ copylen) << 24));
// The distance prefix and extra bits are stored in this Command as if
// npostfix and ndirect were 0; they are only recomputed later, after the
// clustering if needed.
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
&cmd_prefix_);
}
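// Creates an insert-only command: copy_len() is 0, while copy_len_code() is a
// dummy value of 4 that keeps GetLengthCode() within its valid range.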
explicit Command(size_t insertlen)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
}
uint32_t DistanceCode(void) const {
if (dist_prefix_ < 16) {
return dist_prefix_;
}
uint32_t nbits = dist_extra_ >> 24;
uint32_t extra = dist_extra_ & 0xffffff;
uint32_t prefix = dist_prefix_ - 12 - 2 * nbits;
return (prefix << nbits) + extra + 12;
}
uint32_t DistanceContext(void) const {
uint32_t r = cmd_prefix_ >> 6;
uint32_t c = cmd_prefix_ & 7;
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
return c;
}
return 3;
}
inline uint32_t copy_len(void) const {
return copy_len_ & 0xFFFFFF;
}
inline uint32_t copy_len_code(void) const {
return (copy_len_ & 0xFFFFFF) ^ (copy_len_ >> 24);
}
uint32_t insert_len_;
/* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bits. */
uint32_t copy_len_;
uint32_t dist_extra_;
uint16_t cmd_prefix_;
uint16_t dist_prefix_;
};
} // namespace brotli
#endif // BROTLI_ENC_COMMAND_H_

View file

@@ -0,0 +1,701 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.
//
// Adapted from the CompressFragment() function in
// https://github.com/google/snappy/blob/master/snappy.cc
#include "./compress_fragment.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./types.h"
#include "./write_bits.h"
namespace brotli {
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime; the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
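// Hashes the five bytes at p: the << 24 shifts bytes p[5..7] out of the 64-bit
// load, so the hash depends on exactly the bytes that IsMatch() compares.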
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
}
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 3);
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
}
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4]);
}
// Builds a literal prefix code into "depths" and "bits" based on the statistics
// of the "input" string and stores it into the bit stream.
// Note that the prefix code here is built from the pre-LZ77 input, therefore
// we can only approximate the statistics of the actual literal stream.
// Moreover, for long inputs we build a histogram from a sample of the input
// and thus have to assign a non-zero depth for each literal.
static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
const size_t input_size,
uint8_t depths[256],
uint16_t bits[256],
size_t* storage_ix,
uint8_t* storage) {
uint32_t histogram[256] = { 0 };
size_t histogram_total;
if (input_size < (1 << 15)) {
for (size_t i = 0; i < input_size; ++i) {
++histogram[input[i]];
}
histogram_total = input_size;
for (size_t i = 0; i < 256; ++i) {
// We weigh the first 11 samples with weight 3 to account for the
// balancing effect of the LZ77 phase on the histogram.
const uint32_t adjust = 2 * std::min(histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
} else {
static const size_t kSampleRate = 29;
for (size_t i = 0; i < input_size; i += kSampleRate) {
++histogram[input[i]];
}
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
for (size_t i = 0; i < 256; ++i) {
// We add 1 to each population count to avoid 0 bit depths (since this is
// only a sample and we don't know if the symbol appears or not), and we
// weigh the first 11 samples with weight 3 to account for the balancing
// effect of the LZ77 phase on the histogram (more frequent symbols are
// more likely to be in backward references instead as literals).
const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
}
BuildAndStoreHuffmanTreeFast(histogram, histogram_total,
/* max_bits = */ 8,
depths, bits, storage_ix, storage);
}
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
uint8_t depth[128],
uint16_t bits[128],
size_t* storage_ix,
uint8_t* storage) {
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoops here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
// in this order in the command bits saves a few branches in the Emit*
// functions.
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth, 24);
memcpy(cmd_depth + 24, depth + 40, 8);
memcpy(cmd_depth + 32, depth + 24, 8);
memcpy(cmd_depth + 40, depth + 48, 8);
memcpy(cmd_depth + 48, depth + 32, 8);
memcpy(cmd_depth + 56, depth + 56, 8);
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits, 48);
memcpy(bits + 24, cmd_bits + 32, 16);
memcpy(bits + 32, cmd_bits + 48, 16);
memcpy(bits + 40, cmd_bits + 24, 16);
memcpy(bits + 48, cmd_bits + 40, 16);
memcpy(bits + 56, cmd_bits + 56, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
// Create the bit length array for the full command alphabet.
uint8_t cmd_depth[704] = { 0 };
memcpy(cmd_depth, depth, 8);
memcpy(cmd_depth + 64, depth + 8, 8);
memcpy(cmd_depth + 128, depth + 16, 8);
memcpy(cmd_depth + 192, depth + 24, 8);
memcpy(cmd_depth + 384, depth + 32, 8);
for (size_t i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[40 + i];
cmd_depth[256 + 8 * i] = depth[48 + i];
cmd_depth[448 + 8 * i] = depth[56 + i];
}
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
}
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
// REQUIRES: insertlen < 6210
inline void EmitInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 6) {
const size_t code = insertlen + 40;
WriteBits(depth[code], bits[code], storage_ix, storage);
++histo[code];
} else if (insertlen < 130) {
insertlen -= 2;
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
const size_t prefix = insertlen >> nbits;
const size_t inscode = (nbits << 1) + prefix + 42;
WriteBits(depth[inscode], bits[inscode], storage_ix, storage);
WriteBits(nbits, insertlen - (prefix << nbits), storage_ix, storage);
++histo[inscode];
} else if (insertlen < 2114) {
insertlen -= 66;
const uint32_t nbits = Log2FloorNonZero(insertlen);
const size_t code = nbits + 50;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, insertlen - (1 << nbits), storage_ix, storage);
++histo[code];
} else {
WriteBits(depth[61], bits[61], storage_ix, storage);
WriteBits(12, insertlen - 2114, storage_ix, storage);
++histo[21];
}
}
inline void EmitLongInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 22594) {
WriteBits(depth[62], bits[62], storage_ix, storage);
WriteBits(14, insertlen - 6210, storage_ix, storage);
++histo[22];
} else {
WriteBits(depth[63], bits[63], storage_ix, storage);
WriteBits(24, insertlen - 22594, storage_ix, storage);
++histo[23];
}
}
inline void EmitCopyLen(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 10) {
WriteBits(depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
++histo[copylen + 14];
} else if (copylen < 134) {
copylen -= 6;
const uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
const size_t prefix = copylen >> nbits;
const size_t code = (nbits << 1) + prefix + 20;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 2118) {
copylen -= 70;
const uint32_t nbits = Log2FloorNonZero(copylen);
const size_t code = nbits + 28;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
++histo[code];
} else {
WriteBits(depth[39], bits[39], storage_ix, storage);
WriteBits(24, copylen - 2118, storage_ix, storage);
++histo[47];
}
}
inline void EmitCopyLenLastDistance(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 12) {
WriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
++histo[copylen - 4];
} else if (copylen < 72) {
copylen -= 8;
const uint32_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t code = (nbits << 1) + prefix + 4;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 136) {
copylen -= 8;
const size_t code = (copylen >> 5) + 30;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(5, copylen & 31, storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else if (copylen < 2120) {
copylen -= 72;
const uint32_t nbits = Log2FloorNonZero(copylen);
const size_t code = nbits + 28;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else {
WriteBits(depth[39], bits[39], storage_ix, storage);
WriteBits(24, copylen - 2120, storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
++histo[47];
++histo[64];
}
}
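// Emits a distance with the distance codes 80..127. For example, distance 1 is
// biased to 4, giving nbits = 1 and prefix = 0, so distance code 80 is written
// followed by a single zero-valued extra bit.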
inline void EmitDistance(size_t distance,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix, uint8_t* storage) {
distance += 3;
const uint32_t nbits = Log2FloorNonZero(distance) - 1u;
const size_t prefix = (distance >> nbits) & 1;
const size_t offset = (2 + prefix) << nbits;
const size_t distcode = 2 * (nbits - 1) + prefix + 80;
WriteBits(depth[distcode], bits[distcode], storage_ix, storage);
WriteBits(nbits, distance - offset, storage_ix, storage);
++histo[distcode];
}
inline void EmitLiterals(const uint8_t* input, const size_t len,
const uint8_t depth[256], const uint16_t bits[256],
size_t* storage_ix, uint8_t* storage) {
for (size_t j = 0; j < len; j++) {
const uint8_t lit = input[j];
WriteBits(depth[lit], bits[lit], storage_ix, storage);
}
}
// REQUIRES: len <= 1 << 20.
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
// ISLAST
WriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
// MNIBBLES is 4
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
} else {
// MNIBBLES is 5
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
}
// ISUNCOMPRESSED
WriteBits(1, is_uncompressed, storage_ix, storage);
}
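// Overwrites n_bits bits of "array" at bit position pos, one byte at a time,
// preserving the surrounding bits. Used below to patch the MLEN field of an
// already-emitted meta-block header when the meta-block is extended.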
static void UpdateBits(size_t n_bits,
uint32_t bits,
size_t pos,
uint8_t *array) {
while (n_bits > 0) {
size_t byte_pos = pos >> 3;
size_t n_unchanged_bits = pos & 7;
size_t n_changed_bits = std::min(n_bits, 8 - n_unchanged_bits);
size_t total_bits = n_unchanged_bits + n_changed_bits;
uint32_t mask = (~((1 << total_bits) - 1)) | ((1 << n_unchanged_bits) - 1);
uint32_t unchanged_bits = array[byte_pos] & mask;
uint32_t changed_bits = bits & ((1 << n_changed_bits) - 1);
array[byte_pos] =
static_cast<uint8_t>((changed_bits << n_unchanged_bits) |
unchanged_bits);
n_bits -= n_changed_bits;
bits >>= n_changed_bits;
pos += n_changed_bits;
}
}
static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bitpos = new_storage_ix & 7;
const size_t mask = (1u << bitpos) - 1;
storage[new_storage_ix >> 3] &= static_cast<uint8_t>(mask);
*storage_ix = new_storage_ix;
}
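// Samples every 43rd byte and returns true if the current literal depths code
// the sample within about half a bit per byte (plus 200 bits of slack) of the
// sample's own entropy, i.e. the block can be merged under the existing code.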
static bool ShouldMergeBlock(const uint8_t* data, size_t len,
const uint8_t* depths) {
size_t histo[256] = { 0 };
static const size_t kSampleRate = 43;
for (size_t i = 0; i < len; i += kSampleRate) {
++histo[data[i]];
}
const size_t total = (len + kSampleRate - 1) / kSampleRate;
double r = (FastLog2(total) + 0.5) * static_cast<double>(total) + 200;
for (size_t i = 0; i < 256; ++i) {
r -= static_cast<double>(histo[i]) * (depths[i] + FastLog2(histo[i]));
}
return r >= 0.0;
}
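// Treating a depth of n as a symbol probability of 2^-n, the sum below is
// 2^15 times the expected bits per literal; above kMinEntropy (7.84 bits) the
// pending insert is considered incompressible.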
inline bool ShouldUseUncompressedMode(const uint8_t* metablock_start,
const uint8_t* next_emit,
const size_t insertlen,
const uint8_t literal_depths[256]) {
const size_t compressed = static_cast<size_t>(next_emit - metablock_start);
if (compressed * 50 > insertlen) {
return false;
}
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
static const double kMinEntropy =
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
uint32_t sum = 0;
for (int i = 0; i < 256; ++i) {
const uint32_t n = literal_depths[i];
sum += n << (15 - n);
}
return sum > static_cast<uint32_t>((1 << 15) * kMinEntropy);
}
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
const size_t storage_ix_start,
size_t* storage_ix, uint8_t* storage) {
const size_t len = static_cast<size_t>(end - begin);
RewindBitPosition(storage_ix_start, storage_ix, storage);
StoreMetaBlockHeader(len, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], begin, len);
*storage_ix += len << 3;
storage[*storage_ix >> 3] = 0;
}
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
bool is_last,
int* table, size_t table_size,
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage) {
if (input_size == 0) {
assert(is_last);
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
// "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or
// the end of the input will be emitted as literal bytes.
const uint8_t* next_emit = input;
// Save the start of the first block for position and distance computations.
const uint8_t* base_ip = input;
static const size_t kFirstBlockSize = 3 << 15;
static const size_t kMergeBlockSize = 1 << 16;
const uint8_t* metablock_start = input;
size_t block_size = std::min(input_size, kFirstBlockSize);
size_t total_block_size = block_size;
// Save the bit position of the MLEN field of the meta-block header, so that
// we can update it later if we decide to extend this meta-block.
size_t mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
WriteBits(13, 0, storage_ix, storage);
uint8_t lit_depth[256] = { 0 };
uint16_t lit_bits[256] = { 0 };
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
storage_ix, storage);
// Store the pre-compressed command and distance prefix codes.
for (size_t i = 0; i + 7 < *cmd_code_numbits; i += 8) {
WriteBits(8, cmd_code[i >> 3], storage_ix, storage);
}
WriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
storage_ix, storage);
emit_commands:
// Initialize the command and distance histograms. We will gather
// statistics of command and distance codes during the processing
// of this block and use it to update the command and distance
// prefix codes for the next block.
uint32_t cmd_histo[128] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0,
};
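// The nonzero seeds correspond to codes the emitters below can produce, so the
// prefix code rebuilt from cmd_histo never assigns them zero depth.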
// "ip" is the input pointer.
const uint8_t* ip = input;
assert(table_size);
assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
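// With table_size == (1 << k), shift == 64 - k, so Hash() yields k-bit
// indices into "table".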
const uint8_t* ip_end = input + block_size;
int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 5;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
// For the last block, we need to keep a 16 bytes margin so that we can be
// sure that all distances are at most window size - 16.
// For all other blocks, we only need to keep a margin of 5 bytes so that
// we don't go over the block size with a copy.
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
// Step 1: Scan forward in the input looking for a 5-byte-long match.
// If we get close to exhausting the input then goto emit_remainder.
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned, look at every third byte, etc. When a match is found,
// immediately go back to looking at every byte. This is a small loss
// (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
// win since the compressor quickly "realizes" the data is incompressible
// and doesn't bother looking for matches everywhere.
//
// The "skip" variable keeps track of how many bytes there are since the
// last match; dividing it by 32 (i.e. right-shifting by five) gives the
// number of bytes to move ahead for each iteration.
uint32_t skip = 32;
const uint8_t* next_ip = ip;
const uint8_t* candidate;
do {
ip = next_ip;
uint32_t hash = next_hash;
assert(hash == Hash(ip, shift));
uint32_t bytes_between_hash_lookups = skip++ >> 5;
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
}
next_hash = Hash(next_ip, shift);
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (PREDICT_TRUE(candidate < ip)) {
table[hash] = static_cast<int>(ip - base_ip);
break;
}
}
candidate = base_ip + table[hash];
assert(candidate >= base_ip);
assert(candidate < ip);
table[hash] = static_cast<int>(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
// Step 2: Emit the found match together with the literal bytes from
// "next_emit" to the bit stream, and then see if we can find a next macth
// immediately afterwards. Repeat until we find no match for the input
// without emitting some literal bytes.
uint64_t input_bytes;
{
// We have a 5-byte match at ip, and we need to emit bytes in
// [next_emit, ip).
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
ip += matched;
int distance = static_cast<int>(base - candidate); /* > 0 */
size_t insert = static_cast<size_t>(base - next_emit);
assert(0 == memcmp(base, candidate, matched));
if (PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
lit_depth)) {
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
storage_ix, storage);
input_size -= static_cast<size_t>(base - input);
input = base;
next_emit = input;
goto next_block;
} else {
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
}
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
if (distance == last_distance) {
WriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
++cmd_histo[64];
} else {
EmitDistance(static_cast<size_t>(distance), cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
}
while (IsMatch(ip, candidate)) {
// We have a 5-byte match at ip, and no need to emit any literal bytes
// prior to ip.
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
ip += matched;
last_distance = static_cast<int>(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitDistance(static_cast<size_t>(last_distance), cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
}
next_hash = Hash(++ip, shift);
}
}
emit_remainder:
assert(next_emit <= ip_end);
input += block_size;
input_size -= block_size;
block_size = std::min(input_size, kMergeBlockSize);
// Decide if we want to continue this meta-block instead of emitting the
// last insert-only command.
if (input_size > 0 &&
total_block_size + block_size <= (1 << 20) &&
ShouldMergeBlock(input, block_size, lit_depth)) {
assert(total_block_size > (1 << 16));
// Update the size of the current meta-block and continue emitting commands.
// We can do this because the current size and the new size both have 5
// nibbles.
total_block_size += block_size;
UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
mlen_storage_ix, storage);
goto emit_commands;
}
// Emit the remaining bytes as literals.
if (next_emit < ip_end) {
const size_t insert = static_cast<size_t>(ip_end - next_emit);
if (PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
lit_depth)) {
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
storage_ix, storage);
} else {
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
}
}
next_emit = ip_end;
next_block:
// If we have more data, write a new meta-block header and prefix codes and
// then continue emitting commands.
if (input_size > 0) {
metablock_start = input;
block_size = std::min(input_size, kFirstBlockSize);
total_block_size = block_size;
// Save the bit position of the MLEN field of the meta-block header, so that
// we can update it later if we decide to extend this meta-block.
mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
WriteBits(13, 0, storage_ix, storage);
memset(lit_depth, 0, sizeof(lit_depth));
memset(lit_bits, 0, sizeof(lit_bits));
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
storage_ix, storage);
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
storage_ix, storage);
goto emit_commands;
}
if (is_last) {
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
*storage_ix = (*storage_ix + 7u) & ~7u;
} else {
// If this is not the last block, update the command and distance prefix
// codes for the next block and store the compressed forms.
cmd_code[0] = 0;
*cmd_code_numbits = 0;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
cmd_code_numbits, cmd_code);
}
}
} // namespace brotli

View file

@@ -0,0 +1,47 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
#include "./types.h"
namespace brotli {
// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
// (see comment in encode.h) used for the encoding of this input fragment.
// If "is_last" is false, they are updated to reflect the statistics
// of this input fragment, to be used for the encoding of the next fragment.
//
// "*cmd_code_numbits" is the number of bits of the compressed representation
// of the command and distance prefix codes, and "cmd_code" is an array of
// at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
// command and distance prefix codes. If "is_last" is false, these are also
// updated to represent the updated "cmd_depth" and "cmd_bits".
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
bool is_last,
int* table, size_t table_size,
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage);
} // namespace brotli
#endif // BROTLI_ENC_COMPRESS_FRAGMENT_H_

View file

@@ -0,0 +1,524 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.
#include "./compress_fragment_two_pass.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include "./brotli_bit_stream.h"
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./types.h"
#include "./write_bits.h"
namespace brotli {
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime; the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
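// Unlike the one-pass variant, the hash keeps the low six bytes of the load
// (<< 16 instead of << 24), matching the 6-byte IsMatch() below.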
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
}
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 2);
const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
}
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4] &&
p1[5] == p2[5]);
}
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
static void BuildAndStoreCommandPrefixCode(
const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoops here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
// in this order in the command bits saves a few branches in the Emit*
// functions.
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth + 24, 24);
memcpy(cmd_depth + 24, depth, 8);
memcpy(cmd_depth + 32, depth + 48, 8);
memcpy(cmd_depth + 40, depth + 8, 8);
memcpy(cmd_depth + 48, depth + 56, 8);
memcpy(cmd_depth + 56, depth + 16, 8);
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits + 24, 16);
memcpy(bits + 8, cmd_bits + 40, 16);
memcpy(bits + 16, cmd_bits + 56, 16);
memcpy(bits + 24, cmd_bits, 48);
memcpy(bits + 48, cmd_bits + 32, 16);
memcpy(bits + 56, cmd_bits + 48, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
// Create the bit length array for the full command alphabet.
uint8_t cmd_depth[704] = { 0 };
memcpy(cmd_depth, depth + 24, 8);
memcpy(cmd_depth + 64, depth + 32, 8);
memcpy(cmd_depth + 128, depth + 40, 8);
memcpy(cmd_depth + 192, depth + 48, 8);
memcpy(cmd_depth + 384, depth + 56, 8);
for (size_t i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[i];
cmd_depth[256 + 8 * i] = depth[8 + i];
cmd_depth[448 + 8 * i] = depth[16 + i];
}
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
}
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
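// First-pass commands are packed one per uint32_t: the command prefix code in
// the low 8 bits and the value of its extra bits in the high 24 bits.
// StoreCommands() unpacks and entropy-codes them in the second pass.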
inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
if (insertlen < 6) {
**commands = insertlen;
} else if (insertlen < 130) {
insertlen -= 2;
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
const uint32_t prefix = insertlen >> nbits;
const uint32_t inscode = (nbits << 1) + prefix + 2;
const uint32_t extra = insertlen - (prefix << nbits);
**commands = inscode | (extra << 8);
} else if (insertlen < 2114) {
insertlen -= 66;
const uint32_t nbits = Log2FloorNonZero(insertlen);
const uint32_t code = nbits + 10;
const uint32_t extra = insertlen - (1 << nbits);
**commands = code | (extra << 8);
} else if (insertlen < 6210) {
const uint32_t extra = insertlen - 2114;
**commands = 21 | (extra << 8);
} else if (insertlen < 22594) {
const uint32_t extra = insertlen - 6210;
**commands = 22 | (extra << 8);
} else {
const uint32_t extra = insertlen - 22594;
**commands = 23 | (extra << 8);
}
++(*commands);
}
inline void EmitCopyLen(size_t copylen, uint32_t** commands) {
if (copylen < 10) {
**commands = static_cast<uint32_t>(copylen + 38);
} else if (copylen < 134) {
copylen -= 6;
const size_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t code = (nbits << 1) + prefix + 44;
const size_t extra = copylen - (prefix << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
} else if (copylen < 2118) {
copylen -= 70;
const size_t nbits = Log2FloorNonZero(copylen);
const size_t code = nbits + 52;
const size_t extra = copylen - (1 << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
} else {
const size_t extra = copylen - 2118;
**commands = static_cast<uint32_t>(63 | (extra << 8));
}
++(*commands);
}
inline void EmitCopyLenLastDistance(size_t copylen, uint32_t** commands) {
if (copylen < 12) {
**commands = static_cast<uint32_t>(copylen + 20);
++(*commands);
} else if (copylen < 72) {
copylen -= 8;
const size_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t code = (nbits << 1) + prefix + 28;
const size_t extra = copylen - (prefix << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
++(*commands);
} else if (copylen < 136) {
copylen -= 8;
const size_t code = (copylen >> 5) + 54;
const size_t extra = copylen & 31;
**commands = static_cast<uint32_t>(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else if (copylen < 2120) {
copylen -= 72;
const size_t nbits = Log2FloorNonZero(copylen);
const size_t code = nbits + 52;
const size_t extra = copylen - (1 << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else {
const size_t extra = copylen - 2120;
**commands = static_cast<uint32_t>(63 | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
}
}
inline void EmitDistance(uint32_t distance, uint32_t** commands) {
distance += 3;
uint32_t nbits = Log2FloorNonZero(distance) - 1;
const uint32_t prefix = (distance >> nbits) & 1;
const uint32_t offset = (2 + prefix) << nbits;
const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
uint32_t extra = distance - offset;
**commands = distcode | (extra << 8);
++(*commands);
}
// REQUIRES: len <= 1 << 20.
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
// ISLAST
WriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
// MNIBBLES is 4
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
} else {
// MNIBBLES is 5
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
}
// ISUNCOMPRESSED
WriteBits(1, is_uncompressed, storage_ix, storage);
}
static void CreateCommands(const uint8_t* input, size_t block_size,
size_t input_size, const uint8_t* base_ip,
int* table, size_t table_size,
uint8_t** literals, uint32_t** commands) {
// "ip" is the input pointer.
const uint8_t* ip = input;
assert(table_size);
assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
// "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or
// the end of the input will be emitted as literal bytes.
const uint8_t* next_emit = input;
int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 6;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
// For the last block, we need to keep a 16 bytes margin so that we can be
// sure that all distances are at most window size - 16.
// For all other blocks, we only need to keep a margin of 6 bytes so that
// we don't go over the block size with a copy.
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
// Step 1: Scan forward in the input looking for a 6-byte-long match.
// If we get close to exhausting the input then goto emit_remainder.
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned, look at every third byte, etc. When a match is found,
// immediately go back to looking at every byte. This is a small loss
// (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
// win since the compressor quickly "realizes" the data is incompressible
// and doesn't bother looking for matches everywhere.
//
// The "skip" variable keeps track of how many bytes there are since the
// last match; dividing it by 32 (i.e. right-shifting by five) gives the
// number of bytes to move ahead for each iteration.
uint32_t skip = 32;
const uint8_t* next_ip = ip;
const uint8_t* candidate;
do {
ip = next_ip;
uint32_t hash = next_hash;
assert(hash == Hash(ip, shift));
uint32_t bytes_between_hash_lookups = skip++ >> 5;
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
}
next_hash = Hash(next_ip, shift);
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (PREDICT_TRUE(candidate < ip)) {
table[hash] = static_cast<int>(ip - base_ip);
break;
}
}
candidate = base_ip + table[hash];
assert(candidate >= base_ip);
assert(candidate < ip);
table[hash] = static_cast<int>(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
// Step 2: Emit the found match together with the literal bytes from
// "next_emit", and then see if we can find a next macth immediately
// afterwards. Repeat until we find no match for the input
// without emitting some literal bytes.
uint64_t input_bytes;
{
// We have a 6-byte match at ip, and we need to emit bytes in
// [next_emit, ip).
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
ip += matched;
int distance = static_cast<int>(base - candidate); /* > 0 */
int insert = static_cast<int>(base - next_emit);
assert(0 == memcmp(base, candidate, matched));
EmitInsertLen(static_cast<uint32_t>(insert), commands);
memcpy(*literals, next_emit, static_cast<size_t>(insert));
*literals += insert;
if (distance == last_distance) {
**commands = 64;
++(*commands);
} else {
EmitDistance(static_cast<uint32_t>(distance), commands);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, commands);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
}
while (IsMatch(ip, candidate)) {
// We have a 6-byte match at ip, and no need to emit any
// literal bytes prior to ip.
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
ip += matched;
last_distance = static_cast<int>(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, commands);
EmitDistance(static_cast<uint32_t>(last_distance), commands);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
}
next_hash = Hash(++ip, shift);
}
}
emit_remainder:
assert(next_emit <= ip_end);
// Emit the remaining bytes as literals.
if (next_emit < ip_end) {
const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
EmitInsertLen(insert, commands);
memcpy(*literals, next_emit, insert);
*literals += insert;
}
}
static void StoreCommands(const uint8_t* literals, const size_t num_literals,
const uint32_t* commands, const size_t num_commands,
size_t* storage_ix, uint8_t* storage) {
uint8_t lit_depths[256] = { 0 };
uint16_t lit_bits[256] = { 0 };
uint32_t lit_histo[256] = { 0 };
for (size_t i = 0; i < num_literals; ++i) {
++lit_histo[literals[i]];
}
BuildAndStoreHuffmanTreeFast(lit_histo, num_literals,
/* max_bits = */ 8,
lit_depths, lit_bits,
storage_ix, storage);
uint8_t cmd_depths[128] = { 0 };
uint16_t cmd_bits[128] = { 0 };
uint32_t cmd_histo[128] = { 0 };
for (size_t i = 0; i < num_commands; ++i) {
++cmd_histo[commands[i] & 0xff];
}
cmd_histo[1] += 1;
cmd_histo[2] += 1;
cmd_histo[64] += 1;
cmd_histo[84] += 1;
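// The four increments above guarantee at least two used symbols in each half
// of the alphabet (command codes 0..63, distance codes 64..127), keeping both
// prefix trees well-formed even for degenerate command streams.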
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
storage_ix, storage);
static const uint32_t kNumExtraBits[128] = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
};
static const uint32_t kInsertOffset[24] = {
0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
1090, 2114, 6210, 22594,
};
for (size_t i = 0; i < num_commands; ++i) {
const uint32_t cmd = commands[i];
const uint32_t code = cmd & 0xff;
const uint32_t extra = cmd >> 8;
WriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
WriteBits(kNumExtraBits[code], extra, storage_ix, storage);
if (code < 24) {
const uint32_t insert = kInsertOffset[code] + extra;
for (uint32_t j = 0; j < insert; ++j) {
const uint8_t lit = *literals;
WriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
++literals;
}
}
}
}
static bool ShouldCompress(const uint8_t* input, size_t input_size,
size_t num_literals) {
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
static const double kMaxRatioOfLiterals =
1.0 - kAcceptableLossForUncompressibleSpeedup;
if (num_literals < kMaxRatioOfLiterals * static_cast<double>(input_size)) {
return true;
}
uint32_t literal_histo[256] = { 0 };
static const uint32_t kSampleRate = 43;
static const double kMaxEntropy =
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
const double max_total_bit_cost =
static_cast<double>(input_size) * kMaxEntropy / kSampleRate;
for (size_t i = 0; i < input_size; i += kSampleRate) {
++literal_histo[input[i]];
}
return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
}
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
bool is_last,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage) {
// Save the start of the first block for position and distance computations.
const uint8_t* base_ip = input;
while (input_size > 0) {
size_t block_size = std::min(input_size, kCompressFragmentTwoPassBlockSize);
uint32_t* commands = command_buf;
uint8_t* literals = literal_buf;
CreateCommands(input, block_size, input_size, base_ip, table, table_size,
&literals, &commands);
const size_t num_literals = static_cast<size_t>(literals - literal_buf);
const size_t num_commands = static_cast<size_t>(commands - command_buf);
if (ShouldCompress(input, block_size, num_literals)) {
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
WriteBits(13, 0, storage_ix, storage);
StoreCommands(literal_buf, num_literals, command_buf, num_commands,
storage_ix, storage);
} else {
// Since we did not find many backward references and the entropy of
// the data is close to 8 bits, we can simply emit an uncompressed block.
// This makes the compression speed on incompressible data about 3x faster.
StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, block_size);
*storage_ix += block_size << 3;
storage[*storage_ix >> 3] = 0;
}
input += block_size;
input_size -= block_size;
}
if (is_last) {
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
} // namespace brotli


@@ -0,0 +1,40 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#include "./types.h"
namespace brotli {
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: "command_buf" and "literal_buf" point to at least
// kCompressFragmentTwoPassBlockSize long arrays.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
bool is_last,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage);
} // namespace brotli
#endif // BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
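To make the contract above concrete, here is a minimal, hypothetical driver (not part of the imported sources): the buffer sizes follow the REQUIRES comments, while the hash table size (1 << 14 buckets) and the output bound are assumptions chosen for illustration.

#include <cstdint>
#include <vector>
#include "./compress_fragment_two_pass.h"

size_t TwoPassCompress(const uint8_t* input, size_t input_size,
                       std::vector<uint8_t>* out) {
  using namespace brotli;
  std::vector<uint32_t> command_buf(kCompressFragmentTwoPassBlockSize);
  std::vector<uint8_t> literal_buf(kCompressFragmentTwoPassBlockSize);
  std::vector<int> table(1 << 14, 0);  // power-of-two size, zero-initialized
  out->assign(2 * input_size + 500, 0);  // assumed worst-case output bound
  size_t storage_ix = 0;  // bit position; a real caller would first emit the
                          // stream header before the meta-blocks
  BrotliCompressFragmentTwoPass(input, input_size, /* is_last = */ true,
                                &command_buf[0], &literal_buf[0],
                                &table[0], table.size(),
                                &storage_ix, &(*out)[0]);
  out->resize((storage_ix + 7) >> 3);  // round the bit count up to bytes
  return out->size();
}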


@@ -0,0 +1,15 @@
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* C++ API for Brotli compression. */
#ifndef BROTLI_ENC_COMPRESSOR_H_
#define BROTLI_ENC_COMPRESSOR_H_
#include "./encode.h"
#include "./streams.h"
#endif /* BROTLI_ENC_COMPRESSOR_H_ */


@@ -0,0 +1,178 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to map previous bytes into a context id.
#ifndef BROTLI_ENC_CONTEXT_H_
#define BROTLI_ENC_CONTEXT_H_
#include "./types.h"
namespace brotli {
// Second-order context lookup table for UTF8 byte streams.
//
// If p1 and p2 are the previous two bytes, we calculate the context as
//
// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
//
// If the previous two bytes are ASCII characters (i.e. < 128), this will be
// equivalent to
//
// context = 4 * context1(p1) + context2(p2),
//
// where context1 is based on the previous byte in the following way:
//
// 0 : non-ASCII control
// 1 : \t, \n, \r
// 2 : space
// 3 : other punctuation
// 4 : " '
// 5 : %
// 6 : ( < [ {
// 7 : ) > ] }
// 8 : , ; :
// 9 : .
// 10 : =
// 11 : number
// 12 : upper-case vowel
// 13 : upper-case consonant
// 14 : lower-case vowel
// 15 : lower-case consonant
//
// and context2 is based on the second last byte:
//
// 0 : control, space
// 1 : punctuation
// 2 : upper-case letter, number
// 3 : lower-case letter
//
// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
// stream it will be a continuation byte, value between 128 and 191), the
// context is the same as if the second last byte was an ASCII control or space.
//
// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
// be a continuation byte and the context id is 2 or 3 depending on the LSB of
// the last byte and to a lesser extent on the second last byte if it is ASCII.
//
// If the last byte is a UTF8 continuation byte, the second last byte can be:
// - continuation byte: the next byte is probably ASCII or lead byte (assuming
// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
//
// The possible value combinations of the previous two bytes, the range of
// context ids and the type of the next byte is summarized in the table below:
//
// |--------\-----------------------------------------------------------------|
// | \ Last byte |
// | Second \---------------------------------------------------------------|
// | last byte \ ASCII | cont. byte | lead byte |
// | \ (0-127) | (128-191) | (192-) |
// |=============|===================|=====================|==================|
// | ASCII | next: ASCII/lead | not valid | next: cont. |
// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
// |-------------|-------------------|---------------------|------------------|
// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
// |-------------|-------------------|---------------------|------------------|
// | lead byte | not valid | next: ASCII/lead | not valid |
// | (192-207) | | context: 0 - 1 | |
// |-------------|-------------------|---------------------|------------------|
// | lead byte | not valid | next: cont. | not valid |
// | (208-) | | context: 2 - 3 | |
// |-------------|-------------------|---------------------|------------------|
static const uint8_t kUTF8ContextLookup[512] = {
// Last byte.
//
// ASCII range.
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
// UTF8 continuation byte range.
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
// UTF8 lead byte range.
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
// Second last byte.
//
// ASCII range.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
// UTF8 continuation byte range.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// UTF8 lead byte range.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
// Context lookup table for small signed integers.
static const uint8_t kSigned3BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
enum ContextType {
CONTEXT_LSB6 = 0,
CONTEXT_MSB6 = 1,
CONTEXT_UTF8 = 2,
CONTEXT_SIGNED = 3
};
static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
switch (mode) {
case CONTEXT_LSB6:
return p1 & 0x3f;
case CONTEXT_MSB6:
return static_cast<uint8_t>(p1 >> 2);
case CONTEXT_UTF8:
return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
case CONTEXT_SIGNED:
return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +
kSigned3BitContextLookup[p2]);
default:
return 0;
}
}
} // namespace brotli
#endif // BROTLI_ENC_CONTEXT_H_
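A quick sanity check of the tables above (a hypothetical test, not part of the imported sources): for the last two bytes of "the", p1 = 'e' is a lower-case vowel (context1 = 14) and p2 = 'h' is a lower-case letter (context2 = 3), so the UTF-8 context id is 4 * 14 + 3 = 59.

#include <cassert>
#include "./context.h"

int main() {
  // Computed through the helper...
  assert(brotli::Context('e', 'h', brotli::CONTEXT_UTF8) == 59);
  // ...and read directly from the two halves of the lookup table.
  assert((brotli::kUTF8ContextLookup['e'] |
          brotli::kUTF8ContextLookup['h' + 256]) == 59);
  return 0;
}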

Diff not shown because of its large size.


@@ -0,0 +1,41 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Collection of static dictionary words.
#ifndef BROTLI_ENC_DICTIONARY_H_
#define BROTLI_ENC_DICTIONARY_H_
#include "./types.h"
// No namespace, use same identifier as for the C decoder.
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
extern const uint8_t kBrotliDictionary[122784];
static const uint32_t kBrotliDictionaryOffsetsByLength[] = {
0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032,
53248, 63488, 74752, 87040, 93696, 100864, 104704, 106752, 108928, 113536,
115968, 118528, 119872, 121280, 122016,
};
static const uint8_t kBrotliDictionarySizeBitsByLength[] = {
0, 0, 0, 0, 10, 10, 11, 11, 10, 10,
10, 10, 10, 9, 9, 8, 7, 7, 8, 7,
7, 6, 6, 5, 5,
};
static const int kBrotliMinDictionaryWordLength = 4;
static const int kBrotliMaxDictionaryWordLength = 24;
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif // BROTLI_ENC_DICTIONARY_H_
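The arrays above describe a flat layout: the words of a given length are stored back to back, 1 << kBrotliDictionarySizeBitsByLength[length] of them, starting at byte offset kBrotliDictionaryOffsetsByLength[length]. The offsets are consistent with that: e.g. the 1 << 10 four-byte words occupy bytes 0..4095, and the offset for length 5 is 4096. A hypothetical lookup helper illustrating the layout:

#include <cstdint>
#include "./dictionary.h"

// Returns a pointer to word "word_id" among the words of length "len".
// Assumes kBrotliMinDictionaryWordLength <= len <=
// kBrotliMaxDictionaryWordLength and
// word_id < (1u << kBrotliDictionarySizeBitsByLength[len]).
static const uint8_t* DictionaryWord(int len, uint32_t word_id) {
  const uint32_t offset = kBrotliDictionaryOffsetsByLength[len] +
                          static_cast<uint32_t>(len) * word_id;
  return &kBrotliDictionary[offset];
}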

Diff not shown because of its large size.

modules/brotli/enc/encode.cc (new file, 1180 lines): diff not shown because of its large size.

modules/brotli/enc/encode.h (new file, 209 lines):

@@ -0,0 +1,209 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// API for Brotli compression
#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
#include <string>
#include <vector>
#include "./command.h"
#include "./hash.h"
#include "./ringbuffer.h"
#include "./static_dict.h"
#include "./streams.h"
#include "./types.h"
namespace brotli {
static const int kMaxWindowBits = 24;
static const int kMinWindowBits = 10;
static const int kMinInputBlockBits = 16;
static const int kMaxInputBlockBits = 24;
struct BrotliParams {
BrotliParams(void)
: mode(MODE_GENERIC),
quality(11),
lgwin(22),
lgblock(0),
enable_dictionary(true),
enable_transforms(false),
greedy_block_split(false),
enable_context_modeling(true) {}
enum Mode {
// Default compression mode. The compressor does not know anything in
// advance about the properties of the input.
MODE_GENERIC = 0,
// Compression mode for UTF-8 format text input.
MODE_TEXT = 1,
// Compression mode used in WOFF 2.0.
MODE_FONT = 2
};
Mode mode;
// Controls the compression-speed vs compression-density tradeoffs. The higher
// the quality, the slower the compression. Range is 0 to 11.
int quality;
// Base 2 logarithm of the sliding window size. Range is 10 to 24.
int lgwin;
// Base 2 logarithm of the maximum input block size. Range is 16 to 24.
// If set to 0, the value will be set based on the quality.
int lgblock;
// These settings are deprecated and will be ignored.
// All speed vs. size compromises are controlled by the quality param.
bool enable_dictionary;
bool enable_transforms;
bool greedy_block_split;
bool enable_context_modeling;
};
// An instance cannot be reused for multiple brotli streams.
class BrotliCompressor {
public:
explicit BrotliCompressor(BrotliParams params);
~BrotliCompressor(void);
// The maximum input size that can be processed at once.
size_t input_block_size(void) const { return size_t(1) << params_.lgblock; }
// Encodes the data in input_buffer as a meta-block and writes it to
// encoded_buffer (*encoded_size should be set to the size of
// encoded_buffer) and sets *encoded_size to the number of bytes that
// was written. The input_size must be <= input_block_size().
// Returns 0 if there was an error and 1 otherwise.
bool WriteMetaBlock(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Writes a metadata meta-block containing the given input to encoded_buffer.
// *encoded_size should be set to the size of the encoded_buffer.
// Sets *encoded_size to the number of bytes that were written.
// Note that the given input data will not be part of the sliding window and
// thus no backward references can be made to this data from subsequent
// metablocks.
bool WriteMetadata(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Writes a zero-length meta-block with end-of-input bit set to the
// internal output buffer and copies the output buffer to encoded_buffer
// (*encoded_size should be set to the size of encoded_buffer) and sets
// *encoded_size to the number of bytes written. Returns false if there was
// an error and true otherwise.
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
// Copies the given input data to the internal ring buffer of the compressor.
// No processing of the data occurs at this time and this function can be
// called multiple times before calling WriteBrotliData() to process the
// accumulated input. At most input_block_size() bytes of input data can be
// copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
void CopyInputToRingBuffer(const size_t input_size,
const uint8_t* input_buffer);
// Processes the accumulated input data and sets *out_size to the length of
// the new output meta-block, or to zero if no new output meta-block was
// created (in this case the processed input data is buffered internally).
// If *out_size is positive, *output points to the start of the output data.
// If is_last or force_flush is true, an output meta-block is always created.
// Returns false if the size of the input data is larger than
// input_block_size().
bool WriteBrotliData(const bool is_last, const bool force_flush,
size_t* out_size, uint8_t** output);
// Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
// e.g. for custom static dictionaries for data formats.
// Not to be confused with the built-in transformable dictionary of Brotli.
// To decode, use BrotliSetCustomDictionary of the decoder with the same
// dictionary.
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
// No-op, but we keep it here for API backward-compatibility.
void WriteStreamHeader(void) {}
private:
uint8_t* GetBrotliStorage(size_t size);
// Allocates and clears a hash table using memory in "*this",
// stores the number of buckets in "*table_size" and returns a pointer to
// the base of the hash table.
int* GetHashTable(int quality,
size_t input_size, size_t* table_size);
BrotliParams params_;
Hashers* hashers_;
int hash_type_;
uint64_t input_pos_;
RingBuffer* ringbuffer_;
size_t cmd_alloc_size_;
Command* commands_;
size_t num_commands_;
size_t num_literals_;
size_t last_insert_len_;
uint64_t last_flush_pos_;
uint64_t last_processed_pos_;
int dist_cache_[4];
int saved_dist_cache_[4];
uint8_t last_byte_;
uint8_t last_byte_bits_;
uint8_t prev_byte_;
uint8_t prev_byte2_;
size_t storage_size_;
uint8_t* storage_;
// Hash table for quality 0 mode.
int small_table_[1 << 10]; // 2KB
int* large_table_; // Allocated only when needed
// Command and distance prefix codes (each 64 symbols, stored back-to-back)
// used for the next block in quality 0. The command prefix code is over a
// smaller alphabet with the following 64 symbols:
// 0 - 15: insert length code 0, copy length code 0 - 15, same distance
// 16 - 39: insert length code 0, copy length code 0 - 23
// 40 - 63: insert length code 0 - 23, copy length code 0
// Note that symbols 16 and 40 represent the same code in the full alphabet,
// but we do not use either of them in quality 0.
uint8_t cmd_depths_[128];
uint16_t cmd_bits_[128];
// The compressed form of the command and distance prefix codes for the next
// block in quality 0.
uint8_t cmd_code_[512];
size_t cmd_code_numbits_;
// Command and literal buffers for quality 1.
uint32_t* command_buf_;
uint8_t* literal_buf_;
int is_last_block_emitted_;
};
// Compresses the data in input_buffer into encoded_buffer, and sets
// *encoded_size to the compressed length.
// Returns 0 if there was an error and 1 otherwise.
int BrotliCompressBuffer(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Same as above, but uses the specified input and output classes instead
// of reading from and writing to pre-allocated memory buffers.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
// Before compressing the data, sets a custom LZ77 dictionary with
// BrotliCompressor::BrotliSetCustomDictionary.
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliParams params,
BrotliIn* in, BrotliOut* out);
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_H_
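A minimal one-shot usage sketch for the buffer API declared above (hypothetical, not part of the imported sources); the output bound is an assumption, and real callers should size the buffer generously:

#include <string>
#include <vector>
#include "./encode.h"

// Compresses "in" into "out" with the default parameters (quality 11).
bool CompressString(const std::string& in, std::string* out) {
  brotli::BrotliParams params;
  size_t encoded_size = in.size() + 1024;  // assumed worst-case bound
  std::vector<uint8_t> encoded(encoded_size);
  if (!brotli::BrotliCompressBuffer(
          params, in.size(),
          reinterpret_cast<const uint8_t*>(in.data()),
          &encoded_size, &encoded[0])) {
    return false;  // BrotliCompressBuffer returns 0 on error
  }
  out->assign(reinterpret_cast<const char*>(&encoded[0]), encoded_size);
  return true;
}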


@@ -0,0 +1,279 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Implementation of parallel Brotli compressor.
#include "./encode_parallel.h"
#include <algorithm>
#include <limits>
#include "./backward_references.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./brotli_bit_stream.h"
#include "./cluster.h"
#include "./context.h"
#include "./metablock.h"
#include "./transform.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
#include "./histogram.h"
#include "./prefix.h"
#include "./utf8_util.h"
#include "./write_bits.h"
namespace brotli {
namespace {
void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
}
for (size_t i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes,
distance_postfix_bits,
&cmd->dist_prefix_,
&cmd->dist_extra_);
}
}
}
bool WriteMetaBlockParallel(const BrotliParams& params,
const uint32_t input_size,
const uint8_t* input_buffer,
const uint32_t prefix_size,
const uint8_t* prefix_buffer,
const bool is_first,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (input_size == 0) {
return false;
}
// Copy prefix + next input block into a contiguous area.
uint32_t input_pos = prefix_size;
// CreateBackwardReferences reads up to 3 bytes past the end of input if the
// mask points past the end of input.
// FindMatchLengthWithLimit could do another 8 bytes look-forward.
std::vector<uint8_t> input(prefix_size + input_size + 4 + 8);
memcpy(&input[0], prefix_buffer, prefix_size);
memcpy(&input[input_pos], input_buffer, input_size);
// Since we don't have a ringbuffer, masking is a no-op.
// We use one less bit than the full range because some of the code uses
// mask + 1 as the size of the ringbuffer.
const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
// Decide about UTF8 mode.
static const double kMinUTF8Ratio = 0.75;
bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
kMinUTF8Ratio);
// Initialize hashers.
int hash_type = std::min(10, params.quality);
Hashers* hashers = new Hashers();
hashers->Init(hash_type);
// Compute backward references.
size_t last_insert_len = 0;
size_t num_commands = 0;
size_t num_literals = 0;
int dist_cache[4] = { -4, -4, -4, -4 };
Command* commands = static_cast<Command*>(
malloc(sizeof(Command) * ((input_size + 1) >> 1)));
if (commands == 0) {
delete hashers;
return false;
}
CreateBackwardReferences(
input_size, input_pos, is_last,
&input[0], mask,
params.quality,
params.lgwin,
hashers,
hash_type,
dist_cache,
&last_insert_len,
commands,
&num_commands,
&num_literals);
delete hashers;
if (last_insert_len > 0) {
commands[num_commands++] = Command(last_insert_len);
num_literals += last_insert_len;
}
assert(num_commands != 0);
// Build the meta-block.
MetaBlockSplit mb;
uint32_t num_direct_distance_codes =
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
uint32_t distance_postfix_bits =
params.mode == BrotliParams::MODE_FONT ? 1 : 0;
ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
RecomputeDistancePrefixes(commands, num_commands,
num_direct_distance_codes,
distance_postfix_bits);
if (params.quality <= 9) {
BuildMetaBlockGreedy(&input[0], input_pos, mask,
commands, num_commands,
&mb);
} else {
BuildMetaBlock(&input[0], input_pos, mask,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
}
// Set up the temporary output storage.
const size_t max_out_size = 2 * input_size + 500;
std::vector<uint8_t> storage(max_out_size);
uint8_t first_byte = 0;
size_t first_byte_bits = 0;
if (is_first) {
if (params.lgwin == 16) {
first_byte = 0;
first_byte_bits = 1;
} else if (params.lgwin == 17) {
first_byte = 1;
first_byte_bits = 7;
} else {
first_byte = static_cast<uint8_t>(((params.lgwin - 17) << 1) | 1);
first_byte_bits = 4;
}
}
storage[0] = static_cast<uint8_t>(first_byte);
size_t storage_ix = first_byte_bits;
// Store the meta-block to the temporary output.
StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
&storage_ix, &storage[0]);
free(commands);
// If this is not the last meta-block, store an empty metadata
// meta-block so that the meta-block will end at a byte boundary.
if (!is_last) {
StoreSyncMetaBlock(&storage_ix, &storage[0]);
}
// If the compressed data is too large, fall back to an uncompressed
// meta-block.
size_t output_size = storage_ix >> 3;
if (input_size + 4 < output_size) {
storage[0] = static_cast<uint8_t>(first_byte);
storage_ix = first_byte_bits;
StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0]);
output_size = storage_ix >> 3;
}
// Copy the temporary output with size-check to the output.
if (output_size > *encoded_size) {
return false;
}
memcpy(encoded_buffer, &storage[0], output_size);
*encoded_size = output_size;
return true;
}
} // namespace
int BrotliCompressBufferParallel(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (*encoded_size == 0) {
// Output buffer needs at least one byte.
return 0;
} else if (input_size == 0) {
encoded_buffer[0] = 6;
*encoded_size = 1;
return 1;
}
// Sanitize params.
if (params.lgwin < kMinWindowBits) {
params.lgwin = kMinWindowBits;
} else if (params.lgwin > kMaxWindowBits) {
params.lgwin = kMaxWindowBits;
}
if (params.lgblock == 0) {
params.lgblock = 16;
if (params.quality >= 9 && params.lgwin > params.lgblock) {
params.lgblock = std::min(21, params.lgwin);
}
} else if (params.lgblock < kMinInputBlockBits) {
params.lgblock = kMinInputBlockBits;
} else if (params.lgblock > kMaxInputBlockBits) {
params.lgblock = kMaxInputBlockBits;
}
size_t max_input_block_size = 1 << params.lgblock;
size_t max_prefix_size = 1u << params.lgwin;
std::vector<std::vector<uint8_t> > compressed_pieces;
// Compress block-by-block independently.
for (size_t pos = 0; pos < input_size; ) {
uint32_t input_block_size =
static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
uint32_t prefix_size =
static_cast<uint32_t>(std::min(max_prefix_size, pos));
size_t out_size = input_block_size + (input_block_size >> 3) + 1024;
std::vector<uint8_t> out(out_size);
if (!WriteMetaBlockParallel(params,
input_block_size,
&input_buffer[pos],
prefix_size,
&input_buffer[pos - prefix_size],
pos == 0,
pos + input_block_size == input_size,
&out_size,
&out[0])) {
return false;
}
out.resize(out_size);
compressed_pieces.push_back(out);
pos += input_block_size;
}
// Piece together the output.
size_t out_pos = 0;
for (size_t i = 0; i < compressed_pieces.size(); ++i) {
const std::vector<uint8_t>& out = compressed_pieces[i];
if (out_pos + out.size() > *encoded_size) {
return false;
}
memcpy(&encoded_buffer[out_pos], &out[0], out.size());
out_pos += out.size();
}
*encoded_size = out_pos;
return true;
}
} // namespace brotli


@@ -0,0 +1,28 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// API for parallel Brotli compression
// Note that this is only a proof of concept currently and not part of the
// final API yet.
#ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
#define BROTLI_ENC_ENCODE_PARALLEL_H_
#include "./encode.h"
#include "./types.h"
namespace brotli {
int BrotliCompressBufferParallel(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_PARALLEL_H_
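The calling convention matches BrotliCompressBuffer: *encoded_size carries the output buffer capacity on entry and the compressed length on return. A short sketch (the capacity bound is an assumption):

#include <cstdint>
#include <vector>
#include "./encode_parallel.h"

bool CompressParallel(const uint8_t* in, size_t in_size,
                      std::vector<uint8_t>* out) {
  brotli::BrotliParams params;
  size_t encoded_size = 2 * in_size + 500;  // assumed capacity bound
  out->resize(encoded_size);
  if (!brotli::BrotliCompressBufferParallel(params, in_size, in,
                                            &encoded_size, &(*out)[0])) {
    return false;
  }
  out->resize(encoded_size);  // shrink to the actual compressed length
  return true;
}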


@@ -0,0 +1,480 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Entropy encoding (Huffman) utilities.
#include "./entropy_encode.h"
#include <algorithm>
#include <limits>
#include <cstdlib>
#include "./histogram.h"
#include "./port.h"
#include "./types.h"
namespace brotli {
void SetDepth(const HuffmanTree &p,
HuffmanTree *pool,
uint8_t *depth,
uint8_t level) {
if (p.index_left_ >= 0) {
++level;
SetDepth(pool[p.index_left_], pool, depth, level);
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
} else {
depth[p.index_right_or_value_] = level;
}
}
// Sort the root nodes, least popular first.
static inline bool SortHuffmanTree(const HuffmanTree& v0,
const HuffmanTree& v1) {
if (v0.total_count_ != v1.total_count_) {
return v0.total_count_ < v1.total_count_;
}
return v0.index_right_or_value_ > v1.index_right_or_value_;
}
// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
// Brotli specifies a maximum depth of 15 bits for "code trees"
// and 7 bits for "code length code trees."
//
// count_limit is the value that is to be faked as the minimum value
// and this minimum value is raised until the tree matches the
// maximum length requirement.
//
// This algorithm does not perform well on very long data blocks,
// especially when population counts are larger than 2**tree_limit, but
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm.
for (uint32_t count_limit = 1; ; count_limit *= 2) {
size_t n = 0;
for (size_t i = length; i != 0;) {
--i;
if (data[i]) {
const uint32_t count = std::max(data[i], count_limit);
tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
}
}
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element.
break;
}
std::sort(tree, tree + n, SortHuffmanTree);
// The nodes are:
// [0, n): the sorted leaf nodes that we start with.
// [n]: we add a sentinel here.
// [n + 1, 2n): new parent nodes are added here, starting from
// (n+1). These are naturally in ascending order.
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree[n] = sentinel;
tree[n + 1] = sentinel;
size_t i = 0; // Points to the next leaf node.
size_t j = n + 1; // Points to the next non-leaf node.
for (size_t k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
++i;
} else {
left = j;
++j;
}
if (tree[i].total_count_ <= tree[j].total_count_) {
right = i;
++i;
} else {
right = j;
++j;
}
// The sentinel node becomes the parent node.
size_t j_end = 2 * n - k;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = static_cast<int16_t>(left);
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
// Add back the last sentinel node.
tree[j_end + 1] = sentinel;
}
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits.
// If this was not successful, add fake entities to the lowest values
// and retry.
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
break;
}
}
}
static void Reverse(uint8_t* v, size_t start, size_t end) {
--end;
while (start < end) {
uint8_t tmp = v[start];
v[start] = v[end];
v[end] = tmp;
++start;
--end;
}
}
static void WriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
assert(repetitions > 0);
if (previous_value != value) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions == 7) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
repetitions -= 3;
size_t start = *tree_size;
while (true) {
tree[*tree_size] = 16;
extra_bits_data[*tree_size] = repetitions & 0x3;
++(*tree_size);
repetitions >>= 2;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
static void WriteHuffmanTreeRepetitionsZeros(
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
if (repetitions == 11) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
repetitions -= 3;
size_t start = *tree_size;
while (true) {
tree[*tree_size] = 17;
extra_bits_data[*tree_size] = repetitions & 0x7;
++(*tree_size);
repetitions >>= 3;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
size_t sum;
const size_t streak_limit = 1240;
// Let's make the Huffman code more compatible with rle encoding.
size_t i;
for (i = 0; i < length; i++) {
if (counts[i]) {
++nonzero_count;
}
}
if (nonzero_count < 16) {
return;
}
while (length != 0 && counts[length - 1] == 0) {
--length;
}
if (length == 0) {
return; // All zeros.
}
// Now counts[0..length - 1] does not have trailing zeros.
{
size_t nonzeros = 0;
uint32_t smallest_nonzero = 1 << 30;
for (i = 0; i < length; ++i) {
if (counts[i] != 0) {
++nonzeros;
if (smallest_nonzero > counts[i]) {
smallest_nonzero = counts[i];
}
}
}
if (nonzeros < 5) {
// Small histogram will model it well.
return;
}
size_t zeros = length - nonzeros;
if (smallest_nonzero < 4) {
if (zeros < 6) {
for (i = 1; i < length - 1; ++i) {
if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
counts[i] = 1;
}
}
}
}
if (nonzeros < 28) {
return;
}
}
// 2) Let's mark all population counts that already can be encoded
// with an rle code.
memset(good_for_rle, 0, length);
{
// Let's not spoil any of the existing good rle codes.
// Mark any seq of 0's that is longer than 5 as good_for_rle.
// Mark any seq of non-0's that is longer than 7 as good_for_rle.
uint32_t symbol = counts[0];
size_t step = 0;
for (i = 0; i <= length; ++i) {
if (i == length || counts[i] != symbol) {
if ((symbol == 0 && step >= 5) ||
(symbol != 0 && step >= 7)) {
size_t k;
for (k = 0; k < step; ++k) {
good_for_rle[i - k - 1] = 1;
}
}
step = 1;
if (i != length) {
symbol = counts[i];
}
} else {
++step;
}
}
}
// 3) Let's replace those population counts that lead to more rle codes.
// Math here is in 24.8 fixed point representation.
stride = 0;
limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
sum = 0;
for (i = 0; i <= length; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
(256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
size_t k;
// The stride must end, collapse what we have, if we have enough (4).
size_t count = (sum + stride / 2) / stride;
if (count == 0) {
count = 1;
}
if (sum == 0) {
// Don't let an all-zeros stride be upgraded to ones.
count = 0;
}
for (k = 0; k < stride; ++k) {
// We don't want to change the value at counts[i],
// which already belongs to the next stride. Thus the - 1.
counts[i - k - 1] = static_cast<uint32_t>(count);
}
}
stride = 0;
sum = 0;
if (i < length - 2) {
// All interesting strides have a count of at least 4,
// at least when non-zero.
limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
} else if (i < length) {
limit = 256 * counts[i];
} else {
limit = 0;
}
}
++stride;
if (i != length) {
sum += counts[i];
if (stride >= 4) {
limit = (256 * sum + stride / 2) / stride;
}
if (stride == 4) {
limit += 120;
}
}
}
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
bool *use_rle_for_non_zero,
bool *use_rle_for_zero) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
size_t count_reps_non_zero = 1;
for (size_t i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (reps >= 3 && value == 0) {
total_reps_zero += reps;
++count_reps_zero;
}
if (reps >= 4 && value != 0) {
total_reps_non_zero += reps;
++count_reps_non_zero;
}
i += reps;
}
*use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
*use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
}
void WriteHuffmanTree(const uint8_t* depth,
size_t length,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
uint8_t previous_value = 8;
// Throw away trailing zeros.
size_t new_length = length;
for (size_t i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
--new_length;
} else {
break;
}
}
// First gather statistics on whether it is a good idea to use rle.
bool use_rle_for_non_zero = false;
bool use_rle_for_zero = false;
if (length > 50) {
// Find rle coding for longer codes.
// Shorter codes seem not to benefit from rle.
DecideOverRleUse(depth, new_length,
&use_rle_for_non_zero, &use_rle_for_zero);
}
// Actual rle coding.
for (size_t i = 0; i < new_length;) {
const uint8_t value = depth[i];
size_t reps = 1;
if ((value != 0 && use_rle_for_non_zero) ||
(value == 0 && use_rle_for_zero)) {
for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
++reps;
}
}
if (value == 0) {
WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
} else {
WriteHuffmanTreeRepetitions(previous_value,
value, reps, tree_size,
tree, extra_bits_data);
previous_value = value;
}
i += reps;
}
}
namespace {
uint16_t ReverseBits(int num_bits, uint16_t bits) {
static const size_t kLut[16] = { // Pre-reversed 4-bit values.
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
};
size_t retval = kLut[bits & 0xf];
for (int i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits = static_cast<uint16_t>(bits >> 4);
retval |= kLut[bits & 0xf];
}
retval >>= (-num_bits & 0x3);
return static_cast<uint16_t>(retval);
}
} // namespace
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits) {
// In Brotli, all bit depths are [1..15]
// 0 bit depth means that the symbol does not exist.
const int kMaxBits = 16; // 0..15 are values for bits
uint16_t bl_count[kMaxBits] = { 0 };
{
for (size_t i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
}
uint16_t next_code[kMaxBits];
next_code[0] = 0;
{
int code = 0;
for (int bits = 1; bits < kMaxBits; ++bits) {
code = (code + bl_count[bits - 1]) << 1;
next_code[bits] = static_cast<uint16_t>(code);
}
}
for (size_t i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
}
}
}
} // namespace brotli
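A tiny worked example of the canonical code assignment in ConvertBitDepthsToSymbols above (a hypothetical check, not part of the imported sources): for depths {1, 2, 3, 3}, bl_count is {0, 1, 1, 2}, next_code starts at 0, 2 and 6 for lengths 1, 2 and 3, and the canonical codes 0, 10, 110, 111 are stored bit-reversed (LSB first) as 0, 1, 3, 7.

#include <cassert>
#include <cstdint>
#include "./entropy_encode.h"

int main() {
  const uint8_t depth[4] = { 1, 2, 3, 3 };  // code length per symbol
  uint16_t bits[4] = { 0 };
  brotli::ConvertBitDepthsToSymbols(depth, 4, bits);
  assert(bits[0] == 0);  // 0   -> 0
  assert(bits[1] == 1);  // 10  -> reversed: 01
  assert(bits[2] == 3);  // 110 -> reversed: 011
  assert(bits[3] == 7);  // 111 -> reversed: 111
  return 0;
}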


@@ -0,0 +1,104 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Entropy encoding (Huffman) utilities.
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <string.h>
#include "./histogram.h"
#include "./prefix.h"
#include "./types.h"
namespace brotli {
// A node of a Huffman tree.
struct HuffmanTree {
HuffmanTree() {}
HuffmanTree(uint32_t count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
uint32_t total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
uint8_t *depth, uint8_t level);
// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
// The tree_limit is the maximum bit depth of the Huffman codes.
//
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// The actual Huffman tree is constructed in the tree[] array, which has to
// be at least 2 * length + 1 long.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth);
// Change the population counts so that the subsequent Huffman tree
// compression, especially its rle-part, is more likely to compress
// this data efficiently.
//
// length contains the size of the histogram.
// counts contains the population counts.
// good_for_rle is a buffer of at least length size
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle);
// Write a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree.
void WriteHuffmanTree(const uint8_t* depth,
size_t num,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data);
// Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits);
template<int kSize>
struct EntropyCode {
// How many bits for symbol.
uint8_t depth_[kSize];
// Actual bits used to represent the symbol.
uint16_t bits_[kSize];
// How many non-zero depth.
int count_;
// First four symbols with non-zero depth.
int symbols_[4];
};
static const int kCodeLengthCodes = 18;
// Literal entropy code.
typedef EntropyCode<256> EntropyCodeLiteral;
// Prefix entropy codes.
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
typedef EntropyCode<272> EntropyCodeContextMap;
// Block type entropy code, 256 block types + 2 special symbols.
typedef EntropyCode<258> EntropyCodeBlockType;
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
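Putting the two helpers together, a minimal sketch of building a depth-limited code for a histogram (assumptions: a non-empty histogram and the 15-bit depth limit; the tree buffer size follows the 2 * length + 1 requirement stated above):

#include <cstdint>
#include <vector>
#include "./entropy_encode.h"

// Builds code lengths (<= 15 bits) and canonical bit patterns for "counts".
void BuildCode(const uint32_t* counts, size_t length,
               std::vector<uint8_t>* depth, std::vector<uint16_t>* bits) {
  std::vector<brotli::HuffmanTree> tree(2 * length + 1);
  depth->assign(length, 0);
  bits->assign(length, 0);
  brotli::CreateHuffmanTree(counts, length, /* tree_limit = */ 15,
                            &tree[0], &(*depth)[0]);
  brotli::ConvertBitDepthsToSymbols(&(*depth)[0], length, &(*bits)[0]);
}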


@@ -0,0 +1,572 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Static entropy codes used for faster meta-block encoding.
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#include "./prefix.h"
#include "./types.h"
#include "./write_bits.h"
namespace brotli {
static const uint8_t kCodeLengthDepth[18] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
};
static const uint8_t kStaticCommandCodeDepth[kNumCommandPrefixes] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
};
static const uint8_t kStaticDistanceCodeDepth[64] = {
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
};
static const uint32_t kCodeLengthBits[18] = {
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
};
inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
WriteBits(40, MAKE_UINT64_T(0xff, 0x55555554), storage_ix, storage);
}
static const uint64_t kZeroRepsBits[704] = {
0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
0x00003b87, 0x00000397, 0x00000b97, 0x00001397, 0x00001b97, 0x00002397,
0x00002b97, 0x00003397, 0x00003b97, 0x000003a7, 0x00000ba7, 0x000013a7,
0x00001ba7, 0x000023a7, 0x00002ba7, 0x000033a7, 0x00003ba7, 0x000003b7,
0x00000bb7, 0x000013b7, 0x00001bb7, 0x000023b7, 0x00002bb7, 0x000033b7,
0x00003bb7, 0x000003c7, 0x00000bc7, 0x000013c7, 0x00001bc7, 0x000023c7,
0x00002bc7, 0x000033c7, 0x00003bc7, 0x000003d7, 0x00000bd7, 0x000013d7,
0x00001bd7, 0x000023d7, 0x00002bd7, 0x000033d7, 0x00003bd7, 0x000003e7,
0x00000be7, 0x000013e7, 0x00001be7, 0x000023e7, 0x00002be7, 0x000033e7,
0x00003be7, 0x000003f7, 0x00000bf7, 0x000013f7, 0x00001bf7, 0x000023f7,
0x00002bf7, 0x000033f7, 0x00003bf7, 0x0001c387, 0x0005c387, 0x0009c387,
0x000dc387, 0x0011c387, 0x0015c387, 0x0019c387, 0x001dc387, 0x0001cb87,
0x0005cb87, 0x0009cb87, 0x000dcb87, 0x0011cb87, 0x0015cb87, 0x0019cb87,
0x001dcb87, 0x0001d387, 0x0005d387, 0x0009d387, 0x000dd387, 0x0011d387,
0x0015d387, 0x0019d387, 0x001dd387, 0x0001db87, 0x0005db87, 0x0009db87,
0x000ddb87, 0x0011db87, 0x0015db87, 0x0019db87, 0x001ddb87, 0x0001e387,
0x0005e387, 0x0009e387, 0x000de387, 0x0011e387, 0x0015e387, 0x0019e387,
0x001de387, 0x0001eb87, 0x0005eb87, 0x0009eb87, 0x000deb87, 0x0011eb87,
0x0015eb87, 0x0019eb87, 0x001deb87, 0x0001f387, 0x0005f387, 0x0009f387,
0x000df387, 0x0011f387, 0x0015f387, 0x0019f387, 0x001df387, 0x0001fb87,
0x0005fb87, 0x0009fb87, 0x000dfb87, 0x0011fb87, 0x0015fb87, 0x0019fb87,
0x001dfb87, 0x0001c397, 0x0005c397, 0x0009c397, 0x000dc397, 0x0011c397,
0x0015c397, 0x0019c397, 0x001dc397, 0x0001cb97, 0x0005cb97, 0x0009cb97,
0x000dcb97, 0x0011cb97, 0x0015cb97, 0x0019cb97, 0x001dcb97, 0x0001d397,
0x0005d397, 0x0009d397, 0x000dd397, 0x0011d397, 0x0015d397, 0x0019d397,
0x001dd397, 0x0001db97, 0x0005db97, 0x0009db97, 0x000ddb97, 0x0011db97,
0x0015db97, 0x0019db97, 0x001ddb97, 0x0001e397, 0x0005e397, 0x0009e397,
0x000de397, 0x0011e397, 0x0015e397, 0x0019e397, 0x001de397, 0x0001eb97,
0x0005eb97, 0x0009eb97, 0x000deb97, 0x0011eb97, 0x0015eb97, 0x0019eb97,
0x001deb97, 0x0001f397, 0x0005f397, 0x0009f397, 0x000df397, 0x0011f397,
0x0015f397, 0x0019f397, 0x001df397, 0x0001fb97, 0x0005fb97, 0x0009fb97,
0x000dfb97, 0x0011fb97, 0x0015fb97, 0x0019fb97, 0x001dfb97, 0x0001c3a7,
0x0005c3a7, 0x0009c3a7, 0x000dc3a7, 0x0011c3a7, 0x0015c3a7, 0x0019c3a7,
0x001dc3a7, 0x0001cba7, 0x0005cba7, 0x0009cba7, 0x000dcba7, 0x0011cba7,
0x0015cba7, 0x0019cba7, 0x001dcba7, 0x0001d3a7, 0x0005d3a7, 0x0009d3a7,
0x000dd3a7, 0x0011d3a7, 0x0015d3a7, 0x0019d3a7, 0x001dd3a7, 0x0001dba7,
0x0005dba7, 0x0009dba7, 0x000ddba7, 0x0011dba7, 0x0015dba7, 0x0019dba7,
0x001ddba7, 0x0001e3a7, 0x0005e3a7, 0x0009e3a7, 0x000de3a7, 0x0011e3a7,
0x0015e3a7, 0x0019e3a7, 0x001de3a7, 0x0001eba7, 0x0005eba7, 0x0009eba7,
0x000deba7, 0x0011eba7, 0x0015eba7, 0x0019eba7, 0x001deba7, 0x0001f3a7,
0x0005f3a7, 0x0009f3a7, 0x000df3a7, 0x0011f3a7, 0x0015f3a7, 0x0019f3a7,
0x001df3a7, 0x0001fba7, 0x0005fba7, 0x0009fba7, 0x000dfba7, 0x0011fba7,
0x0015fba7, 0x0019fba7, 0x001dfba7, 0x0001c3b7, 0x0005c3b7, 0x0009c3b7,
0x000dc3b7, 0x0011c3b7, 0x0015c3b7, 0x0019c3b7, 0x001dc3b7, 0x0001cbb7,
0x0005cbb7, 0x0009cbb7, 0x000dcbb7, 0x0011cbb7, 0x0015cbb7, 0x0019cbb7,
0x001dcbb7, 0x0001d3b7, 0x0005d3b7, 0x0009d3b7, 0x000dd3b7, 0x0011d3b7,
0x0015d3b7, 0x0019d3b7, 0x001dd3b7, 0x0001dbb7, 0x0005dbb7, 0x0009dbb7,
0x000ddbb7, 0x0011dbb7, 0x0015dbb7, 0x0019dbb7, 0x001ddbb7, 0x0001e3b7,
0x0005e3b7, 0x0009e3b7, 0x000de3b7, 0x0011e3b7, 0x0015e3b7, 0x0019e3b7,
0x001de3b7, 0x0001ebb7, 0x0005ebb7, 0x0009ebb7, 0x000debb7, 0x0011ebb7,
0x0015ebb7, 0x0019ebb7, 0x001debb7, 0x0001f3b7, 0x0005f3b7, 0x0009f3b7,
0x000df3b7, 0x0011f3b7, 0x0015f3b7, 0x0019f3b7, 0x001df3b7, 0x0001fbb7,
0x0005fbb7, 0x0009fbb7, 0x000dfbb7, 0x0011fbb7, 0x0015fbb7, 0x0019fbb7,
0x001dfbb7, 0x0001c3c7, 0x0005c3c7, 0x0009c3c7, 0x000dc3c7, 0x0011c3c7,
0x0015c3c7, 0x0019c3c7, 0x001dc3c7, 0x0001cbc7, 0x0005cbc7, 0x0009cbc7,
0x000dcbc7, 0x0011cbc7, 0x0015cbc7, 0x0019cbc7, 0x001dcbc7, 0x0001d3c7,
0x0005d3c7, 0x0009d3c7, 0x000dd3c7, 0x0011d3c7, 0x0015d3c7, 0x0019d3c7,
0x001dd3c7, 0x0001dbc7, 0x0005dbc7, 0x0009dbc7, 0x000ddbc7, 0x0011dbc7,
0x0015dbc7, 0x0019dbc7, 0x001ddbc7, 0x0001e3c7, 0x0005e3c7, 0x0009e3c7,
0x000de3c7, 0x0011e3c7, 0x0015e3c7, 0x0019e3c7, 0x001de3c7, 0x0001ebc7,
0x0005ebc7, 0x0009ebc7, 0x000debc7, 0x0011ebc7, 0x0015ebc7, 0x0019ebc7,
0x001debc7, 0x0001f3c7, 0x0005f3c7, 0x0009f3c7, 0x000df3c7, 0x0011f3c7,
0x0015f3c7, 0x0019f3c7, 0x001df3c7, 0x0001fbc7, 0x0005fbc7, 0x0009fbc7,
0x000dfbc7, 0x0011fbc7, 0x0015fbc7, 0x0019fbc7, 0x001dfbc7, 0x0001c3d7,
0x0005c3d7, 0x0009c3d7, 0x000dc3d7, 0x0011c3d7, 0x0015c3d7, 0x0019c3d7,
0x001dc3d7, 0x0001cbd7, 0x0005cbd7, 0x0009cbd7, 0x000dcbd7, 0x0011cbd7,
0x0015cbd7, 0x0019cbd7, 0x001dcbd7, 0x0001d3d7, 0x0005d3d7, 0x0009d3d7,
0x000dd3d7, 0x0011d3d7, 0x0015d3d7, 0x0019d3d7, 0x001dd3d7, 0x0001dbd7,
0x0005dbd7, 0x0009dbd7, 0x000ddbd7, 0x0011dbd7, 0x0015dbd7, 0x0019dbd7,
0x001ddbd7, 0x0001e3d7, 0x0005e3d7, 0x0009e3d7, 0x000de3d7, 0x0011e3d7,
0x0015e3d7, 0x0019e3d7, 0x001de3d7, 0x0001ebd7, 0x0005ebd7, 0x0009ebd7,
0x000debd7, 0x0011ebd7, 0x0015ebd7, 0x0019ebd7, 0x001debd7, 0x0001f3d7,
0x0005f3d7, 0x0009f3d7, 0x000df3d7, 0x0011f3d7, 0x0015f3d7, 0x0019f3d7,
0x001df3d7, 0x0001fbd7, 0x0005fbd7, 0x0009fbd7, 0x000dfbd7, 0x0011fbd7,
0x0015fbd7, 0x0019fbd7, 0x001dfbd7, 0x0001c3e7, 0x0005c3e7, 0x0009c3e7,
0x000dc3e7, 0x0011c3e7, 0x0015c3e7, 0x0019c3e7, 0x001dc3e7, 0x0001cbe7,
0x0005cbe7, 0x0009cbe7, 0x000dcbe7, 0x0011cbe7, 0x0015cbe7, 0x0019cbe7,
0x001dcbe7, 0x0001d3e7, 0x0005d3e7, 0x0009d3e7, 0x000dd3e7, 0x0011d3e7,
0x0015d3e7, 0x0019d3e7, 0x001dd3e7, 0x0001dbe7, 0x0005dbe7, 0x0009dbe7,
0x000ddbe7, 0x0011dbe7, 0x0015dbe7, 0x0019dbe7, 0x001ddbe7, 0x0001e3e7,
0x0005e3e7, 0x0009e3e7, 0x000de3e7, 0x0011e3e7, 0x0015e3e7, 0x0019e3e7,
0x001de3e7, 0x0001ebe7, 0x0005ebe7, 0x0009ebe7, 0x000debe7, 0x0011ebe7,
0x0015ebe7, 0x0019ebe7, 0x001debe7, 0x0001f3e7, 0x0005f3e7, 0x0009f3e7,
0x000df3e7, 0x0011f3e7, 0x0015f3e7, 0x0019f3e7, 0x001df3e7, 0x0001fbe7,
0x0005fbe7, 0x0009fbe7, 0x000dfbe7, 0x0011fbe7, 0x0015fbe7, 0x0019fbe7,
0x001dfbe7, 0x0001c3f7, 0x0005c3f7, 0x0009c3f7, 0x000dc3f7, 0x0011c3f7,
0x0015c3f7, 0x0019c3f7, 0x001dc3f7, 0x0001cbf7, 0x0005cbf7, 0x0009cbf7,
0x000dcbf7, 0x0011cbf7, 0x0015cbf7, 0x0019cbf7, 0x001dcbf7, 0x0001d3f7,
0x0005d3f7, 0x0009d3f7, 0x000dd3f7, 0x0011d3f7, 0x0015d3f7, 0x0019d3f7,
0x001dd3f7, 0x0001dbf7, 0x0005dbf7, 0x0009dbf7, 0x000ddbf7, 0x0011dbf7,
0x0015dbf7, 0x0019dbf7, 0x001ddbf7, 0x0001e3f7, 0x0005e3f7, 0x0009e3f7,
0x000de3f7, 0x0011e3f7, 0x0015e3f7, 0x0019e3f7, 0x001de3f7, 0x0001ebf7,
0x0005ebf7, 0x0009ebf7, 0x000debf7, 0x0011ebf7, 0x0015ebf7, 0x0019ebf7,
0x001debf7, 0x0001f3f7, 0x0005f3f7, 0x0009f3f7, 0x000df3f7, 0x0011f3f7,
0x0015f3f7, 0x0019f3f7, 0x001df3f7, 0x0001fbf7, 0x0005fbf7, 0x0009fbf7,
0x000dfbf7, 0x0011fbf7, 0x0015fbf7, 0x0019fbf7, 0x001dfbf7, 0x00e1c387,
0x02e1c387, 0x04e1c387, 0x06e1c387, 0x08e1c387, 0x0ae1c387, 0x0ce1c387,
0x0ee1c387, 0x00e5c387, 0x02e5c387, 0x04e5c387, 0x06e5c387, 0x08e5c387,
0x0ae5c387, 0x0ce5c387, 0x0ee5c387, 0x00e9c387, 0x02e9c387, 0x04e9c387,
0x06e9c387, 0x08e9c387, 0x0ae9c387, 0x0ce9c387, 0x0ee9c387, 0x00edc387,
0x02edc387, 0x04edc387, 0x06edc387, 0x08edc387, 0x0aedc387, 0x0cedc387,
0x0eedc387, 0x00f1c387, 0x02f1c387, 0x04f1c387, 0x06f1c387, 0x08f1c387,
0x0af1c387, 0x0cf1c387, 0x0ef1c387, 0x00f5c387, 0x02f5c387, 0x04f5c387,
0x06f5c387, 0x08f5c387, 0x0af5c387, 0x0cf5c387, 0x0ef5c387, 0x00f9c387,
0x02f9c387, 0x04f9c387, 0x06f9c387, 0x08f9c387, 0x0af9c387, 0x0cf9c387,
0x0ef9c387, 0x00fdc387, 0x02fdc387, 0x04fdc387, 0x06fdc387, 0x08fdc387,
0x0afdc387, 0x0cfdc387, 0x0efdc387, 0x00e1cb87, 0x02e1cb87, 0x04e1cb87,
0x06e1cb87, 0x08e1cb87, 0x0ae1cb87, 0x0ce1cb87, 0x0ee1cb87, 0x00e5cb87,
0x02e5cb87, 0x04e5cb87, 0x06e5cb87, 0x08e5cb87, 0x0ae5cb87, 0x0ce5cb87,
0x0ee5cb87, 0x00e9cb87, 0x02e9cb87, 0x04e9cb87, 0x06e9cb87, 0x08e9cb87,
0x0ae9cb87, 0x0ce9cb87, 0x0ee9cb87, 0x00edcb87, 0x02edcb87, 0x04edcb87,
0x06edcb87, 0x08edcb87, 0x0aedcb87, 0x0cedcb87, 0x0eedcb87, 0x00f1cb87,
0x02f1cb87, 0x04f1cb87, 0x06f1cb87, 0x08f1cb87, 0x0af1cb87, 0x0cf1cb87,
0x0ef1cb87, 0x00f5cb87, 0x02f5cb87, 0x04f5cb87, 0x06f5cb87, 0x08f5cb87,
0x0af5cb87, 0x0cf5cb87, 0x0ef5cb87, 0x00f9cb87, 0x02f9cb87, 0x04f9cb87,
0x06f9cb87, 0x08f9cb87,
};
static const uint32_t kZeroRepsDepth[704] = {
0, 4, 8, 7, 7, 7, 7, 7, 7, 7, 7, 11, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
};
static const uint64_t kNonZeroRepsBits[704] = {
0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
0x00000afb, 0x00000efb, 0x0000b2cb, 0x0001b2cb, 0x0002b2cb, 0x0003b2cb,
0x0000b6cb, 0x0001b6cb, 0x0002b6cb, 0x0003b6cb, 0x0000bacb, 0x0001bacb,
0x0002bacb, 0x0003bacb, 0x0000becb, 0x0001becb, 0x0002becb, 0x0003becb,
0x0000b2db, 0x0001b2db, 0x0002b2db, 0x0003b2db, 0x0000b6db, 0x0001b6db,
0x0002b6db, 0x0003b6db, 0x0000badb, 0x0001badb, 0x0002badb, 0x0003badb,
0x0000bedb, 0x0001bedb, 0x0002bedb, 0x0003bedb, 0x0000b2eb, 0x0001b2eb,
0x0002b2eb, 0x0003b2eb, 0x0000b6eb, 0x0001b6eb, 0x0002b6eb, 0x0003b6eb,
0x0000baeb, 0x0001baeb, 0x0002baeb, 0x0003baeb, 0x0000beeb, 0x0001beeb,
0x0002beeb, 0x0003beeb, 0x0000b2fb, 0x0001b2fb, 0x0002b2fb, 0x0003b2fb,
0x0000b6fb, 0x0001b6fb, 0x0002b6fb, 0x0003b6fb, 0x0000bafb, 0x0001bafb,
0x0002bafb, 0x0003bafb, 0x0000befb, 0x0001befb, 0x0002befb, 0x0003befb,
0x002cb2cb, 0x006cb2cb, 0x00acb2cb, 0x00ecb2cb, 0x002db2cb, 0x006db2cb,
0x00adb2cb, 0x00edb2cb, 0x002eb2cb, 0x006eb2cb, 0x00aeb2cb, 0x00eeb2cb,
0x002fb2cb, 0x006fb2cb, 0x00afb2cb, 0x00efb2cb, 0x002cb6cb, 0x006cb6cb,
0x00acb6cb, 0x00ecb6cb, 0x002db6cb, 0x006db6cb, 0x00adb6cb, 0x00edb6cb,
0x002eb6cb, 0x006eb6cb, 0x00aeb6cb, 0x00eeb6cb, 0x002fb6cb, 0x006fb6cb,
0x00afb6cb, 0x00efb6cb, 0x002cbacb, 0x006cbacb, 0x00acbacb, 0x00ecbacb,
0x002dbacb, 0x006dbacb, 0x00adbacb, 0x00edbacb, 0x002ebacb, 0x006ebacb,
0x00aebacb, 0x00eebacb, 0x002fbacb, 0x006fbacb, 0x00afbacb, 0x00efbacb,
0x002cbecb, 0x006cbecb, 0x00acbecb, 0x00ecbecb, 0x002dbecb, 0x006dbecb,
0x00adbecb, 0x00edbecb, 0x002ebecb, 0x006ebecb, 0x00aebecb, 0x00eebecb,
0x002fbecb, 0x006fbecb, 0x00afbecb, 0x00efbecb, 0x002cb2db, 0x006cb2db,
0x00acb2db, 0x00ecb2db, 0x002db2db, 0x006db2db, 0x00adb2db, 0x00edb2db,
0x002eb2db, 0x006eb2db, 0x00aeb2db, 0x00eeb2db, 0x002fb2db, 0x006fb2db,
0x00afb2db, 0x00efb2db, 0x002cb6db, 0x006cb6db, 0x00acb6db, 0x00ecb6db,
0x002db6db, 0x006db6db, 0x00adb6db, 0x00edb6db, 0x002eb6db, 0x006eb6db,
0x00aeb6db, 0x00eeb6db, 0x002fb6db, 0x006fb6db, 0x00afb6db, 0x00efb6db,
0x002cbadb, 0x006cbadb, 0x00acbadb, 0x00ecbadb, 0x002dbadb, 0x006dbadb,
0x00adbadb, 0x00edbadb, 0x002ebadb, 0x006ebadb, 0x00aebadb, 0x00eebadb,
0x002fbadb, 0x006fbadb, 0x00afbadb, 0x00efbadb, 0x002cbedb, 0x006cbedb,
0x00acbedb, 0x00ecbedb, 0x002dbedb, 0x006dbedb, 0x00adbedb, 0x00edbedb,
0x002ebedb, 0x006ebedb, 0x00aebedb, 0x00eebedb, 0x002fbedb, 0x006fbedb,
0x00afbedb, 0x00efbedb, 0x002cb2eb, 0x006cb2eb, 0x00acb2eb, 0x00ecb2eb,
0x002db2eb, 0x006db2eb, 0x00adb2eb, 0x00edb2eb, 0x002eb2eb, 0x006eb2eb,
0x00aeb2eb, 0x00eeb2eb, 0x002fb2eb, 0x006fb2eb, 0x00afb2eb, 0x00efb2eb,
0x002cb6eb, 0x006cb6eb, 0x00acb6eb, 0x00ecb6eb, 0x002db6eb, 0x006db6eb,
0x00adb6eb, 0x00edb6eb, 0x002eb6eb, 0x006eb6eb, 0x00aeb6eb, 0x00eeb6eb,
0x002fb6eb, 0x006fb6eb, 0x00afb6eb, 0x00efb6eb, 0x002cbaeb, 0x006cbaeb,
0x00acbaeb, 0x00ecbaeb, 0x002dbaeb, 0x006dbaeb, 0x00adbaeb, 0x00edbaeb,
0x002ebaeb, 0x006ebaeb, 0x00aebaeb, 0x00eebaeb, 0x002fbaeb, 0x006fbaeb,
0x00afbaeb, 0x00efbaeb, 0x002cbeeb, 0x006cbeeb, 0x00acbeeb, 0x00ecbeeb,
0x002dbeeb, 0x006dbeeb, 0x00adbeeb, 0x00edbeeb, 0x002ebeeb, 0x006ebeeb,
0x00aebeeb, 0x00eebeeb, 0x002fbeeb, 0x006fbeeb, 0x00afbeeb, 0x00efbeeb,
0x002cb2fb, 0x006cb2fb, 0x00acb2fb, 0x00ecb2fb, 0x002db2fb, 0x006db2fb,
0x00adb2fb, 0x00edb2fb, 0x002eb2fb, 0x006eb2fb, 0x00aeb2fb, 0x00eeb2fb,
0x002fb2fb, 0x006fb2fb, 0x00afb2fb, 0x00efb2fb, 0x002cb6fb, 0x006cb6fb,
0x00acb6fb, 0x00ecb6fb, 0x002db6fb, 0x006db6fb, 0x00adb6fb, 0x00edb6fb,
0x002eb6fb, 0x006eb6fb, 0x00aeb6fb, 0x00eeb6fb, 0x002fb6fb, 0x006fb6fb,
0x00afb6fb, 0x00efb6fb, 0x002cbafb, 0x006cbafb, 0x00acbafb, 0x00ecbafb,
0x002dbafb, 0x006dbafb, 0x00adbafb, 0x00edbafb, 0x002ebafb, 0x006ebafb,
0x00aebafb, 0x00eebafb, 0x002fbafb, 0x006fbafb, 0x00afbafb, 0x00efbafb,
0x002cbefb, 0x006cbefb, 0x00acbefb, 0x00ecbefb, 0x002dbefb, 0x006dbefb,
0x00adbefb, 0x00edbefb, 0x002ebefb, 0x006ebefb, 0x00aebefb, 0x00eebefb,
0x002fbefb, 0x006fbefb, 0x00afbefb, 0x00efbefb, 0x0b2cb2cb, 0x1b2cb2cb,
0x2b2cb2cb, 0x3b2cb2cb, 0x0b6cb2cb, 0x1b6cb2cb, 0x2b6cb2cb, 0x3b6cb2cb,
0x0bacb2cb, 0x1bacb2cb, 0x2bacb2cb, 0x3bacb2cb, 0x0becb2cb, 0x1becb2cb,
0x2becb2cb, 0x3becb2cb, 0x0b2db2cb, 0x1b2db2cb, 0x2b2db2cb, 0x3b2db2cb,
0x0b6db2cb, 0x1b6db2cb, 0x2b6db2cb, 0x3b6db2cb, 0x0badb2cb, 0x1badb2cb,
0x2badb2cb, 0x3badb2cb, 0x0bedb2cb, 0x1bedb2cb, 0x2bedb2cb, 0x3bedb2cb,
0x0b2eb2cb, 0x1b2eb2cb, 0x2b2eb2cb, 0x3b2eb2cb, 0x0b6eb2cb, 0x1b6eb2cb,
0x2b6eb2cb, 0x3b6eb2cb, 0x0baeb2cb, 0x1baeb2cb, 0x2baeb2cb, 0x3baeb2cb,
0x0beeb2cb, 0x1beeb2cb, 0x2beeb2cb, 0x3beeb2cb, 0x0b2fb2cb, 0x1b2fb2cb,
0x2b2fb2cb, 0x3b2fb2cb, 0x0b6fb2cb, 0x1b6fb2cb, 0x2b6fb2cb, 0x3b6fb2cb,
0x0bafb2cb, 0x1bafb2cb, 0x2bafb2cb, 0x3bafb2cb, 0x0befb2cb, 0x1befb2cb,
0x2befb2cb, 0x3befb2cb, 0x0b2cb6cb, 0x1b2cb6cb, 0x2b2cb6cb, 0x3b2cb6cb,
0x0b6cb6cb, 0x1b6cb6cb, 0x2b6cb6cb, 0x3b6cb6cb, 0x0bacb6cb, 0x1bacb6cb,
0x2bacb6cb, 0x3bacb6cb, 0x0becb6cb, 0x1becb6cb, 0x2becb6cb, 0x3becb6cb,
0x0b2db6cb, 0x1b2db6cb, 0x2b2db6cb, 0x3b2db6cb, 0x0b6db6cb, 0x1b6db6cb,
0x2b6db6cb, 0x3b6db6cb, 0x0badb6cb, 0x1badb6cb, 0x2badb6cb, 0x3badb6cb,
0x0bedb6cb, 0x1bedb6cb, 0x2bedb6cb, 0x3bedb6cb, 0x0b2eb6cb, 0x1b2eb6cb,
0x2b2eb6cb, 0x3b2eb6cb, 0x0b6eb6cb, 0x1b6eb6cb, 0x2b6eb6cb, 0x3b6eb6cb,
0x0baeb6cb, 0x1baeb6cb, 0x2baeb6cb, 0x3baeb6cb, 0x0beeb6cb, 0x1beeb6cb,
0x2beeb6cb, 0x3beeb6cb, 0x0b2fb6cb, 0x1b2fb6cb, 0x2b2fb6cb, 0x3b2fb6cb,
0x0b6fb6cb, 0x1b6fb6cb, 0x2b6fb6cb, 0x3b6fb6cb, 0x0bafb6cb, 0x1bafb6cb,
0x2bafb6cb, 0x3bafb6cb, 0x0befb6cb, 0x1befb6cb, 0x2befb6cb, 0x3befb6cb,
0x0b2cbacb, 0x1b2cbacb, 0x2b2cbacb, 0x3b2cbacb, 0x0b6cbacb, 0x1b6cbacb,
0x2b6cbacb, 0x3b6cbacb, 0x0bacbacb, 0x1bacbacb, 0x2bacbacb, 0x3bacbacb,
0x0becbacb, 0x1becbacb, 0x2becbacb, 0x3becbacb, 0x0b2dbacb, 0x1b2dbacb,
0x2b2dbacb, 0x3b2dbacb, 0x0b6dbacb, 0x1b6dbacb, 0x2b6dbacb, 0x3b6dbacb,
0x0badbacb, 0x1badbacb, 0x2badbacb, 0x3badbacb, 0x0bedbacb, 0x1bedbacb,
0x2bedbacb, 0x3bedbacb, 0x0b2ebacb, 0x1b2ebacb, 0x2b2ebacb, 0x3b2ebacb,
0x0b6ebacb, 0x1b6ebacb, 0x2b6ebacb, 0x3b6ebacb, 0x0baebacb, 0x1baebacb,
0x2baebacb, 0x3baebacb, 0x0beebacb, 0x1beebacb, 0x2beebacb, 0x3beebacb,
0x0b2fbacb, 0x1b2fbacb, 0x2b2fbacb, 0x3b2fbacb, 0x0b6fbacb, 0x1b6fbacb,
0x2b6fbacb, 0x3b6fbacb, 0x0bafbacb, 0x1bafbacb, 0x2bafbacb, 0x3bafbacb,
0x0befbacb, 0x1befbacb, 0x2befbacb, 0x3befbacb, 0x0b2cbecb, 0x1b2cbecb,
0x2b2cbecb, 0x3b2cbecb, 0x0b6cbecb, 0x1b6cbecb, 0x2b6cbecb, 0x3b6cbecb,
0x0bacbecb, 0x1bacbecb, 0x2bacbecb, 0x3bacbecb, 0x0becbecb, 0x1becbecb,
0x2becbecb, 0x3becbecb, 0x0b2dbecb, 0x1b2dbecb, 0x2b2dbecb, 0x3b2dbecb,
0x0b6dbecb, 0x1b6dbecb, 0x2b6dbecb, 0x3b6dbecb, 0x0badbecb, 0x1badbecb,
0x2badbecb, 0x3badbecb, 0x0bedbecb, 0x1bedbecb, 0x2bedbecb, 0x3bedbecb,
0x0b2ebecb, 0x1b2ebecb, 0x2b2ebecb, 0x3b2ebecb, 0x0b6ebecb, 0x1b6ebecb,
0x2b6ebecb, 0x3b6ebecb, 0x0baebecb, 0x1baebecb, 0x2baebecb, 0x3baebecb,
0x0beebecb, 0x1beebecb, 0x2beebecb, 0x3beebecb, 0x0b2fbecb, 0x1b2fbecb,
0x2b2fbecb, 0x3b2fbecb, 0x0b6fbecb, 0x1b6fbecb, 0x2b6fbecb, 0x3b6fbecb,
0x0bafbecb, 0x1bafbecb, 0x2bafbecb, 0x3bafbecb, 0x0befbecb, 0x1befbecb,
0x2befbecb, 0x3befbecb, 0x0b2cb2db, 0x1b2cb2db, 0x2b2cb2db, 0x3b2cb2db,
0x0b6cb2db, 0x1b6cb2db, 0x2b6cb2db, 0x3b6cb2db, 0x0bacb2db, 0x1bacb2db,
0x2bacb2db, 0x3bacb2db, 0x0becb2db, 0x1becb2db, 0x2becb2db, 0x3becb2db,
0x0b2db2db, 0x1b2db2db, 0x2b2db2db, 0x3b2db2db, 0x0b6db2db, 0x1b6db2db,
0x2b6db2db, 0x3b6db2db, 0x0badb2db, 0x1badb2db, 0x2badb2db, 0x3badb2db,
0x0bedb2db, 0x1bedb2db, 0x2bedb2db, 0x3bedb2db, 0x0b2eb2db, 0x1b2eb2db,
0x2b2eb2db, 0x3b2eb2db, 0x0b6eb2db, 0x1b6eb2db, 0x2b6eb2db, 0x3b6eb2db,
0x0baeb2db, 0x1baeb2db, 0x2baeb2db, 0x3baeb2db, 0x0beeb2db, 0x1beeb2db,
0x2beeb2db, 0x3beeb2db, 0x0b2fb2db, 0x1b2fb2db, 0x2b2fb2db, 0x3b2fb2db,
0x0b6fb2db, 0x1b6fb2db, 0x2b6fb2db, 0x3b6fb2db, 0x0bafb2db, 0x1bafb2db,
0x2bafb2db, 0x3bafb2db, 0x0befb2db, 0x1befb2db, 0x2befb2db, 0x3befb2db,
0x0b2cb6db, 0x1b2cb6db, 0x2b2cb6db, 0x3b2cb6db, 0x0b6cb6db, 0x1b6cb6db,
0x2b6cb6db, 0x3b6cb6db, 0x0bacb6db, 0x1bacb6db, 0x2bacb6db, 0x3bacb6db,
0x0becb6db, 0x1becb6db, 0x2becb6db, 0x3becb6db, 0x0b2db6db, 0x1b2db6db,
0x2b2db6db, 0x3b2db6db, 0x0b6db6db, 0x1b6db6db, 0x2b6db6db, 0x3b6db6db,
0x0badb6db, 0x1badb6db, 0x2badb6db, 0x3badb6db, 0x0bedb6db, 0x1bedb6db,
0x2bedb6db, 0x3bedb6db, 0x0b2eb6db, 0x1b2eb6db, 0x2b2eb6db, 0x3b2eb6db,
0x0b6eb6db, 0x1b6eb6db, 0x2b6eb6db, 0x3b6eb6db, 0x0baeb6db, 0x1baeb6db,
0x2baeb6db, 0x3baeb6db,
};
static const uint32_t kNonZeroRepsDepth[704] = {
6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
};
static const uint16_t kStaticLiteralCodeBits[256] = {
0, 128, 64, 192, 32, 160, 96, 224,
16, 144, 80, 208, 48, 176, 112, 240,
8, 136, 72, 200, 40, 168, 104, 232,
24, 152, 88, 216, 56, 184, 120, 248,
4, 132, 68, 196, 36, 164, 100, 228,
20, 148, 84, 212, 52, 180, 116, 244,
12, 140, 76, 204, 44, 172, 108, 236,
28, 156, 92, 220, 60, 188, 124, 252,
2, 130, 66, 194, 34, 162, 98, 226,
18, 146, 82, 210, 50, 178, 114, 242,
10, 138, 74, 202, 42, 170, 106, 234,
26, 154, 90, 218, 58, 186, 122, 250,
6, 134, 70, 198, 38, 166, 102, 230,
22, 150, 86, 214, 54, 182, 118, 246,
14, 142, 78, 206, 46, 174, 110, 238,
30, 158, 94, 222, 62, 190, 126, 254,
1, 129, 65, 193, 33, 161, 97, 225,
17, 145, 81, 209, 49, 177, 113, 241,
9, 137, 73, 201, 41, 169, 105, 233,
25, 153, 89, 217, 57, 185, 121, 249,
5, 133, 69, 197, 37, 165, 101, 229,
21, 149, 85, 213, 53, 181, 117, 245,
13, 141, 77, 205, 45, 173, 109, 237,
29, 157, 93, 221, 61, 189, 125, 253,
3, 131, 67, 195, 35, 163, 99, 227,
19, 147, 83, 211, 51, 179, 115, 243,
11, 139, 75, 203, 43, 171, 107, 235,
27, 155, 91, 219, 59, 187, 123, 251,
7, 135, 71, 199, 39, 167, 103, 231,
23, 151, 87, 215, 55, 183, 119, 247,
15, 143, 79, 207, 47, 175, 111, 239,
31, 159, 95, 223, 63, 191, 127, 255,
};
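// Illustrative note (editor's sketch, not part of the imported source): every
// literal gets an 8-bit code here, and entry i is simply i bit-reversed in
// 8 bits, because WriteBits emits the low bit of a code first. For example,
// literal 0x03 (00000011b) is stored as kStaticLiteralCodeBits[3] == 192
// (11000000b).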
inline void StoreStaticLiteralHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(32, 0x00010003U, storage_ix, storage);
}
static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
0, 256, 128, 384, 64, 320, 192, 448,
32, 288, 160, 416, 96, 352, 224, 480,
16, 272, 144, 400, 80, 336, 208, 464,
48, 304, 176, 432, 112, 368, 240, 496,
8, 264, 136, 392, 72, 328, 200, 456,
40, 296, 168, 424, 104, 360, 232, 488,
24, 280, 152, 408, 88, 344, 216, 472,
56, 312, 184, 440, 120, 376, 248, 504,
4, 260, 132, 388, 68, 324, 196, 452,
36, 292, 164, 420, 100, 356, 228, 484,
20, 276, 148, 404, 84, 340, 212, 468,
52, 308, 180, 436, 116, 372, 244, 500,
12, 268, 140, 396, 76, 332, 204, 460,
44, 300, 172, 428, 108, 364, 236, 492,
28, 284, 156, 412, 92, 348, 220, 476,
60, 316, 188, 444, 124, 380, 252, 508,
2, 258, 130, 386, 66, 322, 194, 450,
34, 290, 162, 418, 98, 354, 226, 482,
18, 274, 146, 402, 82, 338, 210, 466,
50, 306, 178, 434, 114, 370, 242, 498,
10, 266, 138, 394, 74, 330, 202, 458,
42, 298, 170, 426, 106, 362, 234, 490,
26, 282, 154, 410, 90, 346, 218, 474,
58, 314, 186, 442, 122, 378, 250, 506,
6, 262, 134, 390, 70, 326, 198, 454,
38, 294, 166, 422, 102, 358, 230, 486,
22, 278, 150, 406, 86, 342, 214, 470,
54, 310, 182, 438, 118, 374, 246, 502,
14, 270, 142, 398, 78, 334, 206, 462,
46, 302, 174, 430, 110, 366, 238, 494,
30, 286, 158, 414, 94, 350, 222, 478,
62, 318, 190, 446, 126, 382, 254, 510,
1, 257, 129, 385, 65, 321, 193, 449,
33, 289, 161, 417, 97, 353, 225, 481,
17, 273, 145, 401, 81, 337, 209, 465,
49, 305, 177, 433, 113, 369, 241, 497,
9, 265, 137, 393, 73, 329, 201, 457,
41, 297, 169, 425, 105, 361, 233, 489,
25, 281, 153, 409, 89, 345, 217, 473,
57, 313, 185, 441, 121, 377, 249, 505,
5, 261, 133, 389, 69, 325, 197, 453,
37, 293, 165, 421, 101, 357, 229, 485,
21, 277, 149, 405, 85, 341, 213, 469,
53, 309, 181, 437, 117, 373, 245, 501,
13, 269, 141, 397, 77, 333, 205, 461,
45, 301, 173, 429, 109, 365, 237, 493,
29, 285, 157, 413, 93, 349, 221, 477,
61, 317, 189, 445, 125, 381, 253, 509,
3, 259, 131, 387, 67, 323, 195, 451,
35, 291, 163, 419, 99, 355, 227, 483,
19, 275, 147, 403, 83, 339, 211, 467,
51, 307, 179, 435, 115, 371, 243, 499,
11, 267, 139, 395, 75, 331, 203, 459,
43, 299, 171, 427, 107, 363, 235, 491,
27, 283, 155, 411, 91, 347, 219, 475,
59, 315, 187, 443, 123, 379, 251, 507,
7, 1031, 519, 1543, 263, 1287, 775, 1799,
135, 1159, 647, 1671, 391, 1415, 903, 1927,
71, 1095, 583, 1607, 327, 1351, 839, 1863,
199, 1223, 711, 1735, 455, 1479, 967, 1991,
39, 1063, 551, 1575, 295, 1319, 807, 1831,
167, 1191, 679, 1703, 423, 1447, 935, 1959,
103, 1127, 615, 1639, 359, 1383, 871, 1895,
231, 1255, 743, 1767, 487, 1511, 999, 2023,
23, 1047, 535, 1559, 279, 1303, 791, 1815,
151, 1175, 663, 1687, 407, 1431, 919, 1943,
87, 1111, 599, 1623, 343, 1367, 855, 1879,
215, 1239, 727, 1751, 471, 1495, 983, 2007,
55, 1079, 567, 1591, 311, 1335, 823, 1847,
183, 1207, 695, 1719, 439, 1463, 951, 1975,
119, 1143, 631, 1655, 375, 1399, 887, 1911,
247, 1271, 759, 1783, 503, 1527, 1015, 2039,
15, 1039, 527, 1551, 271, 1295, 783, 1807,
143, 1167, 655, 1679, 399, 1423, 911, 1935,
79, 1103, 591, 1615, 335, 1359, 847, 1871,
207, 1231, 719, 1743, 463, 1487, 975, 1999,
47, 1071, 559, 1583, 303, 1327, 815, 1839,
175, 1199, 687, 1711, 431, 1455, 943, 1967,
111, 1135, 623, 1647, 367, 1391, 879, 1903,
239, 1263, 751, 1775, 495, 1519, 1007, 2031,
31, 1055, 543, 1567, 287, 1311, 799, 1823,
159, 1183, 671, 1695, 415, 1439, 927, 1951,
95, 1119, 607, 1631, 351, 1375, 863, 1887,
223, 1247, 735, 1759, 479, 1503, 991, 2015,
63, 1087, 575, 1599, 319, 1343, 831, 1855,
191, 1215, 703, 1727, 447, 1471, 959, 1983,
127, 1151, 639, 1663, 383, 1407, 895, 1919,
255, 1279, 767, 1791, 511, 1535, 1023, 2047,
};
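// Illustrative note (editor's sketch): this static command code gives 9-bit
// codes to the first 448 symbols and 11-bit codes to the remaining 256, which
// exactly fills the code space: 448/2^9 + 256/2^11 = 0.875 + 0.125 = 1.
// The entries are again bit-reversed, e.g. kStaticCommandCodeBits[1] == 256
// (the 9-bit reversal of 1) and kStaticCommandCodeBits[448] == 7 (the 11-bit
// reversal of the first 11-bit code, 11100000000b).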
inline void StoreStaticCommandHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(28, 0x0000000006307003U, storage_ix, storage);
WriteBits(31, 0x0000000009262441U, storage_ix, storage);
}
static const uint16_t kStaticDistanceCodeBits[64] = {
0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63,
};
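// Illustrative note (editor's sketch): all 64 distance codes are 6 bits long;
// entry i is i bit-reversed in 6 bits, e.g. kStaticDistanceCodeBits[3] == 48
// (110000b is 000011b reversed).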
inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(18, 0x000000000001dc03U, storage_ix, storage);
WriteBits(10, 0x00000000000000daU, storage_ix, storage);
}
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_


@@ -0,0 +1,139 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Utilities for fast computation of logarithms.
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
#include <assert.h>
#include <math.h>
#include "./types.h"
namespace brotli {
static inline uint32_t Log2FloorNonZero(size_t n) {
#ifdef __GNUC__
return 31u ^ static_cast<uint32_t>(__builtin_clz(static_cast<uint32_t>(n)));
#else
uint32_t result = 0;
while (n >>= 1) result++;
return result;
#endif
}
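// Worked example (illustrative, not part of the imported source): for n == 5
// (101b), __builtin_clz counts 29 leading zeros and 31 ^ 29 == 2 ==
// floor(log2(5)). The XOR is equivalent to 31 - clz here, since clz is always
// in [0, 31] for nonzero n. The portable fallback shifts 5 -> 2 -> 1 -> 0 and
// counts 2 shifts, giving the same answer.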
// A lookup table for small values of log2(int) to be used in entropy
// computation.
//
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
static const float kLog2Table[] = {
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
7.9943534368588578f
};
// Faster logarithm for small integers, with the property of log2(0) == 0.
static inline double FastLog2(size_t v) {
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
return kLog2Table[v];
}
#if defined(_MSC_VER) && _MSC_VER <= 1700
// Visual Studio 2012 does not have the log2() function defined, so we use
// log() and a multiplication instead.
static const double kLog2Inv = 1.4426950408889634f;
return log(static_cast<double>(v)) * kLog2Inv;
#else
return log2(static_cast<double>(v));
#endif
}
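// Worked example (illustrative): FastLog2(8) hits the table path and returns
// kLog2Table[8] == 3.0 exactly; FastLog2(1 << 20) falls outside the 256-entry
// table and goes through log2() (or log() * kLog2Inv on old MSVC), returning
// 20.0.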
} // namespace brotli
#endif // BROTLI_ENC_FAST_LOG_H_


@@ -0,0 +1,77 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find maximal matching prefixes of strings.
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
#include "./port.h"
#include "./types.h"
namespace brotli {
// Separate implementation for little-endian 64-bit targets, for speed.
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
while (PREDICT_TRUE(--limit2)) {
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
s2 += 8;
matched += 8;
} else {
uint64_t x =
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
size_t matching_bits = static_cast<size_t>(__builtin_ctzll(x));
matched += matching_bits >> 3;
return matched;
}
}
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
while (--limit) {
if (PREDICT_TRUE(s1[matched] == *s2)) {
++s2;
++matched;
} else {
return matched;
}
}
return matched;
}
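// Worked example (illustrative): if the first mismatch inside an 8-byte block
// is at byte offset 3, then bytes 0..2 of x are zero, the lowest set bit of x
// lies in bits 24..31 of the little-endian load, __builtin_ctzll(x) is in
// [24, 31], and matching_bits >> 3 == 3 -- exactly the number of equal
// leading bytes.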
#else
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
// Find out how long the match is. We loop over the data 32 bits at a
// time until we find a 32-bit block that doesn't match; then we find
// the first non-matching bit and use that to calculate the total
// length of the match.
while (s2_ptr <= s2_limit - 4 &&
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
s2_ptr += 4;
matched += 4;
}
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
++s2_ptr;
++matched;
}
return matched;
}
#endif
} // namespace brotli
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_

modules/brotli/enc/hash.h

@@ -0,0 +1,974 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// A (forgetful) hash table into the data seen by the compressor, used to
// help create backward references to previous data.
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
#include <sys/types.h>
#include <algorithm>
#include <cstring>
#include <limits>
#include "./dictionary_hash.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./prefix.h"
#include "./static_dict.h"
#include "./transform.h"
#include "./types.h"
namespace brotli {
static const size_t kMaxTreeSearchDepth = 64;
static const size_t kMaxTreeCompLength = 128;
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
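// Illustrative reading (editor's note): together these two tables enumerate
// the 16 "short code" candidates tried against the distance cache: entries
// 0..3 are the last four distances verbatim, entries 4..9 are
// distance_cache[0] with offsets -1, +1, -2, +2, -3, +3, and entries 10..15
// apply the same six offsets to distance_cache[1].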
static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime; oddness is enough
//   for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
template<int kShiftBits>
inline uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - kShiftBits);
}
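// Worked example (illustrative): with kShiftBits == 14 this returns h >> 18,
// the top 14 bits of the 32-bit product, i.e. a bucket index in [0, 16384).
// Only the shift width varies between hashers; the multiplier is shared.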
// Usually, we always choose the longest backward reference. This function
// allows for the exception of that rule.
//
// If we choose a backward reference that is further away, it will
// usually be coded with more bits. We approximate this by assuming
// log2(distance). If the distance can be expressed in terms of the
// last four distances, we use some heuristic constants to estimate
// the bits cost. For the first up to four literals we use the bit
// cost of the literals from the literal cost model, after that we
// use the average bit cost of the cost model.
//
// This function is used to sometimes discard a longer backward reference
// when it is not much longer and the bit cost for encoding it is more
// than the saved literals.
//
// backward_reference_offset MUST be positive.
inline double BackwardReferenceScore(size_t copy_length,
size_t backward_reference_offset) {
return 5.4 * static_cast<double>(copy_length) -
1.20 * Log2FloorNonZero(backward_reference_offset);
}
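// Worked example (illustrative): a copy of length 8 at distance 4096 scores
// 5.4 * 8 - 1.2 * Log2FloorNonZero(4096) = 43.2 - 1.2 * 12 = 28.8, while the
// same length at distance 16 scores 43.2 - 4.8 = 38.4. Each doubling of the
// distance costs only 1.2, so one extra matched byte (+5.4) outweighs about
// 4.5 distance doublings. The variant below swaps the log2 term for a small
// per-code constant; reusing the last distance (code 0) even gets a -0.6
// "cost", i.e. a bonus.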
inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
size_t distance_short_code) {
static const double kDistanceShortCodeBitCost[16] = {
-0.6, 0.95, 1.17, 1.27,
0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
1.05, 1.05, 1.15, 1.15, 1.25, 1.25
};
return 5.4 * static_cast<double>(copy_length) -
kDistanceShortCodeBitCost[distance_short_code];
}
struct BackwardMatch {
BackwardMatch(void) : distance(0), length_and_code(0) {}
BackwardMatch(size_t dist, size_t len)
: distance(static_cast<uint32_t>(dist))
, length_and_code(static_cast<uint32_t>(len << 5)) {}
BackwardMatch(size_t dist, size_t len, size_t len_code)
: distance(static_cast<uint32_t>(dist))
, length_and_code(static_cast<uint32_t>(
(len << 5) | (len == len_code ? 0 : len_code))) {}
size_t length(void) const {
return length_and_code >> 5;
}
size_t length_code(void) const {
size_t code = length_and_code & 31;
return code ? code : length();
}
uint32_t distance;
uint32_t length_and_code;
};
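// Worked example (illustrative): BackwardMatch(100, 10) stores distance 100
// and length_and_code == 10 << 5 == 320; its low 5 bits are 0, so
// length_code() falls back to length() == 10. BackwardMatch(100, 10, 12)
// stores 320 | 12 == 332, so length() == 332 >> 5 == 10 but
// length_code() == 12 -- the dictionary-match case where the coded length
// differs from the matched length.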
// A (forgetful) hash table into the data seen by the compressor, used to
// help create backward references to previous data.
//
// This is a hash map of fixed size (kBucketSize). Starting from the
// given index, kBucketSweep buckets are used to store values of a key.
template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
class HashLongestMatchQuickly {
public:
HashLongestMatchQuickly(void) {
Reset();
}
void Reset(void) {
need_init_ = true;
num_dict_lookups_ = 0;
num_dict_matches_ = 0;
}
void Init(void) {
if (need_init_) {
// It is not strictly necessary to fill this buffer here, but
// not filling it would make the results of the compression stochastic
// (though still correct). This is because random data would cause the
// system to find accidentally good backward references here and there.
memset(&buckets_[0], 0, sizeof(buckets_));
need_init_ = false;
}
}
void InitForData(const uint8_t* data, size_t num) {
for (size_t i = 0; i < num; ++i) {
const uint32_t key = HashBytes(&data[i]);
memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));
need_init_ = false;
}
}
// Look at 5 bytes at data (kHashLength).
// Compute a hash from these, and store the position ix in one of the
// kBucketSweep buckets [key .. key + kBucketSweep - 1].
inline void Store(const uint8_t *data, const uint32_t ix) {
const uint32_t key = HashBytes(data);
// Wiggle the value with the bucket sweep range.
const uint32_t off = (ix >> 3) % kBucketSweep;
buckets_[key + off] = ix;
}
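// Worked example (illustrative): with kBucketSweep == 4, positions 0..7
// hashing to the same key land in buckets_[key + 0], positions 8..15 in
// buckets_[key + 1], 16..23 in buckets_[key + 2], 24..31 in buckets_[key + 3],
// and 32..39 wrap back to buckets_[key + 0], so nearby positions do not
// immediately evict each other.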
// Finds the longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
// up to the length of max_length and stores the position cur_ix in the
// hash table.
//
// Does not look for matches longer than max_length.
// Does not look for matches further away than max_backward.
// Writes the best found match length into best_len_out.
// Writes the backward distance of the best match into best_distance_out.
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
const size_t ring_buffer_mask,
const int* __restrict distance_cache,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double* __restrict best_score_out) {
const size_t best_len_in = *best_len_out;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
double best_score = *best_score_out;
size_t best_len = best_len_in;
size_t cached_backward = static_cast<size_t>(distance_cache[0]);
size_t prev_ix = cur_ix - cached_backward;
bool match_found = false;
if (prev_ix < cur_ix) {
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char == ring_buffer[prev_ix + best_len]) {
size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
best_len = len;
*best_len_out = len;
*best_len_code_out = len;
*best_distance_out = cached_backward;
*best_score_out = best_score;
compare_char = ring_buffer[cur_ix_masked + best_len];
if (kBucketSweep == 1) {
buckets_[key] = static_cast<uint32_t>(cur_ix);
return true;
} else {
match_found = true;
}
}
}
}
if (kBucketSweep == 1) {
// Only one to look for, don't bother to prepare for a loop.
prev_ix = buckets_[key];
buckets_[key] = static_cast<uint32_t>(cur_ix);
size_t backward = cur_ix - prev_ix;
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
return false;
}
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
return false;
}
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
*best_len_out = len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = BackwardReferenceScore(len, backward);
return true;
}
} else {
uint32_t *bucket = buckets_ + key;
prev_ix = *bucket++;
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
const size_t backward = cur_ix - prev_ix;
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char != ring_buffer[prev_ix + best_len]) {
continue;
}
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
continue;
}
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
const double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = score;
compare_char = ring_buffer[cur_ix_masked + best_len];
match_found = true;
}
}
}
}
if (kUseDictionary && !match_found &&
num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
++num_dict_lookups_;
const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
const uint16_t v = kStaticDictionaryHash[dict_key];
if (v > 0) {
const uint32_t len = v & 31;
const uint32_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const size_t matchlen =
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const size_t backward = max_backward + word_id + 1;
const double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++num_dict_matches_;
best_score = score;
best_len = matchlen;
*best_len_out = best_len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
}
}
const uint32_t off = (cur_ix >> 3) % kBucketSweep;
buckets_[key + off] = static_cast<uint32_t>(cur_ix);
return match_found;
}
enum { kHashLength = 5 };
enum { kHashTypeLength = 8 };
// HashBytes is the function that chooses the bucket to place
// the address in. The HashLongestMatch and HashLongestMatchQuickly
// classes have separate, different implementations of hashing.
static uint32_t HashBytes(const uint8_t *data) {
// Computing a hash based on 5 bytes works much better for
// qualities 1 and 3, where the next hash value is likely to replace
// the current one before it is ever used.
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return static_cast<uint32_t>(h >> (64 - kBucketBits));
}
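// Illustrative note (editor's sketch): on a little-endian target the << 24
// pushes bytes 5..7 of the load out of the 64-bit value, so only data[0..4]
// influence the product -- which is what makes kHashLength == 5 above
// accurate -- and the top kBucketBits of the product become the bucket index.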
enum { kHashMapSize = 4 << kBucketBits };
private:
static const uint32_t kBucketSize = 1 << kBucketBits;
uint32_t buckets_[kBucketSize + kBucketSweep];
// True if buckets_ array needs to be initialized.
bool need_init_;
size_t num_dict_lookups_;
size_t num_dict_matches_;
};
// A (forgetful) hash table into the data seen by the compressor, used to
// help create backward references to previous data.
//
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
// index positions of the given hash key in the compressed data.
template <int kBucketBits,
int kBlockBits,
int kNumLastDistancesToCheck>
class HashLongestMatch {
public:
HashLongestMatch(void) {
Reset();
}
void Reset(void) {
need_init_ = true;
num_dict_lookups_ = 0;
num_dict_matches_ = 0;
}
void Init(void) {
if (need_init_) {
memset(&num_[0], 0, sizeof(num_));
need_init_ = false;
}
}
void InitForData(const uint8_t* data, size_t num) {
for (size_t i = 0; i < num; ++i) {
const uint32_t key = HashBytes(&data[i]);
num_[key] = 0;
need_init_ = false;
}
}
// Look at 3 bytes at data.
// Compute a hash from these, and store the value of ix at that position.
inline void Store(const uint8_t *data, const uint32_t ix) {
const uint32_t key = HashBytes(data);
const int minor_ix = num_[key] & kBlockMask;
buckets_[key][minor_ix] = ix;
++num_[key];
}
// Finds the longest backward match of &data[cur_ix] up to the length of
// max_length and stores the position cur_ix in the hash table.
//
// Does not look for matches longer than max_length.
// Does not look for matches further away than max_backward.
// Writes the best found match length into best_len_out.
// Writes the backward distance of the best match into best_distance_out.
// Writes the score of the best match into best_score_out.
bool FindLongestMatch(const uint8_t * __restrict data,
const size_t ring_buffer_mask,
const int* __restrict distance_cache,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double * __restrict best_score_out) {
*best_len_code_out = 0;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
bool match_found = false;
// Don't accept a short copy from far away.
double best_score = *best_score_out;
size_t best_len = *best_len_out;
*best_len_out = 0;
// Try last distance first.
for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
const size_t idx = kDistanceCacheIndex[i];
const size_t backward =
static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
size_t prev_ix = static_cast<size_t>(cur_ix - backward);
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > max_backward)) {
continue;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 3 || (len == 2 && i < 2)) {
// Comparing for >= 2 would not change the semantics, but just saves
// a few unnecessary binary logarithms in backward reference score,
// since we are not interested in such short matches.
double score = BackwardReferenceScoreUsingLastDistance(len, i);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (size_t i = num_[key]; i > down;) {
--i;
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 4) {
// Comparing for >= 3 would not change the semantics, but just saves
// a few unnecessary binary logarithms in the backward reference
// score, since we are not interested in such short matches.
double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
++num_[key];
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
for (int k = 0; k < 2; ++k, ++dict_key) {
++num_dict_lookups_;
const uint16_t v = kStaticDictionaryHash[dict_key];
if (v > 0) {
const size_t len = v & 31;
const size_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const size_t matchlen =
FindMatchLengthWithLimit(&data[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const size_t backward = max_backward + word_id + 1;
double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++num_dict_matches_;
best_score = score;
best_len = matchlen;
*best_len_out = best_len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
}
}
}
return match_found;
}
// Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
// length of max_length and stores the position cur_ix in the hash table.
//
// Returns the number of matches found, and stores the found matches in
// matches[0] to matches[num_matches - 1]. The matches will be
// sorted by strictly increasing length and (non-strictly) increasing
// distance.
size_t FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
BackwardMatch* matches) {
BackwardMatch* const orig_matches = matches;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
size_t best_len = 1;
size_t stop = cur_ix - 64;
if (cur_ix < 64) { stop = 0; }
for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (data[cur_ix_masked] != data[prev_ix] ||
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
*matches++ = BackwardMatch(backward, len);
}
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (size_t i = num_[key]; i > down;) {
--i;
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
*matches++ = BackwardMatch(backward, len);
}
}
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
++num_[key];
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
dict_matches[i] = kInvalidMatch;
}
size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) {
size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
for (size_t l = minlen; l <= maxlen; ++l) {
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
dict_id & 31);
}
}
}
return static_cast<size_t>(matches - orig_matches);
}
enum { kHashLength = 4 };
enum { kHashTypeLength = 4 };
// HashBytes is the function that chooses the bucket to place
// the address in. The HashLongestMatch and HashLongestMatchQuickly
// classes have separate, different implementations of hashing.
static uint32_t HashBytes(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - kBucketBits);
}
enum { kHashMapSize = 2 << kBucketBits };
static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);
private:
// Number of hash buckets.
static const uint32_t kBucketSize = 1 << kBucketBits;
// Only kBlockSize newest backward references are kept,
// and the older are forgotten.
static const uint32_t kBlockSize = 1 << kBlockBits;
// Mask for accessing entries in a block (in a ringbuffer manner).
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
// Number of entries in a particular bucket.
uint16_t num_[kBucketSize];
// Buckets containing kBlockSize of backward references.
uint32_t buckets_[kBucketSize][kBlockSize];
// True if num_ array needs to be initialized.
bool need_init_;
size_t num_dict_lookups_;
size_t num_dict_matches_;
};
// A (forgetful) hash table where each hash bucket contains a binary tree of
// sequences whose first 4 bytes share the same hash code.
// Each sequence is kMaxTreeCompLength long and is identified by its starting
// position in the input data. The binary tree is sorted by the lexicographic
// order of the sequences, and it is also a max-heap with respect to the
// starting positions.
class HashToBinaryTree {
public:
HashToBinaryTree() : forest_(NULL) {
Reset();
}
~HashToBinaryTree() {
delete[] forest_;
}
void Reset() {
need_init_ = true;
}
void Init(int lgwin, size_t position, size_t bytes, bool is_last) {
if (need_init_) {
window_mask_ = (1u << lgwin) - 1u;
invalid_pos_ = static_cast<uint32_t>(-window_mask_);
for (uint32_t i = 0; i < kBucketSize; i++) {
buckets_[i] = invalid_pos_;
}
size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;
forest_ = new uint32_t[2 * num_nodes];
need_init_ = false;
}
}
// Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
// length of max_length and stores the position cur_ix in the hash table.
//
// Returns the number of matches found, and stores the found matches in
// matches[0] to matches[num_matches - 1]. The matches will be
// sorted by strictly increasing length and (non-strictly) increasing
// distance.
size_t FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
BackwardMatch* matches) {
BackwardMatch* const orig_matches = matches;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
size_t best_len = 1;
size_t stop = cur_ix - 64;
if (cur_ix < 64) { stop = 0; }
for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (data[cur_ix_masked] != data[prev_ix] ||
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
*matches++ = BackwardMatch(backward, len);
}
}
if (best_len < max_length) {
matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
max_length, &best_len, matches);
}
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
dict_matches[i] = kInvalidMatch;
}
size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) {
size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
for (size_t l = minlen; l <= maxlen; ++l) {
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
dict_id & 31);
}
}
}
return static_cast<size_t>(matches - orig_matches);
}
// Stores the hash of the next 4 bytes and re-roots the binary tree at the
// current sequence, without returning any matches.
// REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
void Store(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix) {
size_t best_len = 0;
StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
&best_len, NULL);
}
void StitchToPreviousBlock(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
// Store the last `kMaxTreeCompLength - 1` positions in the hasher.
// These could not be calculated before, since they require knowledge
// of both the previous and the current block.
const size_t i_start = position - kMaxTreeCompLength + 1;
const size_t i_end = std::min(position, i_start + num_bytes);
for (size_t i = i_start; i < i_end; ++i) {
// We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
// end of the current block and that we have at least
// kMaxTreeCompLength tail in the ringbuffer.
Store(ringbuffer, ringbuffer_mask, i);
}
}
}
static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
private:
// Stores the hash of the next 4 bytes and in a single tree-traversal, the
// hash bucket's binary tree is searched for matches and is re-rooted at the
// current position.
//
// If less than kMaxTreeCompLength data is available, the hash bucket of the
// current position is searched for matches, but the state of the hash table
// is not changed, since we can not know the final sorting order of the
// current (incomplete) sequence.
//
// This function must be called with increasing cur_ix positions.
BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,
const size_t cur_ix,
const size_t ring_buffer_mask,
const size_t max_length,
size_t* const __restrict best_len,
BackwardMatch* __restrict matches) {
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
const size_t max_backward = window_mask_ - 15;
const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);
const bool reroot_tree = max_length >= kMaxTreeCompLength;
const uint32_t key = HashBytes(&data[cur_ix_masked]);
size_t prev_ix = buckets_[key];
// The forest index of the rightmost node of the left subtree of the new
// root, updated as we traverse and reroot the tree of the hash bucket.
size_t node_left = LeftChildIndex(cur_ix);
// The forest index of the leftmost node of the right subtree of the new
// root, updated as we traverse and reroot the tree of the hash bucket.
size_t node_right = RightChildIndex(cur_ix);
// The match length of the rightmost node of the left subtree of the new
// root, updated as we traverse and reroot the tree of the hash bucket.
size_t best_len_left = 0;
// The match length of the leftmost node of the right subtree of the new
// root, updated as we traverse and reroot the tree of the hash bucket.
size_t best_len_right = 0;
if (reroot_tree) {
buckets_[key] = static_cast<uint32_t>(cur_ix);
}
for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {
const size_t backward = cur_ix - prev_ix;
const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
if (backward == 0 || backward > max_backward || depth_remaining == 0) {
if (reroot_tree) {
forest_[node_left] = invalid_pos_;
forest_[node_right] = invalid_pos_;
}
break;
}
const size_t cur_len = std::min(best_len_left, best_len_right);
const size_t len = cur_len +
FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
&data[prev_ix_masked + cur_len],
max_length - cur_len);
if (len > *best_len) {
*best_len = len;
if (matches) {
*matches++ = BackwardMatch(backward, len);
}
if (len >= max_comp_len) {
if (reroot_tree) {
forest_[node_left] = forest_[LeftChildIndex(prev_ix)];
forest_[node_right] = forest_[RightChildIndex(prev_ix)];
}
break;
}
}
if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
best_len_left = len;
if (reroot_tree) {
forest_[node_left] = static_cast<uint32_t>(prev_ix);
}
node_left = RightChildIndex(prev_ix);
prev_ix = forest_[node_left];
} else {
best_len_right = len;
if (reroot_tree) {
forest_[node_right] = static_cast<uint32_t>(prev_ix);
}
node_right = LeftChildIndex(prev_ix);
prev_ix = forest_[node_right];
}
}
return matches;
}
inline size_t LeftChildIndex(const size_t pos) {
return 2 * (pos & window_mask_);
}
inline size_t RightChildIndex(const size_t pos) {
return 2 * (pos & window_mask_) + 1;
}
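// Illustrative note (editor's sketch): the binary tree is stored implicitly in
// forest_, indexed by position: the node for position pos keeps its left child
// in forest_[2 * (pos & window_mask_)] and its right child in the next slot,
// which is why Init() allocates forest_ with 2 * num_nodes entries.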
static uint32_t HashBytes(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - kBucketBits);
}
static const int kBucketBits = 17;
static const size_t kBucketSize = 1 << kBucketBits;
// The window size minus 1.
size_t window_mask_;
// Hash table that maps the 4-byte hashes of the sequence to the last
// position where this hash was found, which is the root of the binary
// tree of sequences that share this hash bucket.
uint32_t buckets_[kBucketSize];
// The union of the binary trees of each hash bucket. The root of the tree
// corresponding to a hash is a sequence starting at buckets_[hash] and
// the left and right children of a sequence starting at pos are
// forest_[2 * pos] and forest_[2 * pos + 1].
uint32_t* forest_;
// A position used to mark a non-existent sequence, i.e. a tree is empty if
// its root is at invalid_pos_ and a node is a leaf if both its children
// are at invalid_pos_.
uint32_t invalid_pos_;
bool need_init_;
};
struct Hashers {
// For kBucketSweep == 1, enabling the dictionary lookup makes compression
// a little faster (0.5% - 1%) and it compresses 0.15% better on small text
// and html inputs.
typedef HashLongestMatchQuickly<16, 1, true> H2;
typedef HashLongestMatchQuickly<16, 2, false> H3;
typedef HashLongestMatchQuickly<17, 4, true> H4;
typedef HashLongestMatch<14, 4, 4> H5;
typedef HashLongestMatch<14, 5, 4> H6;
typedef HashLongestMatch<15, 6, 10> H7;
typedef HashLongestMatch<15, 7, 10> H8;
typedef HashLongestMatch<15, 8, 16> H9;
typedef HashToBinaryTree H10;
Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}
~Hashers(void) {
delete hash_h2;
delete hash_h3;
delete hash_h4;
delete hash_h5;
delete hash_h6;
delete hash_h7;
delete hash_h8;
delete hash_h9;
delete hash_h10;
}
void Init(int type) {
switch (type) {
case 2: hash_h2 = new H2; break;
case 3: hash_h3 = new H3; break;
case 4: hash_h4 = new H4; break;
case 5: hash_h5 = new H5; break;
case 6: hash_h6 = new H6; break;
case 7: hash_h7 = new H7; break;
case 8: hash_h8 = new H8; break;
case 9: hash_h9 = new H9; break;
case 10: hash_h10 = new H10; break;
default: break;
}
}
template<typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
hasher->Init();
for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
hasher->Store(&dict[i], static_cast<uint32_t>(i));
}
}
// Custom LZ77 window.
void PrependCustomDictionary(
int type, int lgwin, const size_t size, const uint8_t* dict) {
switch (type) {
case 2: WarmupHash(size, dict, hash_h2); break;
case 3: WarmupHash(size, dict, hash_h3); break;
case 4: WarmupHash(size, dict, hash_h4); break;
case 5: WarmupHash(size, dict, hash_h5); break;
case 6: WarmupHash(size, dict, hash_h6); break;
case 7: WarmupHash(size, dict, hash_h7); break;
case 8: WarmupHash(size, dict, hash_h8); break;
case 9: WarmupHash(size, dict, hash_h9); break;
case 10:
hash_h10->Init(lgwin, 0, size, false);
for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
}
break;
default: break;
}
}
H2* hash_h2;
H3* hash_h3;
H4* hash_h4;
H5* hash_h5;
H6* hash_h6;
H7* hash_h7;
H8* hash_h8;
H9* hash_h9;
H10* hash_h10;
};
} // namespace brotli
#endif // BROTLI_ENC_HASH_H_
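
The bucket index above is a plain multiplicative hash, easy to sanity-check in isolation. A minimal standalone sketch of the same computation, assuming kHashMul32 == 0x1e35a7bd (its value earlier in this file) and using memcpy in place of BROTLI_UNALIGNED_LOAD32:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Standalone copy of HashBytes: the multiply mixes all four input bytes
// into the high bits, and the shift keeps the top kBucketBits of them.
static uint32_t HashBytes(const uint8_t* data) {
  uint32_t v;
  memcpy(&v, data, sizeof(v));   // portable unaligned load
  uint32_t h = v * 0x1e35a7bdu;  // kHashMul32 (assumed value)
  return h >> (32 - 17);         // kBucketBits == 17
}

int main() {
  const uint8_t a[4] = {'a', 'b', 'c', 'd'};
  const uint8_t b[4] = {'a', 'b', 'c', 'e'};
  // A one-byte change should almost always land in a different bucket.
  printf("%u %u\n", HashBytes(a), HashBytes(b));
}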


@ -0,0 +1,67 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Build per-context histograms of literals, commands and distance codes.
#include "./histogram.h"
#include <cmath>
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#include "./prefix.h"
namespace brotli {
void BuildHistograms(
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* ringbuffer,
size_t start_pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms) {
size_t pos = start_pos;
BlockSplitIterator literal_it(literal_split);
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
BlockSplitIterator dist_it(dist_split);
for (size_t i = 0; i < num_commands; ++i) {
const Command &cmd = cmds[i];
insert_and_copy_it.Next();
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_it.Next();
size_t context = (literal_it.type_ << kLiteralContextBits) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
prev_byte = ringbuffer[pos & mask];
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len()) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
dist_it.Next();
size_t context = (dist_it.type_ << kDistanceContextBits) +
cmd.DistanceContext();
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
}
}
}
}
} // namespace brotli
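
BuildHistograms packs the block type and the per-byte context into a single histogram index, so with kLiteralContextBits == 6 each literal block type owns a contiguous run of 64 histograms. A small sketch of that layout, with the real Context() replaced by a clearly hypothetical stand-in:

#include <cstddef>
#include <cstdio>

static const size_t kLiteralContextBits = 6;

// Hypothetical stand-in for Context(prev_byte, prev_byte2, mode); the real
// function maps the previous two bytes to a context id in [0, 64).
static size_t FakeContext(unsigned char p1, unsigned char p2) {
  return ((p1 >> 2) ^ p2) & 63;  // illustration only
}

int main() {
  size_t block_type = 3;
  size_t ctx = FakeContext('e', 'h');
  // Histograms for literal block type 3 occupy indexes [192, 256).
  size_t index = (block_type << kLiteralContextBits) + ctx;
  printf("histogram index = %zu\n", index);
}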


@ -0,0 +1,94 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Models the histograms of literals, commands and distance codes.
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <cstring>
#include <limits>
#include <vector>
#include "./context.h"
#include "./command.h"
#include "./fast_log.h"
#include "./prefix.h"
#include "./types.h"
namespace brotli {
struct BlockSplit;
// A simple container for histograms of data in blocks.
template<int kDataSize>
struct Histogram {
Histogram(void) {
Clear();
}
void Clear(void) {
memset(data_, 0, sizeof(data_));
total_count_ = 0;
bit_cost_ = std::numeric_limits<double>::infinity();
}
void Add(size_t val) {
++data_[val];
++total_count_;
}
void Remove(size_t val) {
--data_[val];
--total_count_;
}
template<typename DataType>
void Add(const DataType *p, size_t n) {
total_count_ += n;
while (n--) ++data_[*p++];
}
void AddHistogram(const Histogram& v) {
total_count_ += v.total_count_;
for (size_t i = 0; i < kDataSize; ++i) {
data_[i] += v.data_[i];
}
}
uint32_t data_[kDataSize];
size_t total_count_;
double bit_cost_;
};
// Literal histogram.
typedef Histogram<256> HistogramLiteral;
// Prefix histograms.
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
typedef Histogram<272> HistogramContextMap;
// Block type histogram, 256 block types + 2 special symbols.
typedef Histogram<258> HistogramBlockType;
static const size_t kLiteralContextBits = 6;
static const size_t kDistanceContextBits = 2;
void BuildHistograms(
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms);
} // namespace brotli
#endif // BROTLI_ENC_HISTOGRAM_H_
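
A quick usage sketch of the container above (a trimmed copy, not the header itself), showing Add and the AddHistogram merge that the block splitters rely on:

#include <cstdint>
#include <cstdio>
#include <cstring>

template<int kDataSize>
struct Histogram {
  Histogram() : total_count_(0) { memset(data_, 0, sizeof(data_)); }
  void Add(size_t val) { ++data_[val]; ++total_count_; }
  void AddHistogram(const Histogram& v) {
    total_count_ += v.total_count_;
    for (int i = 0; i < kDataSize; ++i) data_[i] += v.data_[i];
  }
  uint32_t data_[kDataSize];
  size_t total_count_;
};

int main() {
  Histogram<256> a, b;
  for (const char* p = "hello"; *p; ++p) a.Add(static_cast<uint8_t>(*p));
  for (const char* p = "world"; *p; ++p) b.Add(static_cast<uint8_t>(*p));
  a.AddHistogram(b);  // merge, as FinishBlock does when combining blocks
  printf("count('l') = %u, total = %zu\n", a.data_['l'], a.total_count_);  // 3, 10
}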


@ -0,0 +1,165 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Literal cost model to allow backward reference replacement to be efficient.
#include "./literal_cost.h"
#include <math.h>
#include <algorithm>
#include "./fast_log.h"
#include "./types.h"
#include "./utf8_util.h"
namespace brotli {
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; // Next one is the 'Byte 1' again.
} else if (c >= 192) { // Next one is the 'Byte 2' of utf-8 encoding.
return std::min<size_t>(1, clamp);
} else {
// Let's decide over the last byte if this ends the sequence.
if (last < 0xe0) {
return 0; // Completed two or three byte coding.
} else { // Next one is the 'Byte 3' of utf-8 encoding.
return std::min<size_t>(2, clamp);
}
}
}
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t *data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; // should be 2, but 1 compresses better.
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < len; ++i) {
size_t c = data[(pos + i) & mask];
utf8_pos = UTF8Position(last_c, c, 2);
++counts[utf8_pos];
last_c = c;
}
if (counts[2] < 500) {
max_utf8 = 1;
}
if (counts[1] + counts[2] < 25) {
max_utf8 = 0;
}
return max_utf8;
}
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
// max_utf8 is 0 (normal ascii single byte modeling),
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t histogram[3][256] = { { 0 } };
size_t window_half = 495;
size_t in_window = std::min(window_half, len);
size_t in_window_utf8[3] = { 0 };
// Bootstrap histograms.
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < in_window; ++i) {
size_t c = data[(pos + i) & mask];
++histogram[utf8_pos][c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
last_c = c;
}
// Compute bit costs with sliding window.
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
size_t c = i < window_half + 1 ?
0 : data[(pos + i - window_half - 1) & mask];
size_t last_c = i < window_half + 2 ?
0 : data[(pos + i - window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
}
if (i + window_half < len) {
// Add a byte in the future.
size_t c = data[(pos + i + window_half - 1) & mask];
size_t last_c = data[(pos + i + window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[utf8_pos][data[masked_pos]];
if (histo == 0) {
histo = 1;
}
double lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
lit_cost += 0.02905;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
// Make the first bytes more expensive -- seems to help, not sure why.
// Perhaps because the entropy source is changing its properties
// rapidly in the beginning of the file, perhaps because the beginning
// of the data is a statistical "anomaly".
if (i < 2000) {
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
}
cost[i] = static_cast<float>(lit_cost);
}
}
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
}
size_t histogram[256] = { 0 };
size_t window_half = 2000;
size_t in_window = std::min(window_half, len);
// Bootstrap histogram.
for (size_t i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
// Compute bit costs with sliding window.
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < len) {
// Add a byte in the future.
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}
size_t histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}
double lit_cost = FastLog2(in_window) - FastLog2(histo);
lit_cost += 0.029;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
cost[i] = static_cast<float>(lit_cost);
}
}
} // namespace brotli
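
Both code paths price a literal at its self-information under the sliding-window distribution, FastLog2(in_window) - FastLog2(histo), plus a small tuning constant (0.029 here, 0.02905 in the UTF-8 path). A worked sketch of the core term with hypothetical counts:

#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical numbers: a 2000-byte window in which the current byte
  // value occurred 125 times.
  double in_window = 2000.0, histo = 125.0;
  double lit_cost = std::log2(in_window) - std::log2(histo);  // 4 bits
  lit_cost += 0.029;     // modeling-cost constant from above
  if (lit_cost < 1.0) {  // soften estimates below one bit
    lit_cost = lit_cost * 0.5 + 0.5;
  }
  printf("estimated cost: %.3f bits\n", lit_cost);  // ~4.029
}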


@ -0,0 +1,24 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Literal cost model to allow backward reference replacement to be efficient.
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
#include "./types.h"
namespace brotli {
// Estimates how many bits the literals in the interval [pos, pos + len) in the
// ringbuffer (data, mask) will take when entropy coded, and writes these
// estimates to the cost[0..len) array.
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost);
} // namespace brotli
#endif // BROTLI_ENC_LITERAL_COST_H_


@ -0,0 +1,539 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
#include "./metablock.h"
#include "./block_splitter.h"
#include "./context.h"
#include "./cluster.h"
#include "./histogram.h"
namespace brotli {
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb) {
SplitBlock(cmds, num_commands,
ringbuffer, pos, mask,
&mb->literal_split,
&mb->command_split,
&mb->distance_split);
std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
literal_context_mode);
size_t num_literal_contexts =
mb->literal_split.num_types << kLiteralContextBits;
size_t num_distance_contexts =
mb->distance_split.num_types << kDistanceContextBits;
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types);
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
BuildHistograms(cmds, num_commands,
mb->literal_split,
mb->command_split,
mb->distance_split,
ringbuffer,
pos,
mask,
prev_byte,
prev_byte2,
literal_context_modes,
&literal_histograms,
&mb->command_histograms,
&distance_histograms);
// Histogram ids need to fit in one byte.
static const size_t kMaxNumberOfHistograms = 256;
ClusterHistograms(literal_histograms,
1u << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
ClusterHistograms(distance_histograms,
1u << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
}
// Greedy block splitter for one block category (literal, command or distance).
template<typename HistogramType>
class BlockSplitter {
public:
BlockSplitter(size_t alphabet_size,
size_t min_block_size,
double split_threshold,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
min_block_size_(min_block_size),
split_threshold_(split_threshold),
num_blocks_(0),
split_(split),
histograms_(histograms),
target_block_size_(min_block_size),
block_size_(0),
curr_histogram_ix_(0),
merge_last_count_(0) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types);
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
}
// Adds the next symbol to the current histogram. When the current block
// reaches the target size, decides on merging the block.
void AddSymbol(size_t symbol) {
(*histograms_)[curr_histogram_ix_].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
FinishBlock(/* is_final = */ false);
}
}
// Does one of three things:
// (1) emits the current block with a new block type;
// (2) emits the current block with the type of the second last block;
// (3) merges the current block with the last block.
void FinishBlock(bool is_final) {
if (block_size_ < min_block_size_) {
block_size_ = min_block_size_;
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
last_entropy_[0] =
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
last_entropy_[1] = last_entropy_[0];
++num_blocks_;
++split_->num_types;
++curr_histogram_ix_;
block_size_ = 0;
} else if (block_size_ > 0) {
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
alphabet_size_);
HistogramType combined_histo[2];
double combined_entropy[2];
double diff[2];
for (size_t j = 0; j < 2; ++j) {
size_t last_histogram_ix = last_histogram_ix_[j];
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[j] = BitsEntropy(
&combined_histo[j].data_[0], alphabet_size_);
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
}
if (split_->num_types < kMaxBlockTypes &&
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = entropy;
++num_blocks_;
++split_->num_types;
++curr_histogram_ix_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = combined_entropy[1];
++num_blocks_;
block_size_ = 0;
(*histograms_)[curr_histogram_ix_].Clear();
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
last_entropy_[0] = combined_entropy[0];
if (split_->num_types == 1) {
last_entropy_[1] = last_entropy_[0];
}
block_size_ = 0;
(*histograms_)[curr_histogram_ix_].Clear();
if (++merge_last_count_ > 1) {
target_block_size_ += min_block_size_;
}
}
}
if (is_final) {
(*histograms_).resize(split_->num_types);
split_->types.resize(num_blocks_);
split_->lengths.resize(num_blocks_);
}
}
private:
static const uint16_t kMaxBlockTypes = 256;
// Alphabet size of particular block category.
const size_t alphabet_size_;
// We collect at least this many symbols for each block.
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
size_t target_block_size_;
// The number of symbols in the current histogram.
size_t block_size_;
// Offset of the current histogram.
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
double last_entropy_[2];
// The number of times we merged the current block with the last one.
size_t merge_last_count_;
};
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
BlockSplitter<HistogramLiteral> lit_blocks(
256, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_);
}
}
lit_blocks.FinishBlock(/* is_final = */ true);
cmd_blocks.FinishBlock(/* is_final = */ true);
dist_blocks.FinishBlock(/* is_final = */ true);
}
// Greedy block splitter for one block category (literal, command or distance).
// Gathers histograms for all context buckets.
template<typename HistogramType>
class ContextBlockSplitter {
public:
ContextBlockSplitter(size_t alphabet_size,
size_t num_contexts,
size_t min_block_size,
double split_threshold,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
num_contexts_(num_contexts),
max_block_types_(kMaxBlockTypes / num_contexts),
min_block_size_(min_block_size),
split_threshold_(split_threshold),
num_blocks_(0),
split_(split),
histograms_(histograms),
target_block_size_(min_block_size),
block_size_(0),
curr_histogram_ix_(0),
last_entropy_(2 * num_contexts),
merge_last_count_(0) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types * num_contexts);
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
}
// Adds the next symbol to the current block type and context. When the
// current block reaches the target size, decides on merging the block.
void AddSymbol(size_t symbol, size_t context) {
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
FinishBlock(/* is_final = */ false);
}
}
// Does one of three things:
// (1) emits the current block with a new block type;
// (2) emits the current block with the type of the second last block;
// (3) merges the current block with the last block.
void FinishBlock(bool is_final) {
if (block_size_ < min_block_size_) {
block_size_ = min_block_size_;
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[i] =
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
last_entropy_[num_contexts_ + i] = last_entropy_[i];
}
++num_blocks_;
++split_->num_types;
curr_histogram_ix_ += num_contexts_;
block_size_ = 0;
} else if (block_size_ > 0) {
// Try merging the set of histograms for the current block type with the
// respective set of histograms for the last and second last block types.
// Decide over the split based on the total reduction of entropy across
// all contexts.
std::vector<double> entropy(num_contexts_);
std::vector<HistogramType> combined_histo(2 * num_contexts_);
std::vector<double> combined_entropy(2 * num_contexts_);
double diff[2] = { 0.0 };
for (size_t i = 0; i < num_contexts_; ++i) {
size_t curr_histo_ix = curr_histogram_ix_ + i;
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
alphabet_size_);
for (size_t j = 0; j < 2; ++j) {
size_t jx = j * num_contexts_ + i;
size_t last_histogram_ix = last_histogram_ix_[j] + i;
combined_histo[jx] = (*histograms_)[curr_histo_ix];
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[jx] = BitsEntropy(
&combined_histo[jx].data_[0], alphabet_size_);
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
}
}
if (split_->num_types < max_block_types_ &&
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = split_->num_types * num_contexts_;
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[num_contexts_ + i] = last_entropy_[i];
last_entropy_[i] = entropy[i];
}
++num_blocks_;
++split_->num_types;
curr_histogram_ix_ += num_contexts_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] =
combined_histo[num_contexts_ + i];
last_entropy_[num_contexts_ + i] = last_entropy_[i];
last_entropy_[i] = combined_entropy[num_contexts_ + i];
(*histograms_)[curr_histogram_ix_ + i].Clear();
}
++num_blocks_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
last_entropy_[i] = combined_entropy[i];
if (split_->num_types == 1) {
last_entropy_[num_contexts_ + i] = last_entropy_[i];
}
(*histograms_)[curr_histogram_ix_ + i].Clear();
}
block_size_ = 0;
if (++merge_last_count_ > 1) {
target_block_size_ += min_block_size_;
}
}
}
if (is_final) {
(*histograms_).resize(split_->num_types * num_contexts_);
split_->types.resize(num_blocks_);
split_->lengths.resize(num_blocks_);
}
}
private:
static const int kMaxBlockTypes = 256;
// Alphabet size of particular block category.
const size_t alphabet_size_;
const size_t num_contexts_;
const size_t max_block_types_;
// We collect at least this many symbols for each block.
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
size_t target_block_size_;
// The number of symbols in the current histogram.
size_t block_size_;
// Offset of the current histogram.
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
std::vector<double> last_entropy_;
// The number of times we merged the current block with the last one.
size_t merge_last_count_;
};
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
ContextBlockSplitter<HistogramLiteral> lit_blocks(
256, num_contexts, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = ringbuffer[pos & mask];
lit_blocks.AddSymbol(literal, static_context_map[context]);
prev_byte2 = prev_byte;
prev_byte = literal;
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len()) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_);
}
}
}
lit_blocks.FinishBlock(/* is_final = */ true);
cmd_blocks.FinishBlock(/* is_final = */ true);
dist_blocks.FinishBlock(/* is_final = */ true);
mb->literal_context_map.resize(
mb->literal_split.num_types << kLiteralContextBits);
for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
mb->literal_context_map[(i << kLiteralContextBits) + j] =
static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
}
}
}
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb) {
uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
good_for_rle);
}
for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
&mb->command_histograms[i].data_[0],
good_for_rle);
}
size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48u << distance_postfix_bits);
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(num_distance_codes,
&mb->distance_histograms[i].data_[0],
good_for_rle);
}
delete[] good_for_rle;
}
} // namespace brotli
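
FinishBlock's three-way choice reduces to comparing the entropy deltas diff[0] and diff[1] against split_threshold_. A compact sketch of just that decision rule, omitting the cap on split_->num_types and using hypothetical deltas:

#include <cstdio>

// diff[j] = entropy(current + last_j) - entropy(current) - entropy(last_j):
// the bits lost by merging the current block into block type j
// (j == 0: last block's type, j == 1: second last block's type).
static const char* Decide(double diff0, double diff1, double threshold) {
  if (diff0 > threshold && diff1 > threshold) return "new block type";
  if (diff1 < diff0 - 20.0) return "reuse second last type";
  return "merge into last block";
}

int main() {
  // threshold 400.0, as used for literal blocks above
  printf("%s\n", Decide(450.0, 500.0, 400.0));  // new block type
  printf("%s\n", Decide(300.0, 120.0, 400.0));  // reuse second last type
  printf("%s\n", Decide(100.0, 350.0, 400.0));  // merge into last block
}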


@ -0,0 +1,80 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
#include <vector>
#include "./command.h"
#include "./histogram.h"
namespace brotli {
struct BlockSplit {
BlockSplit(void) : num_types(0) {}
size_t num_types;
std::vector<uint8_t> types;
std::vector<uint32_t> lengths;
};
struct MetaBlockSplit {
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
std::vector<uint32_t> literal_context_map;
std::vector<uint32_t> distance_context_map;
std::vector<HistogramLiteral> literal_histograms;
std::vector<HistogramCommand> command_histograms;
std::vector<HistogramDistance> distance_histograms;
};
// Uses the slow shortest-path block splitter and does context clustering.
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb);
// Uses a fast greedy block splitter that tries to merge the current block
// with the last or the second last block, and does not do any context
// modeling.
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
// Uses a fast greedy block splitter that tries to merge the current block
// with the last or the second last block, and uses a static context
// clustering that is the same for all block types.
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb);
} // namespace brotli
#endif // BROTLI_ENC_METABLOCK_H_

modules/brotli/enc/port.h

@ -0,0 +1,142 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Macros for endianness, branch prediction and unaligned loads and stores.
#ifndef BROTLI_ENC_PORT_H_
#define BROTLI_ENC_PORT_H_
#include <assert.h>
#include <string.h>
#include "./types.h"
#if defined OS_LINUX || defined OS_CYGWIN
#include <endian.h>
#elif defined OS_FREEBSD
#include <machine/endian.h>
#elif defined OS_MACOSX
#include <machine/endian.h>
/* Let's try and follow the Linux convention */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#endif
// define the macro IS_LITTLE_ENDIAN
// using the above endian definitions from endian.h if
// endian.h was included
#ifdef __BYTE_ORDER
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#endif
#else
#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#endif
#endif // __BYTE_ORDER
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#endif
// Enable little-endian optimization for x64 architecture on Windows.
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
#endif
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
(defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
// Portable handling of unaligned loads, stores, and copies.
// On some platforms, like ARM, the copy functions can be more efficient
// than a load and a store.
#if defined(ARCH_PIII) || \
defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
// but note: the FPU still sends unaligned loads and stores to a trap handler!
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
(*reinterpret_cast<uint64_t *>(_p) = (_val))
#elif defined(__arm__) && \
!defined(__ARM_ARCH_5__) && \
!defined(__ARM_ARCH_5T__) && \
!defined(__ARM_ARCH_5TE__) && \
!defined(__ARM_ARCH_5TEJ__) && \
!defined(__ARM_ARCH_6__) && \
!defined(__ARM_ARCH_6J__) && \
!defined(__ARM_ARCH_6K__) && \
!defined(__ARM_ARCH_6Z__) && \
!defined(__ARM_ARCH_6ZK__) && \
!defined(__ARM_ARCH_6T2__)
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode).
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#else
// These functions are provided for architectures that don't support
// unaligned loads and stores.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
uint32_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
memcpy(p, &v, sizeof v);
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#endif
#endif // BROTLI_ENC_PORT_H_
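
The memcpy fallback at the end is the standard portable idiom: a fixed-size memcpy compiles down to a single load or store wherever the hardware allows it, and stays correct (if slower) everywhere else. A minimal sketch:

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t Load32(const void* p) {
  uint32_t t;
  memcpy(&t, p, sizeof t);  // typically one MOV on x86/x86-64
  return t;
}

int main() {
  unsigned char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  // Reading at offset 1 is misaligned for uint32_t, but safe here.
  printf("0x%08x\n", Load32(buf + 1));  // 0x05040302 on little-endian
}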


@ -0,0 +1,79 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions for encoding integers into prefix codes, the number of extra
// bits, and the actual values of the extra bits.
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
#include "./fast_log.h"
#include "./types.h"
namespace brotli {
static const uint32_t kNumInsertLenPrefixes = 24;
static const uint32_t kNumCopyLenPrefixes = 24;
static const uint32_t kNumCommandPrefixes = 704;
static const uint32_t kNumBlockLenPrefixes = 26;
static const uint32_t kNumDistanceShortCodes = 16;
static const uint32_t kNumDistancePrefixes = 520;
// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
uint32_t offset;
uint32_t nbits;
};
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
{ 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
{ 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
{ 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
{ 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
{8433, 13}, {16625, 24}
};
inline void GetBlockLengthPrefixCode(uint32_t len, uint32_t* code,
uint32_t* n_extra, uint32_t* extra) {
*code = 0;
while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
++(*code);
}
*n_extra = kBlockLengthPrefixCode[*code].nbits;
*extra = len - kBlockLengthPrefixCode[*code].offset;
}
inline void PrefixEncodeCopyDistance(size_t distance_code,
size_t num_direct_codes,
size_t postfix_bits,
uint16_t* code,
uint32_t* extra_bits) {
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
*code = static_cast<uint16_t>(distance_code);
*extra_bits = 0;
return;
}
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
size_t bucket = Log2FloorNonZero(distance_code) - 1;
size_t postfix_mask = (1 << postfix_bits) - 1;
size_t postfix = distance_code & postfix_mask;
size_t prefix = (distance_code >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
*code = static_cast<uint16_t>(
(kNumDistanceShortCodes + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
*extra_bits = static_cast<uint32_t>(
(nbits << 24) | ((distance_code - offset) >> postfix_bits));
}
} // namespace brotli
#endif // BROTLI_ENC_PREFIX_H_
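
A worked example for GetBlockLengthPrefixCode: a block length of 30 falls in the bucket [25, 33), so the code is 5 with 3 extra bits and extra value 30 - 25 = 5. A standalone sketch using the same table:

#include <cstdint>
#include <cstdio>

struct PrefixCodeRange { uint32_t offset, nbits; };

static const PrefixCodeRange kBlockLengthPrefixCode[26] = {
  {1, 2}, {5, 2}, {9, 2}, {13, 2}, {17, 3}, {25, 3}, {33, 3}, {41, 3},
  {49, 4}, {65, 4}, {81, 4}, {97, 4}, {113, 5}, {145, 5}, {177, 5},
  {209, 5}, {241, 6}, {305, 6}, {369, 7}, {497, 8}, {753, 9}, {1265, 10},
  {2289, 11}, {4337, 12}, {8433, 13}, {16625, 24}
};

int main() {
  uint32_t len = 30, code = 0;
  // Walk forward while the next bucket still starts at or below len.
  while (code < 25 && len >= kBlockLengthPrefixCode[code + 1].offset) ++code;
  uint32_t n_extra = kBlockLengthPrefixCode[code].nbits;
  uint32_t extra = len - kBlockLengthPrefixCode[code].offset;
  printf("code=%u n_extra=%u extra=%u\n", code, n_extra, extra);  // 5 3 5
}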


@ -0,0 +1,145 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Sliding window over the input data.
#ifndef BROTLI_ENC_RINGBUFFER_H_
#define BROTLI_ENC_RINGBUFFER_H_
#include <cstdlib> /* free, realloc */
#include "./port.h"
#include "./types.h"
namespace brotli {
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
// data in a circular manner: writing a byte writes it to:
// `position() % (1 << window_bits)'.
// For convenience, the RingBuffer array contains another copy of the
// first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
// and another copy of the last two bytes:
// buffer_[-1] == buffer_[(1 << window_bits) - 1] and
// buffer_[-2] == buffer_[(1 << window_bits) - 2].
class RingBuffer {
public:
RingBuffer(int window_bits, int tail_bits)
: size_(1u << window_bits),
mask_((1u << window_bits) - 1),
tail_size_(1u << tail_bits),
total_size_(size_ + tail_size_),
cur_size_(0),
pos_(0),
data_(0),
buffer_(0) {}
~RingBuffer(void) {
free(data_);
}
// Allocates or re-allocates data_ to the given length plus some slack
// region before and after. Fills the slack regions with zeros.
inline void InitBuffer(const uint32_t buflen) {
static const size_t kSlackForEightByteHashingEverywhere = 7;
cur_size_ = buflen;
data_ = static_cast<uint8_t*>(realloc(
data_, 2 + buflen + kSlackForEightByteHashingEverywhere));
buffer_ = data_ + 2;
buffer_[-2] = buffer_[-1] = 0;
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
buffer_[cur_size_ + i] = 0;
}
}
// Push bytes into the ring buffer.
void Write(const uint8_t *bytes, size_t n) {
if (pos_ == 0 && n < tail_size_) {
// Special case for the first write: to process the first block, we don't
// need to allocate the whole ringbuffer and we don't need the tail
// either. However, we do this memory usage optimization only if the
// first write is less than the tail size, which is also the input block
// size, otherwise it is likely that other blocks will follow and we
// will need to reallocate to the full size anyway.
pos_ = static_cast<uint32_t>(n);
InitBuffer(pos_);
memcpy(buffer_, bytes, n);
return;
}
if (cur_size_ < total_size_) {
// Lazily allocate the full buffer.
InitBuffer(total_size_);
// Initialize the last two bytes to zero, so that we don't have to worry
// later when we copy the last two bytes to the first two positions.
buffer_[size_ - 2] = 0;
buffer_[size_ - 1] = 0;
}
const size_t masked_pos = pos_ & mask_;
// The length of the writes is limited so that we do not need to worry
// about a write wrapping around the ring buffer more than once.
WriteTail(bytes, n);
if (PREDICT_TRUE(masked_pos + n <= size_)) {
// A single write fits.
memcpy(&buffer_[masked_pos], bytes, n);
} else {
// Split into two writes.
// Copy into the end of the buffer, including the tail buffer.
memcpy(&buffer_[masked_pos], bytes,
std::min(n, total_size_ - masked_pos));
// Copy into the beginning of the buffer
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
n - (size_ - masked_pos));
}
buffer_[-2] = buffer_[size_ - 2];
buffer_[-1] = buffer_[size_ - 1];
pos_ += static_cast<uint32_t>(n);
if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
}
}
void Reset(void) {
pos_ = 0;
}
// Logical cursor position in the ring buffer.
uint32_t position(void) const { return pos_; }
// Bit mask for getting the physical position for a logical position.
uint32_t mask(void) const { return mask_; }
uint8_t *start(void) { return &buffer_[0]; }
const uint8_t *start(void) const { return &buffer_[0]; }
private:
void WriteTail(const uint8_t *bytes, size_t n) {
const size_t masked_pos = pos_ & mask_;
if (PREDICT_FALSE(masked_pos < tail_size_)) {
// Just fill the tail buffer with the beginning data.
const size_t p = size_ + masked_pos;
memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
}
}
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
const uint32_t size_;
const uint32_t mask_;
const uint32_t tail_size_;
const uint32_t total_size_;
uint32_t cur_size_;
// Position to write in the ring buffer.
uint32_t pos_;
// The actual ring buffer containing the copy of the last two bytes, the data,
// and the copy of the beginning as a tail.
uint8_t *data_;
// The start of the ringbuffer.
uint8_t *buffer_;
};
} // namespace brotli
#endif // BROTLI_ENC_RINGBUFFER_H_
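
The tail copy exists so that match scans can run past the physical end of the window without a bounds check: positions i and i + (1 << window_bits) alias the same byte for i < (1 << tail_bits). A toy sketch of that invariant with shrunken sizes:

#include <cstddef>
#include <cstdio>
#include <cstring>

int main() {
  // Toy ring: window_bits = 4 (16 bytes), tail_bits = 2 (4-byte tail copy).
  const size_t size = 16, tail = 4;
  unsigned char buffer[16 + 4];
  const char* input = "abcdefghijklmnop";  // exactly one window of data
  memcpy(buffer, input, size);
  memcpy(buffer + size, input, tail);      // mirror the first tail bytes
  // A scan starting near the end may run into the tail region safely:
  for (size_t i = 0; i < tail; ++i) {
    printf("%c == %c\n", buffer[i], buffer[i + size]);
  }
}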


@ -0,0 +1,455 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "./static_dict.h"
#include <algorithm>
#include "./dictionary.h"
#include "./find_match_length.h"
#include "./static_dict_lut.h"
#include "./transform.h"
namespace brotli {
inline uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - kDictNumBits);
}
inline void AddMatch(size_t distance, size_t len, size_t len_code,
uint32_t* matches) {
uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);
matches[len] = std::min(matches[len], match);
}
inline size_t DictMatchLength(const uint8_t* data,
size_t id,
size_t len,
size_t maxlen) {
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
std::min(len, maxlen));
}
inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) return false;
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
const uint8_t* dict = &kBrotliDictionary[offset];
if (w.transform == 0) {
// Match against base dictionary word.
return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
} else if (w.transform == 10) {
// Match against uppercase first transform.
// Note that there are only ASCII uppercase words in the lookup table.
return (dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
// Match against uppercase all transform.
// Note that there are only ASCII uppercase words in the lookup table.
for (size_t i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return false;
} else {
if (dict[i] != data[i]) return false;
}
}
return true;
}
}
bool FindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
uint32_t* matches) {
bool found_match = false;
size_t key = Hash(data);
size_t bucket = kStaticDictionaryBuckets[key];
if (bucket != 0) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
const size_t matchlen = DictMatchLength(data, id, l, max_length);
// Transform "" + kIdentity + ""
if (matchlen == l) {
AddMatch(id, l, l, matches);
found_match = true;
}
// Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "
if (matchlen >= l - 1) {
AddMatch(id + 12 * n, l - 1, l, matches);
if (l + 2 < max_length &&
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
data[l + 2] == ' ') {
AddMatch(id + 49 * n, l + 3, l, matches);
}
found_match = true;
}
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
size_t minlen = min_length;
if (l > 9) minlen = std::max(minlen, l - 9);
size_t maxlen = std::min(matchlen, l - 2);
for (size_t len = minlen; len <= maxlen; ++len) {
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
found_match = true;
}
if (matchlen < l || l + 6 >= max_length) {
continue;
}
const uint8_t* s = &data[l];
// Transforms "" + kIdentity + <suffix>
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
if (s[1] == 'a') {
if (s[2] == ' ') {
AddMatch(id + 28 * n, l + 3, l, matches);
} else if (s[2] == 's') {
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
} else if (s[2] == 't') {
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
} else if (s[2] == 'n') {
if (s[3] == 'd' && s[4] == ' ') {
AddMatch(id + 10 * n, l + 5, l, matches);
}
}
} else if (s[1] == 'b') {
if (s[2] == 'y' && s[3] == ' ') {
AddMatch(id + 38 * n, l + 4, l, matches);
}
} else if (s[1] == 'i') {
if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
} else if (s[2] == 's') {
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
}
} else if (s[1] == 'f') {
if (s[2] == 'o') {
if (s[3] == 'r' && s[4] == ' ') {
AddMatch(id + 25 * n, l + 5, l, matches);
}
} else if (s[2] == 'r') {
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
AddMatch(id + 37 * n, l + 6, l, matches);
}
}
} else if (s[1] == 'o') {
if (s[2] == 'f') {
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
} else if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
}
} else if (s[1] == 'n') {
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
AddMatch(id + 80 * n, l + 5, l, matches);
}
} else if (s[1] == 't') {
if (s[2] == 'h') {
if (s[3] == 'e') {
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
} else if (s[3] == 'a') {
if (s[4] == 't' && s[5] == ' ') {
AddMatch(id + 29 * n, l + 6, l, matches);
}
}
} else if (s[2] == 'o') {
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
}
} else if (s[1] == 'w') {
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
AddMatch(id + 35 * n, l + 6, l, matches);
}
}
} else if (s[0] == '"') {
AddMatch(id + 19 * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + 21 * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + 20 * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + 31 * n, l + 2, l, matches);
if (s[2] == 'T' && s[3] == 'h') {
if (s[4] == 'e') {
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
} else if (s[4] == 'i') {
if (s[5] == 's' && s[6] == ' ') {
AddMatch(id + 75 * n, l + 7, l, matches);
}
}
}
}
} else if (s[0] == ',') {
AddMatch(id + 76 * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + 14 * n, l + 2, l, matches);
}
} else if (s[0] == '\n') {
AddMatch(id + 22 * n, l + 1, l, matches);
if (s[1] == '\t') {
AddMatch(id + 50 * n, l + 2, l, matches);
}
} else if (s[0] == ']') {
AddMatch(id + 24 * n, l + 1, l, matches);
} else if (s[0] == '\'') {
AddMatch(id + 36 * n, l + 1, l, matches);
} else if (s[0] == ':') {
AddMatch(id + 51 * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + 57 * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + 70 * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + 86 * n, l + 2, l, matches);
}
} else if (s[0] == 'a') {
if (s[1] == 'l' && s[2] == ' ') {
AddMatch(id + 84 * n, l + 3, l, matches);
}
} else if (s[0] == 'e') {
if (s[1] == 'd') {
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
} else if (s[1] == 'r') {
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
} else if (s[1] == 's') {
if (s[2] == 't' && s[3] == ' ') {
AddMatch(id + 95 * n, l + 4, l, matches);
}
}
} else if (s[0] == 'f') {
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
AddMatch(id + 90 * n, l + 4, l, matches);
}
} else if (s[0] == 'i') {
if (s[1] == 'v') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 92 * n, l + 4, l, matches);
}
} else if (s[1] == 'z') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 100 * n, l + 4, l, matches);
}
}
} else if (s[0] == 'l') {
if (s[1] == 'e') {
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
AddMatch(id + 93 * n, l + 5, l, matches);
}
} else if (s[1] == 'y') {
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
}
} else if (s[0] == 'o') {
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
AddMatch(id + 106 * n, l + 4, l, matches);
}
}
} else {
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, data, max_length)) {
continue;
}
// Transform "" + kUppercase{First,All} + ""
AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
found_match = true;
if (l + 1 >= max_length) {
continue;
}
// Transforms "" + kUppercase{First,All} + <suffix>
const uint8_t* s = &data[l];
if (s[0] == ' ') {
AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
} else if (s[0] == '"') {
AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
}
} else if (s[0] == ',') {
AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
}
} else if (s[0] == '\'') {
AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
}
}
}
}
}
// Transforms with prefixes " " and "."
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
bool is_space = (data[0] == ' ');
key = Hash(&data[1]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
// Transforms " " + kIdentity + "" and "." + kIdentity + ""
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
found_match = true;
if (l + 2 >= max_length) {
continue;
}
// Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
const uint8_t* s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
} else if (is_space) {
if (s[0] == ',') {
AddMatch(id + 103 * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + 33 * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + 71 * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + 52 * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + 81 * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + 98 * n, l + 3, l, matches);
}
}
}
} else if (is_space) {
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
// Transforms " " + kUppercase{First,All} + ""
AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
found_match = true;
if (l + 2 >= max_length) {
continue;
}
// Transforms " " + kUppercase{First,All} + <suffix>
const uint8_t* s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
} else if (s[0] == ',') {
if (!t) {
AddMatch(id + 109 * n, l + 2, l, matches);
}
if (s[1] == ' ') {
AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
}
}
}
}
}
if (max_length >= 6) {
// Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
(data[0] == 0xc2 && data[1] == 0xa0)) {
key = Hash(&data[2]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
found_match = true;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
AddMatch(id + t * n, l + 3, l, matches);
found_match = true;
}
}
}
}
}
if (max_length >= 9) {
// Transforms with prefixes " the " and ".com/"
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
data[3] == 'm' && data[4] == '/')) {
key = Hash(&data[5]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
found_match = true;
if (l + 5 < max_length) {
const uint8_t* s = &data[l + 5];
if (data[0] == ' ') {
if (l + 8 < max_length &&
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
AddMatch(id + 62 * n, l + 9, l, matches);
if (l + 12 < max_length &&
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
AddMatch(id + 73 * n, l + 13, l, matches);
}
}
}
}
}
}
}
}
return found_match;
}
} // namespace brotli
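
Each entry written by AddMatch packs the dictionary reference (word id plus transform index times the bucket size) into the high bits and the base word length code into the low five bits; matches[] is indexed by the produced match length, and std::min keeps the smallest candidate per length. A sketch of unpacking one entry:

#include <cstdint>
#include <cstdio>

static const uint32_t kInvalidMatch = 0xfffffff;

int main() {
  // Pack as AddMatch does: (distance << 5) + len_code.
  uint32_t match = (1234u << 5) + 21u;
  if (match != kInvalidMatch) {
    uint32_t dict_ref = match >> 5;  // word id + transform * bucket size
    uint32_t len_code = match & 31;  // base dictionary word length
    printf("dict_ref=%u len_code=%u\n", dict_ref, len_code);  // 1234 21
  }
}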


@ -0,0 +1,32 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Class to model the static dictionary.
#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_
#include "./types.h"
namespace brotli {
static const size_t kMaxDictionaryMatchLen = 37;
static const uint32_t kInvalidMatch = 0xfffffff;
// Matches data against static dictionary words, and for each length l,
// for which a match is found, updates matches[l] to be the minimum possible
// (distance << 5) + len_code.
// Prerequisites:
// matches array is at least kMaxDictionaryMatchLen + 1 long
// all elements are initialized to kInvalidMatch
bool FindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
uint32_t* matches);
} // namespace brotli
#endif // BROTLI_ENC_STATIC_DICT_H_

Diff not shown because of its large size.


@ -0,0 +1,114 @@
/* Copyright 2009 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Convenience routines to make Brotli I/O classes from memory containers and
// files.
#include "./streams.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
namespace brotli {
BrotliMemOut::BrotliMemOut(void* buf, size_t len)
: buf_(buf),
len_(len),
pos_(0) {}
void BrotliMemOut::Reset(void* buf, size_t len) {
buf_ = buf;
len_ = len;
pos_ = 0;
}
// Brotli output routine: copy n bytes to the output buffer.
bool BrotliMemOut::Write(const void *buf, size_t n) {
if (n + pos_ > len_)
return false;
char* p = reinterpret_cast<char*>(buf_) + pos_;
memcpy(p, buf, n);
pos_ += n;
return true;
}
BrotliStringOut::BrotliStringOut(std::string* buf, size_t max_size)
: buf_(buf),
max_size_(max_size) {
assert(buf->empty());
}
void BrotliStringOut::Reset(std::string* buf, size_t max_size) {
buf_ = buf;
max_size_ = max_size;
}
// Brotli output routine: add n bytes to a string.
bool BrotliStringOut::Write(const void *buf, size_t n) {
if (buf_->size() + n > max_size_)
return false;
buf_->append(static_cast<const char*>(buf), n);
return true;
}
BrotliMemIn::BrotliMemIn(const void* buf, size_t len)
: buf_(buf),
len_(len),
pos_(0) {}
void BrotliMemIn::Reset(const void* buf, size_t len) {
buf_ = buf;
len_ = len;
pos_ = 0;
}
// Brotli input routine: read the next chunk of memory.
const void* BrotliMemIn::Read(size_t n, size_t* output) {
if (pos_ == len_) {
return NULL;
}
if (n > len_ - pos_)
n = len_ - pos_;
const char* p = reinterpret_cast<const char*>(buf_) + pos_;
pos_ += n;
*output = n;
return p;
}
BrotliFileIn::BrotliFileIn(FILE* f, size_t max_read_size)
: f_(f),
buf_(new char[max_read_size]),
buf_size_(max_read_size) { }
BrotliFileIn::~BrotliFileIn(void) {
delete[] buf_;
}
const void* BrotliFileIn::Read(size_t n, size_t* bytes_read) {
if (n > buf_size_) {
n = buf_size_;
} else if (n == 0) {
return feof(f_) ? NULL : buf_;
}
*bytes_read = fread(buf_, 1, n, f_);
if (*bytes_read == 0) {
return NULL;
} else {
return buf_;
}
}
BrotliFileOut::BrotliFileOut(FILE* f) : f_(f) {}
bool BrotliFileOut::Write(const void* buf, size_t n) {
if (fwrite(buf, n, 1, f_) != 1) {
return false;
}
return true;
}
} // namespace brotli

@ -0,0 +1,121 @@
/* Copyright 2009 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Input and output classes for streaming brotli compression.
#ifndef BROTLI_ENC_STREAMS_H_
#define BROTLI_ENC_STREAMS_H_
#include <stdio.h>
#include <string>
#include "./port.h"
#include "./types.h"
namespace brotli {
// Input interface for the compression routines.
class BrotliIn {
public:
virtual ~BrotliIn(void) {}
// Return a pointer to the next block of input of at most n bytes.
// Return the actual length in *nread.
// At end of data, return NULL. Don't return NULL if there is more data
// to read, even if called with n == 0.
// Read will only be called if some of its bytes are needed.
virtual const void* Read(size_t n, size_t* nread) = 0;
};
// Output interface for the compression routines.
class BrotliOut {
public:
virtual ~BrotliOut(void) {}
// Write n bytes of data from buf.
// Return true if all written, false otherwise.
virtual bool Write(const void *buf, size_t n) = 0;
};
// Adapter class to make BrotliIn objects from raw memory.
class BrotliMemIn : public BrotliIn {
public:
BrotliMemIn(const void* buf, size_t len);
void Reset(const void* buf, size_t len);
// returns the amount of data consumed
size_t position(void) const { return pos_; }
const void* Read(size_t n, size_t* OUTPUT);
private:
const void* buf_; // start of input buffer
size_t len_; // length of input
size_t pos_; // current read position within input
};
// Adapter class to make BrotliOut objects from raw memory.
class BrotliMemOut : public BrotliOut {
public:
BrotliMemOut(void* buf, size_t len);
void Reset(void* buf, size_t len);
// returns the amount of data written
size_t position(void) const { return pos_; }
bool Write(const void* buf, size_t n);
private:
void* buf_; // start of output buffer
size_t len_; // length of output
size_t pos_; // current write position within output
};
// Adapter class to make BrotliOut objects from a string.
class BrotliStringOut : public BrotliOut {
public:
// Create a writer that appends its data to buf.
// buf->size() will grow to at most max_size
// buf is expected to be empty when constructing BrotliStringOut.
BrotliStringOut(std::string* buf, size_t max_size);
void Reset(std::string* buf, size_t max_len);
bool Write(const void* buf, size_t n);
private:
std::string* buf_; // start of output buffer
size_t max_size_; // max length of output
};
// Adapter class to make BrotliIn object from a file.
class BrotliFileIn : public BrotliIn {
public:
BrotliFileIn(FILE* f, size_t max_read_size);
~BrotliFileIn(void);
const void* Read(size_t n, size_t* bytes_read);
private:
FILE* f_;
char* buf_;
size_t buf_size_;
};
// Adapter class to make BrotliOut object from a file.
class BrotliFileOut : public BrotliOut {
public:
explicit BrotliFileOut(FILE* f);
bool Write(const void* buf, size_t n);
private:
FILE* f_;
};
} // namespace brotli
#endif // BROTLI_ENC_STREAMS_H_
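
[Editor's note] A usage sketch, not part of the imported sources: it wires the BrotliMemIn/BrotliMemOut adapters above to the BrotliCompress(params, in, out) entry point that bro.cc below calls; BrotliParams and BrotliCompress are assumed to come from ./compressor.h, as in that tool.

#include <stddef.h>
#include <stdint.h>
#include "./compressor.h"  // assumed home of BrotliParams / BrotliCompress
#include "./streams.h"

// Compress one flat buffer into another; returns the compressed size,
// or 0 if the output buffer was too small or compression failed.
size_t CompressBuffer(const uint8_t* input, size_t input_size,
                      uint8_t* output, size_t output_capacity) {
  brotli::BrotliParams params;  // default quality/window settings
  brotli::BrotliMemIn in(input, input_size);
  brotli::BrotliMemOut out(output, output_capacity);
  if (!brotli::BrotliCompress(params, &in, &out)) {
    return 0;
  }
  return out.position();  // bytes written so far, per BrotliMemOut
}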

@ -0,0 +1,248 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Transformations on dictionary words.
#ifndef BROTLI_ENC_TRANSFORM_H_
#define BROTLI_ENC_TRANSFORM_H_
#include <string>
#include "./dictionary.h"
namespace brotli {
enum WordTransformType {
kIdentity = 0,
kOmitLast1 = 1,
kOmitLast2 = 2,
kOmitLast3 = 3,
kOmitLast4 = 4,
kOmitLast5 = 5,
kOmitLast6 = 6,
kOmitLast7 = 7,
kOmitLast8 = 8,
kOmitLast9 = 9,
kUppercaseFirst = 10,
kUppercaseAll = 11,
kOmitFirst1 = 12,
kOmitFirst2 = 13,
kOmitFirst3 = 14,
kOmitFirst4 = 15,
kOmitFirst5 = 16,
kOmitFirst6 = 17,
kOmitFirst7 = 18,
kOmitFirst8 = 19,
kOmitFirst9 = 20
};
struct Transform {
const char* prefix;
WordTransformType word_transform;
const char* suffix;
};
static const Transform kTransforms[] = {
{ "", kIdentity, "" },
{ "", kIdentity, " " },
{ " ", kIdentity, " " },
{ "", kOmitFirst1, "" },
{ "", kUppercaseFirst, " " },
{ "", kIdentity, " the " },
{ " ", kIdentity, "" },
{ "s ", kIdentity, " " },
{ "", kIdentity, " of " },
{ "", kUppercaseFirst, "" },
{ "", kIdentity, " and " },
{ "", kOmitFirst2, "" },
{ "", kOmitLast1, "" },
{ ", ", kIdentity, " " },
{ "", kIdentity, ", " },
{ " ", kUppercaseFirst, " " },
{ "", kIdentity, " in " },
{ "", kIdentity, " to " },
{ "e ", kIdentity, " " },
{ "", kIdentity, "\"" },
{ "", kIdentity, "." },
{ "", kIdentity, "\">" },
{ "", kIdentity, "\n" },
{ "", kOmitLast3, "" },
{ "", kIdentity, "]" },
{ "", kIdentity, " for " },
{ "", kOmitFirst3, "" },
{ "", kOmitLast2, "" },
{ "", kIdentity, " a " },
{ "", kIdentity, " that " },
{ " ", kUppercaseFirst, "" },
{ "", kIdentity, ". " },
{ ".", kIdentity, "" },
{ " ", kIdentity, ", " },
{ "", kOmitFirst4, "" },
{ "", kIdentity, " with " },
{ "", kIdentity, "'" },
{ "", kIdentity, " from " },
{ "", kIdentity, " by " },
{ "", kOmitFirst5, "" },
{ "", kOmitFirst6, "" },
{ " the ", kIdentity, "" },
{ "", kOmitLast4, "" },
{ "", kIdentity, ". The " },
{ "", kUppercaseAll, "" },
{ "", kIdentity, " on " },
{ "", kIdentity, " as " },
{ "", kIdentity, " is " },
{ "", kOmitLast7, "" },
{ "", kOmitLast1, "ing " },
{ "", kIdentity, "\n\t" },
{ "", kIdentity, ":" },
{ " ", kIdentity, ". " },
{ "", kIdentity, "ed " },
{ "", kOmitFirst9, "" },
{ "", kOmitFirst7, "" },
{ "", kOmitLast6, "" },
{ "", kIdentity, "(" },
{ "", kUppercaseFirst, ", " },
{ "", kOmitLast8, "" },
{ "", kIdentity, " at " },
{ "", kIdentity, "ly " },
{ " the ", kIdentity, " of " },
{ "", kOmitLast5, "" },
{ "", kOmitLast9, "" },
{ " ", kUppercaseFirst, ", " },
{ "", kUppercaseFirst, "\"" },
{ ".", kIdentity, "(" },
{ "", kUppercaseAll, " " },
{ "", kUppercaseFirst, "\">" },
{ "", kIdentity, "=\"" },
{ " ", kIdentity, "." },
{ ".com/", kIdentity, "" },
{ " the ", kIdentity, " of the " },
{ "", kUppercaseFirst, "'" },
{ "", kIdentity, ". This " },
{ "", kIdentity, "," },
{ ".", kIdentity, " " },
{ "", kUppercaseFirst, "(" },
{ "", kUppercaseFirst, "." },
{ "", kIdentity, " not " },
{ " ", kIdentity, "=\"" },
{ "", kIdentity, "er " },
{ " ", kUppercaseAll, " " },
{ "", kIdentity, "al " },
{ " ", kUppercaseAll, "" },
{ "", kIdentity, "='" },
{ "", kUppercaseAll, "\"" },
{ "", kUppercaseFirst, ". " },
{ " ", kIdentity, "(" },
{ "", kIdentity, "ful " },
{ " ", kUppercaseFirst, ". " },
{ "", kIdentity, "ive " },
{ "", kIdentity, "less " },
{ "", kUppercaseAll, "'" },
{ "", kIdentity, "est " },
{ " ", kUppercaseFirst, "." },
{ "", kUppercaseAll, "\">" },
{ " ", kIdentity, "='" },
{ "", kUppercaseFirst, "," },
{ "", kIdentity, "ize " },
{ "", kUppercaseAll, "." },
{ "\xc2\xa0", kIdentity, "" },
{ " ", kIdentity, "," },
{ "", kUppercaseFirst, "=\"" },
{ "", kUppercaseAll, "=\"" },
{ "", kIdentity, "ous " },
{ "", kUppercaseAll, ", " },
{ "", kUppercaseFirst, "='" },
{ " ", kUppercaseFirst, "," },
{ " ", kUppercaseAll, "=\"" },
{ " ", kUppercaseAll, ", " },
{ "", kUppercaseAll, "," },
{ "", kUppercaseAll, "(" },
{ "", kUppercaseAll, ". " },
{ " ", kUppercaseAll, "." },
{ "", kUppercaseAll, "='" },
{ " ", kUppercaseAll, ". " },
{ " ", kUppercaseFirst, "=\"" },
{ " ", kUppercaseAll, "='" },
{ " ", kUppercaseFirst, "='" },
};
static const size_t kNumTransforms =
sizeof(kTransforms) / sizeof(kTransforms[0]);
static const size_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
static size_t ToUpperCase(uint8_t *p, size_t len) {
if (len == 1 || p[0] < 0xc0) {
if (p[0] >= 'a' && p[0] <= 'z') {
p[0] ^= 32;
}
return 1;
}
if (p[0] < 0xe0) {
p[1] ^= 32;
return 2;
}
if (len == 2) {
return 2;
}
p[2] ^= 5;
return 3;
}
inline std::string TransformWord(
WordTransformType transform_type, const uint8_t* word, size_t len) {
if (transform_type <= kOmitLast9) {
if (len <= static_cast<size_t>(transform_type)) {
return std::string();
}
return std::string(word, word + len - transform_type);
}
if (transform_type >= kOmitFirst1) {
const size_t skip = transform_type - (kOmitFirst1 - 1);
if (len <= skip) {
return std::string();
}
return std::string(word + skip, word + len);
}
std::string ret = std::string(word, word + len);
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]);
if (transform_type == kUppercaseFirst) {
ToUpperCase(uppercase, len);
} else if (transform_type == kUppercaseAll) {
size_t position = 0;
while (position < len) {
size_t step = ToUpperCase(uppercase, len - position);
uppercase += step;
position += step;
}
}
return ret;
}
inline std::string ApplyTransform(
const Transform& t, const uint8_t* word, size_t len) {
return std::string(t.prefix) +
TransformWord(t.word_transform, word, len) + std::string(t.suffix);
}
inline std::string GetTransformedDictionaryWord(size_t len_code,
size_t word_id) {
size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
size_t t = word_id / num_words;
size_t word_idx = word_id % num_words;
offset += len_code * word_idx;
const uint8_t* word = &kBrotliDictionary[offset];
return ApplyTransform(kTransforms[t], word, len_code);
}
} // namespace brotli
#endif // BROTLI_ENC_TRANSFORM_H_
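
[Editor's note] A small example, not part of the imported sources, applying one kTransforms entry directly; index 4 is { "", kUppercaseFirst, " " } in the table above.

#include <stdint.h>
#include <stdio.h>
#include <string>
#include "./transform.h"

int main() {
  const uint8_t word[] = { 'h', 'e', 'l', 'l', 'o' };
  // ApplyTransform = prefix + TransformWord(word) + suffix.
  std::string out = brotli::ApplyTransform(brotli::kTransforms[4], word, 5);
  printf("\"%s\"\n", out.c_str());  // prints "Hello " (note the suffix space)
  return 0;
}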

@ -0,0 +1,29 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Common types */
#ifndef BROTLI_ENC_TYPES_H_
#define BROTLI_ENC_TYPES_H_
#include <stddef.h> /* for size_t */
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
typedef __int64 int64_t;
#else
#include <stdint.h>
#endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
#define MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | ((uint64_t)(low)))
#endif /* BROTLI_ENC_TYPES_H_ */
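
[Editor's note] A one-line example, not part of the imported sources: MAKE_UINT64_T assembles a 64-bit constant from two 32-bit halves, so pre-C99 MSVC builds need no ULL literals.

#include "./types.h"

static const uint64_t kTopBit = MAKE_UINT64_T(0x80000000, 0x00000000);
// kTopBit == 0x8000000000000000, i.e. bit 63 set.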

@ -0,0 +1,83 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Heuristics for deciding about the UTF8-ness of strings.
#include "./utf8_util.h"
#include "./types.h"
namespace brotli {
namespace {
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
// ASCII
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
if (*symbol > 0) {
return 1;
}
}
// 2-byte UTF8
if (size > 1u &&
(input[0] & 0xe0) == 0xc0 &&
(input[1] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x1f) << 6) |
(input[1] & 0x3f));
if (*symbol > 0x7f) {
return 2;
}
}
// 3-byte UTF8
if (size > 2u &&
(input[0] & 0xf0) == 0xe0 &&
(input[1] & 0xc0) == 0x80 &&
(input[2] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x0f) << 12) |
((input[1] & 0x3f) << 6) |
(input[2] & 0x3f));
if (*symbol > 0x7ff) {
return 3;
}
}
// 4-byte UTF8
if (size > 3u &&
(input[0] & 0xf8) == 0xf0 &&
(input[1] & 0xc0) == 0x80 &&
(input[2] & 0xc0) == 0x80 &&
(input[3] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x07) << 18) |
((input[1] & 0x3f) << 12) |
((input[2] & 0x3f) << 6) |
(input[3] & 0x3f));
if (*symbol > 0xffff && *symbol <= 0x10ffff) {
return 4;
}
}
// Not UTF8, emit a special symbol above the UTF8-code space
*symbol = 0x110000 | input[0];
return 1;
}
} // namespace
// Returns true if at least min_fraction of the data is UTF8-encoded.
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction) {
size_t size_utf8 = 0;
size_t i = 0;
while (i < length) {
int symbol;
size_t bytes_read = ParseAsUTF8(
&symbol, &data[(pos + i) & mask], length - i);
i += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
return size_utf8 > min_fraction * static_cast<double>(length);
}
} // namespace brotli

@ -0,0 +1,25 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Heuristics for deciding about the UTF8-ness of strings.
#ifndef BROTLI_ENC_UTF8_UTIL_H_
#define BROTLI_ENC_UTF8_UTIL_H_
#include "./types.h"
namespace brotli {
static const double kMinUTF8Ratio = 0.75;
// Returns true if at least min_fraction of the bytes between pos and
// pos + length in the (data, mask) ringbuffer is UTF8-encoded.
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction);
} // namespace brotli
#endif // BROTLI_ENC_UTF8_UTIL_H_
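
[Editor's note] A usage sketch, not part of the imported sources: for a flat (non-wrapping) buffer, an all-ones mask makes the ring-buffer index (pos + i) & mask degenerate to a plain linear index.

#include <stddef.h>
#include <stdint.h>
#include "./utf8_util.h"

bool BufferIsMostlyUTF8(const uint8_t* buf, size_t len) {
  const size_t kNoWrapMask = ~static_cast<size_t>(0);
  return brotli::IsMostlyUTF8(buf, /*pos=*/0, kNoWrapMask, len,
                              brotli::kMinUTF8Ratio);
}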

@ -0,0 +1,84 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Write bits into a byte array.
#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_
#include <assert.h>
#include <stdio.h>
#include "./port.h"
#include "./types.h"
namespace brotli {
//#define BIT_WRITER_DEBUG
// This function writes bits into bytes in increasing addresses, and within
// a byte least-significant-bit first.
//
// The function can write up to 56 bits in one go with WriteBits
// Example: let's assume that 3 bits (Rs below) have been written already:
//
// BYTE-0 BYTE+1 BYTE+2
//
// 0000 0RRR 0000 0000 0000 0000
//
// Now, we could write 5 or fewer bits in MSB by just shifting by 3
// and OR'ing to BYTE-0.
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
inline void WriteBits(size_t n_bits,
uint64_t bits,
size_t * __restrict pos,
uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", static_cast<int>(n_bits),
       static_cast<unsigned long long>(bits), static_cast<int>(*pos));
#endif
assert((bits >> n_bits) == 0);
assert(n_bits <= 56);
#ifdef IS_LITTLE_ENDIAN
// This branch of the code can write up to 56 bits at a time,
// 7 bits are lost by being perhaps already in *p and at least
// 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
// bits are in *p and we write 57 bits, then the next write will
// access a byte that was never initialized).
uint8_t *p = &array[*pos >> 3];
uint64_t v = *p;
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64(p, v); // Set some bits.
*pos += n_bits;
#else
// implicit & 0xff is assumed for uint8_t arithmetic
uint8_t *array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= static_cast<uint8_t>(bits);
for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = static_cast<uint8_t>(bits);
}
*array_pos = 0;
*pos += n_bits;
#endif
}
inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBitsPrepareStorage %10d\n", pos);
#endif
assert((pos & 7) == 0);
array[pos >> 3] = 0;
}
} // namespace brotli
#endif // BROTLI_ENC_WRITE_BITS_H_
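
[Editor's note] A self-contained sketch, not part of the imported sources, demonstrating the LSB-first packing described above: three small fields are appended and land in the low bits of the first byte.

#include <stdio.h>
#include "./write_bits.h"

int main() {
  uint8_t storage[16];  // room for the 64-bit store WriteBits may perform
  size_t pos = 0;
  brotli::WriteBitsPrepareStorage(pos, storage);  // zero the first byte
  brotli::WriteBits(2, 2, &pos, storage);  // bits 0..1 <- binary 10
  brotli::WriteBits(3, 5, &pos, storage);  // bits 2..4 <- binary 101
  brotli::WriteBits(4, 6, &pos, storage);  // bits 5..8 <- binary 0110
  // pos is now 9; storage[0] == 0xd6 (0b11010110, fields packed LSB-first).
  printf("%d bits, first byte 0x%02x\n", static_cast<int>(pos), storage[0]);
  return 0;
}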

@ -0,0 +1,25 @@
#brotli/tools
include ../shared.mk
BROTLI = ..
ENCOBJ = $(BROTLI)/enc/*.o
DECOBJ = $(BROTLI)/dec/*.o
EXECUTABLES=bro
EXE_OBJS=$(patsubst %, %.o, $(EXECUTABLES))
all : $(EXECUTABLES)
$(EXECUTABLES) : $(EXE_OBJS) deps
$(CXX) $(LDFLAGS) $(ENCOBJ) $(DECOBJ) $@.o -o $@
deps :
$(MAKE) -C $(BROTLI)/dec
$(MAKE) -C $(BROTLI)/enc nodict
clean :
rm -f $(OBJS) $(EXE_OBJS) $(EXECUTABLES)
$(MAKE) -C $(BROTLI)/dec clean
$(MAKE) -C $(BROTLI)/enc clean

modules/brotli/tools/bro.cc (new file, 335 lines)
@ -0,0 +1,335 @@
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Example main() function for Brotli library. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <string>
#include "../dec/decode.h"
#include "../enc/compressor.h"
#if !defined(_WIN32)
#include <unistd.h>
#else
#include <io.h>
#define STDIN_FILENO _fileno(stdin)
#define STDOUT_FILENO _fileno(stdout)
#define S_IRUSR S_IREAD
#define S_IWUSR S_IWRITE
#define fdopen _fdopen
#define unlink _unlink
#define fopen ms_fopen
#define open ms_open
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define fseek _fseeki64
#define ftell _ftelli64
#endif
static inline FILE* ms_fopen(const char *filename, const char *mode) {
FILE* result = 0;
fopen_s(&result, filename, mode);
return result;
}
static inline int ms_open(const char *filename, int oflag, int pmode) {
int result = -1;
_sopen_s(&result, filename, oflag | O_BINARY, _SH_DENYNO, pmode);
return result;
}
#endif /* WIN32 */
static bool ParseQuality(const char* s, int* quality) {
if (s[0] >= '0' && s[0] <= '9') {
*quality = s[0] - '0';
if (s[1] >= '0' && s[1] <= '9') {
*quality = *quality * 10 + s[1] - '0';
return s[2] == 0;
}
return s[1] == 0;
}
return false;
}
static void ParseArgv(int argc, char **argv,
char **input_path,
char **output_path,
int *force,
int *quality,
int *decompress,
int *repeat,
int *verbose,
int *lgwin) {
*force = 0;
*input_path = 0;
*output_path = 0;
*repeat = 1;
*verbose = 0;
*lgwin = 22;
{
size_t argv0_len = strlen(argv[0]);
*decompress =
argv0_len >= 5 && strcmp(&argv[0][argv0_len - 5], "unbro") == 0;
}
for (int k = 1; k < argc; ++k) {
if (!strcmp("--force", argv[k]) ||
!strcmp("-f", argv[k])) {
if (*force != 0) {
goto error;
}
*force = 1;
continue;
} else if (!strcmp("--decompress", argv[k]) ||
!strcmp("--uncompress", argv[k]) ||
!strcmp("-d", argv[k])) {
*decompress = 1;
continue;
} else if (!strcmp("--verbose", argv[k]) ||
!strcmp("-v", argv[k])) {
if (*verbose != 0) {
goto error;
}
*verbose = 1;
continue;
}
if (k < argc - 1) {
if (!strcmp("--input", argv[k]) ||
!strcmp("--in", argv[k]) ||
!strcmp("-i", argv[k])) {
if (*input_path != 0) {
goto error;
}
*input_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--output", argv[k]) ||
!strcmp("--out", argv[k]) ||
!strcmp("-o", argv[k])) {
if (*output_path != 0) {
goto error;
}
*output_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--quality", argv[k]) ||
!strcmp("-q", argv[k])) {
if (!ParseQuality(argv[k + 1], quality)) {
goto error;
}
++k;
continue;
} else if (!strcmp("--repeat", argv[k]) ||
!strcmp("-r", argv[k])) {
if (!ParseQuality(argv[k + 1], repeat)) {
goto error;
}
++k;
continue;
} else if (!strcmp("--window", argv[k]) ||
!strcmp("-w", argv[k])) {
if (!ParseQuality(argv[k + 1], lgwin)) {
goto error;
}
if (*lgwin < 10 || *lgwin >= 25) {
goto error;
}
++k;
continue;
}
}
goto error;
}
return;
error:
fprintf(stderr,
"Usage: %s [--force] [--quality n] [--decompress]"
" [--input filename] [--output filename] [--repeat iters]"
" [--verbose] [--window n]\n",
argv[0]);
exit(1);
}
static FILE* OpenInputFile(const char* input_path) {
if (input_path == 0) {
return fdopen(STDIN_FILENO, "rb");
}
FILE* f = fopen(input_path, "rb");
if (f == 0) {
perror("fopen");
exit(1);
}
return f;
}
static FILE *OpenOutputFile(const char *output_path, const int force) {
if (output_path == 0) {
return fdopen(STDOUT_FILENO, "wb");
}
int excl = force ? 0 : O_EXCL;
int fd = open(output_path, O_CREAT | excl | O_WRONLY | O_TRUNC,
S_IRUSR | S_IWUSR);
if (fd < 0) {
if (!force) {
struct stat statbuf;
if (stat(output_path, &statbuf) == 0) {
fprintf(stderr, "output file exists\n");
exit(1);
}
}
perror("open");
exit(1);
}
return fdopen(fd, "wb");
}
static int64_t FileSize(char *path) {
FILE *f = fopen(path, "rb");
if (f == NULL) {
return -1;
}
if (fseek(f, 0L, SEEK_END) != 0) {
fclose(f);
return -1;
}
int64_t retval = ftell(f);
if (fclose(f) != 0) {
return -1;
}
return retval;
}
static const size_t kFileBufferSize = 65536;
/* Streaming decompression loop: refill the input buffer whenever the
   decoder reports NEEDS_MORE_INPUT and flush whenever it reports
   NEEDS_MORE_OUTPUT. */
static void Decompress(FILE* fin, FILE* fout) {
BrotliState* s = BrotliCreateState(NULL, NULL, NULL);
if (!s) {
fprintf(stderr, "out of memory\n");
exit(1);
}
uint8_t* input = new uint8_t[kFileBufferSize];
uint8_t* output = new uint8_t[kFileBufferSize];
size_t total_out;
size_t available_in;
const uint8_t* next_in;
size_t available_out = kFileBufferSize;
uint8_t* next_out = output;
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_INPUT;
while (1) {
if (result == BROTLI_RESULT_NEEDS_MORE_INPUT) {
if (feof(fin)) {
break;
}
available_in = fread(input, 1, kFileBufferSize, fin);
next_in = input;
if (ferror(fin)) {
break;
}
} else if (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
fwrite(output, 1, kFileBufferSize, fout);
if (ferror(fout)) {
break;
}
available_out = kFileBufferSize;
next_out = output;
} else {
break; /* Error or success. */
}
result = BrotliDecompressStream(&available_in, &next_in,
&available_out, &next_out, &total_out, s);
}
if (next_out != output) {
fwrite(output, 1, static_cast<size_t>(next_out - output), fout);
}
delete[] input;
delete[] output;
BrotliDestroyState(s);
if ((result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) || ferror(fout)) {
fprintf(stderr, "failed to write output\n");
exit(1);
} else if (result != BROTLI_RESULT_SUCCESS) { /* Error or needs more input. */
fprintf(stderr, "corrupt input\n");
exit(1);
}
}
int main(int argc, char** argv) {
char *input_path = 0;
char *output_path = 0;
int force = 0;
int quality = 11;
int decompress = 0;
int repeat = 1;
int verbose = 0;
int lgwin = 0;
ParseArgv(argc, argv, &input_path, &output_path, &force,
&quality, &decompress, &repeat, &verbose, &lgwin);
const clock_t clock_start = clock();
for (int i = 0; i < repeat; ++i) {
FILE* fin = OpenInputFile(input_path);
FILE* fout = OpenOutputFile(output_path, force);
if (decompress) {
Decompress(fin, fout);
} else {
brotli::BrotliParams params;
params.lgwin = lgwin;
params.quality = quality;
try {
brotli::BrotliFileIn in(fin, 1 << 16);
brotli::BrotliFileOut out(fout);
if (!BrotliCompress(params, &in, &out)) {
fprintf(stderr, "compression failed\n");
unlink(output_path);
exit(1);
}
} catch (std::bad_alloc&) {
fprintf(stderr, "not enough memory\n");
unlink(output_path);
exit(1);
}
}
if (fclose(fin) != 0) {
perror("fclose");
exit(1);
}
if (fclose(fout) != 0) {
perror("fclose");
exit(1);
}
}
if (verbose) {
const clock_t clock_end = clock();
double duration =
static_cast<double>(clock_end - clock_start) / CLOCKS_PER_SEC;
if (duration < 1e-9) {
duration = 1e-9;
}
int64_t uncompressed_size = FileSize(decompress ? output_path : input_path);
if (uncompressed_size == -1) {
fprintf(stderr, "failed to determine uncompressed file size\n");
exit(1);
}
double uncompressed_bytes_in_MB =
static_cast<double>(repeat * uncompressed_size) / (1024.0 * 1024.0);
if (decompress) {
printf("Brotli decompression speed: ");
} else {
printf("Brotli compression speed: ");
}
printf("%g MB/s\n", uncompressed_bytes_in_MB / duration);
}
return 0;
}

@ -0,0 +1,92 @@
#!/usr/bin/python
#
# Takes an .nroff source file and prints a text file in RFC format.
#
# Usage: rfc-format.py <source file>
import re
import sys
from subprocess import Popen, PIPE
def Readfile(fn):
f = open(fn, "r")
return f.read()
def FixNroffOutput(buf):
p = re.compile(r'(.*)FORMFEED(\[Page\s+\d+\])$')
strip_empty = False
out = ""
for line in buf.split("\n"):
line = line.replace("\xe2\x80\x99", "'")
line = line.replace("\xe2\x80\x90", "-")
for i in range(len(line)):
if ord(line[i]) > 128:
print >>sys.stderr, "Invalid character %d\n" % ord(line[i])
m = p.search(line)
if strip_empty and len(line) == 0:
continue
if m:
out += p.sub(r'\1 \2\n\f', line)
out += "\n"
strip_empty = True
else:
out += "%s\n" % line
strip_empty = False
return out.rstrip("\n")
def Nroff(buf):
p = Popen(["nroff", "-ms"], stdin=PIPE, stdout=PIPE)
out, err = p.communicate(input=buf)
return FixNroffOutput(out)
def FormatTocLine(section, title, page):
line = ""
level = 1
if section:
level = section.count(".")
for i in range(level):
line += " "
if section:
line += "%s " % section
line += "%s " % title
pagenum = "%d" % page
nspace = 72 - len(line) - len(pagenum)
if nspace % 2:
line += " "
for i in range(nspace / 2):
line += ". "
line += "%d\n" % page
return line
def CreateToc(buf):
p1 = re.compile(r'^((\d+\.)+)\s+(.*)$')
p2 = re.compile(r'^(Appendix [A-Z].)\s+(.*)$')
p3 = re.compile(r'\[Page (\d+)\]$')
found = 0
page = 1
out = ""
for line in buf.split("\n"):
m1 = p1.search(line)
m2 = p2.search(line)
m3 = p3.search(line)
if m1:
out += FormatTocLine(m1.group(1), m1.group(3), page)
elif m2:
out += FormatTocLine(m2.group(1), m2.group(2), page)
elif line.startswith("Authors"):
out += FormatTocLine(None, line, page)
elif m3:
page = int(m3.group(1)) + 1
return out
src = Readfile(sys.argv[1])
out = Nroff(src)
toc = CreateToc(out)
src = src.replace("INSERT_TOC_HERE", toc)
print Nroff(src)

@ -0,0 +1,14 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Defines a common version string used by all of the brotli tools. */
#ifndef BROTLI_TOOLS_VERSION_H_
#define BROTLI_TOOLS_VERSION_H_
#define BROTLI_VERSION "0.4.0"
#endif /* BROTLI_TOOLS_VERSION_H_ */

@ -6,14 +6,19 @@
MY_TEMP_DIR=`mktemp -d -t brotli_update.XXXXXX` || exit 1
git clone https://github.com/google/brotli ${MY_TEMP_DIR}/brotli
git -C ${MY_TEMP_DIR}/brotli checkout v0.4.0
-COMMIT=`(cd ${MY_TEMP_DIR}/brotli && git log | head -n 1)`
-perl -p -i -e "s/\[commit [0-9a-f]{40}\]/[${COMMIT}]/" README.mozilla;
+COMMIT=$(git -C ${MY_TEMP_DIR}/brotli rev-parse HEAD)
+perl -p -i -e "s/\[commit [0-9a-f]{40}\]/[commit ${COMMIT}]/" README.mozilla;
-rm -rf dec
-mv ${MY_TEMP_DIR}/brotli/dec dec
+DIRS="dec enc tools"
+for d in $DIRS; do
+  rm -rf $d
+  mv ${MY_TEMP_DIR}/brotli/$d $d
+done
 rm -rf ${MY_TEMP_DIR}
-hg add dec
+hg addremove $DIRS
echo "###"
echo "### Updated brotli/dec to $COMMIT."