Bug 1355671 - Import brotli encoder and command line tool code. r=gps

--HG--
extra : rebase_source : f7a1326a7d39a34828b596140da8482718828352
This commit is contained in:
Mike Hommey 2017-04-07 15:57:31 +09:00
Parent 8f667a0d22
Commit 057318a71f
53 changed files with 37789 additions and 5 deletions

View file

@@ -0,0 +1,14 @@
#brotli/enc
include ../shared.mk
OBJS_NODICT = backward_references.o block_splitter.o brotli_bit_stream.o compress_fragment.o compress_fragment_two_pass.o encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o metablock.o static_dict.o streams.o utf8_util.o
OBJS = $(OBJS_NODICT) dictionary.o
nodict : $(OBJS_NODICT)
all : $(OBJS)
clean :
rm -f $(OBJS) $(SO)

View file

@@ -0,0 +1,858 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find backward reference copies.
#include "./backward_references.h"
#include <algorithm>
#include <limits>
#include <vector>
#include "./command.h"
#include "./fast_log.h"
#include "./literal_cost.h"
namespace brotli {
// The maximum length for which the zopflification uses distinct distances.
static const uint16_t kMaxZopfliLen = 325;
// Histogram based cost model for zopflification.
class ZopfliCostModel {
public:
ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {}
void SetFromCommands(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const Command* commands,
size_t num_commands,
size_t last_insert_len) {
std::vector<uint32_t> histogram_literal(256, 0);
std::vector<uint32_t> histogram_cmd(kNumCommandPrefixes, 0);
std::vector<uint32_t> histogram_dist(kNumDistancePrefixes, 0);
size_t pos = position - last_insert_len;
for (size_t i = 0; i < num_commands; i++) {
size_t inslength = commands[i].insert_len_;
size_t copylength = commands[i].copy_len();
size_t distcode = commands[i].dist_prefix_;
size_t cmdcode = commands[i].cmd_prefix_;
histogram_cmd[cmdcode]++;
if (cmdcode >= 128) histogram_dist[distcode]++;
for (size_t j = 0; j < inslength; j++) {
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
}
pos += inslength + copylength;
}
std::vector<float> cost_literal;
Set(histogram_literal, &cost_literal);
Set(histogram_cmd, &cost_cmd_);
Set(histogram_dist, &cost_dist_);
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
}
literal_costs_.resize(num_bytes + 1);
literal_costs_[0] = 0.0;
for (size_t i = 0; i < num_bytes; ++i) {
literal_costs_[i + 1] = literal_costs_[i] +
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
}
}
void SetFromLiteralCosts(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
literal_costs_.resize(num_bytes + 2);
EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
ringbuffer, &literal_costs_[1]);
literal_costs_[0] = 0.0;
for (size_t i = 0; i < num_bytes; ++i) {
literal_costs_[i + 1] += literal_costs_[i];
}
cost_cmd_.resize(kNumCommandPrefixes);
cost_dist_.resize(kNumDistancePrefixes);
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
cost_cmd_[i] = static_cast<float>(FastLog2(11 + i));
}
for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
cost_dist_[i] = static_cast<float>(FastLog2(20 + i));
}
min_cost_cmd_ = static_cast<float>(FastLog2(11));
}
float GetCommandCost(
size_t dist_code, size_t length_code, size_t insert_length) const {
uint16_t inscode = GetInsertLengthCode(insert_length);
uint16_t copycode = GetCopyLengthCode(length_code);
uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code == 0);
uint16_t dist_symbol;
uint32_t distextra;
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
uint32_t distnumextra = distextra >> 24;
float result = static_cast<float>(
GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra);
result += cost_cmd_[cmdcode];
if (cmdcode >= 128) result += cost_dist_[dist_symbol];
return result;
}
float GetLiteralCosts(size_t from, size_t to) const {
return literal_costs_[to] - literal_costs_[from];
}
float GetMinCostCmd(void) const {
return min_cost_cmd_;
}
private:
void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) {
cost->resize(histogram.size());
size_t sum = 0;
for (size_t i = 0; i < histogram.size(); i++) {
sum += histogram[i];
}
float log2sum = static_cast<float>(FastLog2(sum));
for (size_t i = 0; i < histogram.size(); i++) {
if (histogram[i] == 0) {
(*cost)[i] = log2sum + 2;
continue;
}
// Shannon bits for this symbol.
(*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i]));
// Cannot be coded with less than 1 bit
if ((*cost)[i] < 1) (*cost)[i] = 1;
}
}
std::vector<float> cost_cmd_; // The insert and copy length symbols.
std::vector<float> cost_dist_;
// Cumulative costs of literals per position in the stream.
std::vector<float> literal_costs_;
float min_cost_cmd_;
};
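// Editorial worked example: for a literal histogram {8, 4, 4, 0} the sum is
// 16 and FastLog2(16) = 4, so Set() yields costs {1.0, 2.0, 2.0, 6.0}:
// 4 - log2(8) = 1 bit, 4 - log2(4) = 2 bits, the absent symbol is charged
// log2(sum) + 2 = 6 bits, and anything below 1 bit would be clamped to 1.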
inline size_t ComputeDistanceCode(size_t distance,
size_t max_distance,
int quality,
const int* dist_cache) {
if (distance <= max_distance) {
if (distance == static_cast<size_t>(dist_cache[0])) {
return 0;
} else if (distance == static_cast<size_t>(dist_cache[1])) {
return 1;
} else if (distance == static_cast<size_t>(dist_cache[2])) {
return 2;
} else if (distance == static_cast<size_t>(dist_cache[3])) {
return 3;
} else if (quality > 3 && distance >= 6) {
for (size_t k = 4; k < kNumDistanceShortCodes; ++k) {
size_t idx = kDistanceCacheIndex[k];
size_t candidate =
static_cast<size_t>(dist_cache[idx] + kDistanceCacheOffset[k]);
static const size_t kLimits[16] = { 0, 0, 0, 0,
6, 6, 11, 11,
11, 11, 11, 11,
12, 12, 12, 12 };
if (distance == candidate && distance >= kLimits[k]) {
return k;
}
}
}
}
return distance + 15;
}
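// Editorial example: with dist_cache = {11, 4, 25, 16}, a match at distance
// 11 yields short code 0 and one at distance 4 yields short code 1. At
// quality > 3, small offsets from the cached distances (e.g. 11 +/- 1) can
// match one of the derived short codes 4..15, subject to the kLimits floor.
// Any other distance is emitted as distance + 15, past the 16 short codes.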
// REQUIRES: len >= 2, start_pos <= pos
// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
// Maintains the "ZopfliNode array invariant".
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
size_t len, size_t len_code, size_t dist,
size_t short_code, float cost) {
ZopfliNode& next = nodes[pos + len];
next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
next.distance = static_cast<uint32_t>(dist | (short_code << 25));
next.insert_length = static_cast<uint32_t>(pos - start_pos);
next.cost = cost;
}
// Maintains the smallest 2^k cost difference together with their positions
class StartPosQueue {
public:
struct PosData {
size_t pos;
int distance_cache[4];
float costdiff;
};
explicit StartPosQueue(int bits)
: mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}
void Clear(void) {
idx_ = 0;
}
void Push(const StartPosQueue::PosData& posdata) {
size_t offset = ~idx_ & mask_;
++idx_;
size_t len = size();
q_[offset] = posdata;
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
for (size_t i = 1; i < len; ++i) {
if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
}
++offset;
}
}
size_t size(void) const { return std::min(idx_, mask_ + 1); }
const StartPosQueue::PosData& GetStartPosData(size_t k) const {
return q_[(k - idx_) & mask_];
}
private:
const size_t mask_;
std::vector<PosData> q_;
size_t idx_;
};
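// Editorial usage sketch (values illustrative): the queue retains the last
// 2^bits pushed candidates ordered by costdiff, so index 0 is the cheapest.
//   StartPosQueue queue(3);                      // room for 8 candidates
//   StartPosQueue::PosData d = { pos, {0, 0, 0, 0}, costdiff };
//   queue.Push(d);                               // one bubble pass keeps order
//   const StartPosQueue::PosData& best = queue.GetStartPosData(0);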
// Returns the minimum possible copy length that can improve the cost of any
// future position.
static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
const ZopfliNode* nodes,
const ZopfliCostModel& model,
const size_t num_bytes,
const size_t pos) {
// Compute the minimum possible cost of reaching any future position.
const size_t start0 = queue.GetStartPosData(0).pos;
float min_cost = (nodes[start0].cost +
model.GetLiteralCosts(start0, pos) +
model.GetMinCostCmd());
size_t len = 2;
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
// We already reached (pos + len) with no more cost than the minimum
// possible cost of reaching anything from this pos, so there is no point in
// looking for lengths <= len.
++len;
if (len == next_len_offset) {
// We reached the next copy length code bucket, so we add one more
// extra bit to the minimum cost.
min_cost += static_cast<float>(1.0);
next_len_offset += next_len_bucket;
next_len_bucket *= 2;
}
}
return len;
}
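// Editorial note: the one-bit steps above mirror the copy length code
// buckets; with initial offset 10 and bucket width 4, the extra-bit count
// grows at lengths 10, 14, 22, 38, ... as each bucket doubles in width.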
// Fills in dist_cache[0..3] with the last four distances (as defined by
// Section 4. of the Spec) that would be used at (block_start + pos) if we
// used the shortest path of commands from block_start, computed from
// nodes[0..pos]. The last four distances at block_start are in
// starting_dist_cache[0..3].
// REQUIRES: nodes[pos].cost < kInfinity
// REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant".
static void ComputeDistanceCache(const size_t block_start,
const size_t pos,
const size_t max_backward,
const int* starting_dist_cache,
const ZopfliNode* nodes,
int* dist_cache) {
int idx = 0;
size_t p = pos;
// Because of the prerequisite, this loop does at most (pos + 1) / 2 iterations.
while (idx < 4 && p > 0) {
const size_t clen = nodes[p].copy_length();
const size_t ilen = nodes[p].insert_length;
const size_t dist = nodes[p].copy_distance();
// Since block_start + p is the end position of the command, the copy part
// starts from block_start + p - clen. Distances that are greater than this
// or greater than max_backward are static dictionary references, and do
// not update the last distances. Also distance code 0 (last distance)
// does not update the last distances.
if (dist + clen <= block_start + p && dist <= max_backward &&
nodes[p].distance_code() > 0) {
dist_cache[idx++] = static_cast<int>(dist);
}
// Because of the prerequisite, p >= clen + ilen >= 2.
p -= clen + ilen;
}
for (; idx < 4; ++idx) {
dist_cache[idx] = *starting_dist_cache++;
}
}
static void UpdateNodes(const size_t num_bytes,
const size_t block_start,
const size_t pos,
const uint8_t* ringbuffer,
const size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* starting_dist_cache,
const size_t num_matches,
const BackwardMatch* matches,
const ZopfliCostModel* model,
StartPosQueue* queue,
ZopfliNode* nodes) {
size_t cur_ix = block_start + pos;
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
size_t max_distance = std::min(cur_ix, max_backward_limit);
if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) {
StartPosQueue::PosData posdata;
posdata.pos = pos;
posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos);
ComputeDistanceCache(block_start, pos, max_backward_limit,
starting_dist_cache, nodes, posdata.distance_cache);
queue->Push(posdata);
}
const size_t min_len = ComputeMinimumCopyLength(
*queue, nodes, *model, num_bytes, pos);
// Go over the command starting positions in order of increasing cost
// difference.
for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
const size_t start = posdata.pos;
const float start_costdiff = posdata.costdiff;
// Look for last distance matches using the distance cache from this
// starting position.
size_t best_len = min_len - 1;
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] +
kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > max_distance)) {
continue;
}
prev_ix &= ringbuffer_mask;
if (cur_ix_masked + best_len > ringbuffer_mask ||
prev_ix + best_len > ringbuffer_mask ||
ringbuffer[cur_ix_masked + best_len] !=
ringbuffer[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
num_bytes - pos);
for (size_t l = best_len + 1; l <= len; ++l) {
const size_t inslen = pos - start;
float cmd_cost = model->GetCommandCost(j, l, inslen);
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
if (cost < nodes[pos + l].cost) {
UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost);
}
best_len = l;
}
}
// At higher iterations look only for new last distance matches, since
// looking only for new command start positions with the same distances
// does not help much.
if (k >= 2) continue;
// Loop through all possible copy lengths at this position.
size_t len = min_len;
for (size_t j = 0; j < num_matches; ++j) {
BackwardMatch match = matches[j];
size_t dist = match.distance;
bool is_dictionary_match = dist > max_distance;
// We already tried all possible last distance matches, so we can use
// normal distance code here.
size_t dist_code = dist + 15;
// Try all copy lengths up until the maximum copy length corresponding
// to this distance. If the distance refers to the static dictionary, or
// the maximum length is long enough, try only one maximum length.
size_t max_len = match.length();
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
len = max_len;
}
for (; len <= max_len; ++len) {
size_t len_code = is_dictionary_match ? match.length_code() : len;
const size_t inslen = pos - start;
float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen);
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
if (cost < nodes[pos + len].cost) {
UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost);
}
}
}
}
}
static void ComputeShortestPathFromNodes(size_t num_bytes,
const ZopfliNode* nodes,
std::vector<uint32_t>* path) {
std::vector<uint32_t> backwards(num_bytes / 2 + 1);
size_t index = num_bytes;
while (nodes[index].cost == kInfinity) --index;
size_t num_commands = 0;
while (index != 0) {
size_t len = nodes[index].command_length();
backwards[num_commands++] = static_cast<uint32_t>(len);
index -= len;
}
path->resize(num_commands);
for (size_t i = num_commands, j = 0; i > 0; --i, ++j) {
(*path)[j] = backwards[i - 1];
}
}
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals) {
size_t pos = 0;
for (size_t i = 0; i < path.size(); i++) {
const ZopfliNode& next = nodes[pos + path[i]];
size_t copy_length = next.copy_length();
size_t insert_length = next.insert_length;
pos += insert_length;
if (i == 0) {
insert_length += *last_insert_len;
*last_insert_len = 0;
}
size_t distance = next.copy_distance();
size_t len_code = next.length_code();
size_t max_distance = std::min(block_start + pos, max_backward_limit);
bool is_dictionary = (distance > max_distance);
size_t dist_code = next.distance_code();
Command cmd(insert_length, copy_length, len_code, dist_code);
commands[i] = cmd;
if (!is_dictionary && dist_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = static_cast<int>(distance);
}
*num_literals += insert_length;
pos += copy_length;
}
*last_insert_len += num_bytes - pos;
}
static void ZopfliIterate(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
const ZopfliCostModel& model,
const std::vector<uint32_t>& num_matches,
const std::vector<BackwardMatch>& matches,
ZopfliNode* nodes,
std::vector<uint32_t>* path) {
nodes[0].length = 0;
nodes[0].cost = 0;
StartPosQueue queue(3);
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; i++) {
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, num_matches[i],
&matches[cur_match_pos], &model, &queue, &nodes[0]);
cur_match_pos += num_matches[i];
// The zopflification can be too slow for very long matches, so in that case
// we skip it entirely; the cost in compression ratio is small.
if (num_matches[i] == 1 &&
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
i += matches[cur_match_pos - 1].length() - 1;
queue.Clear();
}
}
ComputeShortestPathFromNodes(num_bytes, &nodes[0], path);
}
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
Hashers::H10* hasher,
ZopfliNode* nodes,
std::vector<uint32_t>* path) {
nodes[0].length = 0;
nodes[0].cost = 0;
ZopfliCostModel* model = new ZopfliCostModel;
model->SetFromLiteralCosts(num_bytes, position,
ringbuffer, ringbuffer_mask);
StartPosQueue queue(3);
BackwardMatch matches[Hashers::H10::kMaxNumMatches];
for (size_t i = 0; i + 3 < num_bytes; i++) {
const size_t max_distance = std::min(position + i, max_backward_limit);
size_t num_matches = hasher->FindAllMatches(
ringbuffer, ringbuffer_mask, position + i, num_bytes - i, max_distance,
matches);
if (num_matches > 0 &&
matches[num_matches - 1].length() > kMaxZopfliLen) {
matches[0] = matches[num_matches - 1];
num_matches = 1;
}
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, num_matches, matches,
model, &queue, nodes);
if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) {
for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) {
++i;
if (matches[0].length() - j < 64 &&
num_bytes - i >= kMaxTreeCompLength) {
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
}
}
queue.Clear();
}
}
delete model;
ComputeShortestPathFromNodes(num_bytes, nodes, path);
}
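// Editorial note: ZopfliComputeShortestPath (the quality 10 path) finds
// matches on the fly with the H10 hasher under a fixed literal-cost model,
// while ZopfliIterate (quality 11) replays precomputed matches so the cost
// model can be refit from the previous pass's commands between iterations.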
template<typename Hasher>
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hasher* hasher,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals) {
// Set maximum distance, see section 9.1. of the spec.
const size_t max_backward_limit = (1 << lgwin) - 16;
// Choose which init method is faster.
// memset is about 100 times faster than hasher->InitForData().
const size_t kMaxBytesForPartialHashInit = Hasher::kHashMapSize >> 7;
if (position == 0 && is_last && num_bytes <= kMaxBytesForPartialHashInit) {
hasher->InitForData(ringbuffer, num_bytes);
} else {
hasher->Init();
}
if (num_bytes >= 3 && position >= 3) {
// Prepare the hashes for the last three bytes of the previous write.
// These could not be calculated before, since they require knowledge
// of both the previous and the current block.
hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
static_cast<uint32_t>(position - 3));
hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
static_cast<uint32_t>(position - 2));
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
static_cast<uint32_t>(position - 1));
}
const Command * const orig_commands = commands;
size_t insert_length = *last_insert_len;
size_t i = position & ringbuffer_mask;
const size_t i_diff = position - i;
const size_t i_end = i + num_bytes;
// Window size for the heuristics that speed up handling of random data.
const size_t random_heuristics_window_size = quality < 9 ? 64 : 512;
size_t apply_random_heuristics = i + random_heuristics_window_size;
// Minimum score to accept a backward reference.
const double kMinScore = 4.0;
while (i + Hasher::kHashTypeLength - 1 < i_end) {
size_t max_length = i_end - i;
size_t max_distance = std::min(i + i_diff, max_backward_limit);
size_t best_len = 0;
size_t best_len_code = 0;
size_t best_dist = 0;
double best_score = kMinScore;
bool match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
dist_cache, static_cast<uint32_t>(i + i_diff), max_length, max_distance,
&best_len, &best_len_code, &best_dist, &best_score);
if (match_found) {
// Found a match. Let's look for something even better ahead.
int delayed_backward_references_in_row = 0;
for (;;) {
--max_length;
size_t best_len_2 =
quality < 5 ? std::min(best_len - 1, max_length) : 0;
size_t best_len_code_2 = 0;
size_t best_dist_2 = 0;
double best_score_2 = kMinScore;
max_distance = std::min(i + i_diff + 1, max_backward_limit);
match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
dist_cache, static_cast<uint32_t>(i + i_diff + 1),
max_length, max_distance,
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
double cost_diff_lazy = 7.0;
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
// Ok, let's just write one byte for now and start a match from the
// next byte.
++i;
++insert_length;
best_len = best_len_2;
best_len_code = best_len_code_2;
best_dist = best_dist_2;
best_score = best_score_2;
if (++delayed_backward_references_in_row < 4) {
continue;
}
}
break;
}
apply_random_heuristics =
i + 2 * best_len + random_heuristics_window_size;
max_distance = std::min(i + i_diff, max_backward_limit);
// The first 16 codes are special short codes, and the minimum offset is 1.
size_t distance_code =
ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
if (best_dist <= max_distance && distance_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = static_cast<int>(best_dist);
}
Command cmd(insert_length, best_len, best_len_code, distance_code);
*commands++ = cmd;
*num_literals += insert_length;
insert_length = 0;
// Put the hash keys into the table, if there are enough
// bytes left.
for (size_t j = 2; j < best_len; ++j) {
hasher->Store(&ringbuffer[i + j],
static_cast<uint32_t>(i + i_diff + j));
}
i += best_len;
} else {
++insert_length;
++i;
// If we have not seen matches for a long time, we can skip some
// match lookups. Unsuccessful match lookups are very expensive, and
// this kind of heuristic speeds up compression quite a lot.
if (i > apply_random_heuristics) {
// Going through incompressible data: jump ahead.
if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
// It has been quite a long time since we saw a copy, so we assume
// this data is not compressible and store hashes less often.
// Hashes of incompressible data are less likely to turn out to be
// useful in the future, too, so we store fewer of them to avoid
// flooding out the hash table of good, compressible data.
size_t i_jump = std::min(i + 16, i_end - 4);
for (; i < i_jump; i += 4) {
hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
insert_length += 4;
}
} else {
size_t i_jump = std::min(i + 8, i_end - 3);
for (; i < i_jump; i += 2) {
hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
insert_length += 2;
}
}
}
}
}
insert_length += i_end - i;
*last_insert_len = insert_length;
*num_commands += static_cast<size_t>(commands - orig_commands);
}
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hashers* hashers,
int hash_type,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals) {
bool zopflify = quality > 9;
if (zopflify) {
Hashers::H10* hasher = hashers->hash_h10;
hasher->Init(lgwin, position, num_bytes, is_last);
hasher->StitchToPreviousBlock(num_bytes, position,
ringbuffer, ringbuffer_mask);
// Set maximum distance, see section 9.1. of the spec.
const size_t max_backward_limit = (1 << lgwin) - 16;
if (quality == 10) {
std::vector<ZopfliNode> nodes(num_bytes + 1);
std::vector<uint32_t> path;
ZopfliComputeShortestPath(num_bytes, position,
ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, hasher,
&nodes[0], &path);
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
&nodes[0], dist_cache, last_insert_len, commands,
num_literals);
*num_commands += path.size();
return;
}
std::vector<uint32_t> num_matches(num_bytes);
std::vector<BackwardMatch> matches(4 * num_bytes);
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; ++i) {
size_t max_distance = std::min(position + i, max_backward_limit);
size_t max_length = num_bytes - i;
// Ensure that we have enough free slots.
if (matches.size() < cur_match_pos + Hashers::H10::kMaxNumMatches) {
matches.resize(cur_match_pos + Hashers::H10::kMaxNumMatches);
}
size_t num_found_matches = hasher->FindAllMatches(
ringbuffer, ringbuffer_mask, position + i, max_length, max_distance,
&matches[cur_match_pos]);
const size_t cur_match_end = cur_match_pos + num_found_matches;
for (size_t j = cur_match_pos; j + 1 < cur_match_end; ++j) {
assert(matches[j].length() < matches[j + 1].length());
assert(matches[j].distance > max_distance ||
matches[j].distance <= matches[j + 1].distance);
}
num_matches[i] = static_cast<uint32_t>(num_found_matches);
if (num_found_matches > 0) {
const size_t match_len = matches[cur_match_end - 1].length();
if (match_len > kMaxZopfliLen) {
matches[cur_match_pos++] = matches[cur_match_end - 1];
num_matches[i] = 1;
for (size_t j = 1; j < match_len; ++j) {
++i;
if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) {
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
}
num_matches[i] = 0;
}
} else {
cur_match_pos = cur_match_end;
}
}
}
size_t orig_num_literals = *num_literals;
size_t orig_last_insert_len = *last_insert_len;
int orig_dist_cache[4] = {
dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
};
size_t orig_num_commands = *num_commands;
static const size_t kIterations = 2;
for (size_t i = 0; i < kIterations; i++) {
ZopfliCostModel model;
if (i == 0) {
model.SetFromLiteralCosts(num_bytes, position,
ringbuffer, ringbuffer_mask);
} else {
model.SetFromCommands(num_bytes, position,
ringbuffer, ringbuffer_mask,
commands, *num_commands - orig_num_commands,
orig_last_insert_len);
}
*num_commands = orig_num_commands;
*num_literals = orig_num_literals;
*last_insert_len = orig_last_insert_len;
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
std::vector<ZopfliNode> nodes(num_bytes + 1);
std::vector<uint32_t> path;
ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, model, num_matches, matches,
&nodes[0], &path);
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
&nodes[0], dist_cache, last_insert_len, commands,
num_literals);
*num_commands += path.size();
}
return;
}
switch (hash_type) {
case 2:
CreateBackwardReferences<Hashers::H2>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h2, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 3:
CreateBackwardReferences<Hashers::H3>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h3, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 4:
CreateBackwardReferences<Hashers::H4>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h4, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 5:
CreateBackwardReferences<Hashers::H5>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h5, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 6:
CreateBackwardReferences<Hashers::H6>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h6, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 7:
CreateBackwardReferences<Hashers::H7>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h7, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 8:
CreateBackwardReferences<Hashers::H8>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h8, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 9:
CreateBackwardReferences<Hashers::H9>(
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
quality, lgwin, hashers->hash_h9, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
default:
break;
}
}
} // namespace brotli
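As an editorial aside, both command emitters above update the four-slot
"last distances" cache inline; the sketch below is a minimal standalone
version of that update, assuming only the semantics visible in
ZopfliCreateCommands and CreateBackwardReferences (PushDistance is a
hypothetical helper, not part of the imported sources).

#include <cstring>

// Shift the previous three entries down and install the new distance at
// slot 0, matching the inline updates above.
static inline void PushDistance(int* dist_cache, int distance) {
  memmove(&dist_cache[1], &dist_cache[0], 3 * sizeof(dist_cache[0]));
  dist_cache[0] = distance;
}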

View file

@@ -0,0 +1,116 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find backward reference copies.
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <vector>
#include "./hash.h"
#include "./command.h"
#include "./types.h"
namespace brotli {
// "commands" points to the next output command to write to, "*num_commands" is
// initially the total amount of commands output by previous
// CreateBackwardReferences calls, and must be incremented by the amount written
// by this call.
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hashers* hashers,
int hash_type,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals);
static const float kInfinity = std::numeric_limits<float>::infinity();
struct ZopfliNode {
ZopfliNode(void) : length(1),
distance(0),
insert_length(0),
cost(kInfinity) {}
inline uint32_t copy_length() const {
return length & 0xffffff;
}
inline uint32_t length_code() const {
const uint32_t modifier = length >> 24;
return copy_length() + 9u - modifier;
}
inline uint32_t copy_distance() const {
return distance & 0x1ffffff;
}
inline uint32_t distance_code() const {
const uint32_t short_code = distance >> 25;
return short_code == 0 ? copy_distance() + 15 : short_code - 1;
}
inline uint32_t command_length() const {
return copy_length() + insert_length;
}
// best length to get up to this byte (not including this byte itself)
// the highest 8 bits are used to reconstruct the length code
uint32_t length;
// distance associated with the length
// the highest 7 bits contain the distance short code + 1 (or zero if no short code)
uint32_t distance;
// number of literal inserts before this copy
uint32_t insert_length;
// smallest cost to get to this byte from the beginning, as found so far
float cost;
};
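// Editorial worked example: a copy of length 12 recorded with length code 10
// stores length = 12 | ((12 + 9 - 10) << 24), so copy_length() returns 12
// and length_code() returns 10. A copy found via distance cache index 3
// stores (3 + 1) in the top 7 bits of distance, so distance_code() returns
// 3; with no short code those bits are zero and distance_code() falls back
// to copy_distance() + 15.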
// Computes the shortest path of commands from position to at most
// position + num_bytes.
//
// On return, path->size() is the number of commands found and path[i] is the
// length of the ith command (copy length plus insert length).
// Note that the sum of the lengths of all commands can be less than num_bytes.
//
// On return, the nodes[0..num_bytes] array will have the following
// "ZopfliNode array invariant":
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
// (1) nodes[i].copy_length() >= 2
// (2) nodes[i].command_length() <= i and
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
Hashers::H10* hasher,
ZopfliNode* nodes,
std::vector<uint32_t>* path);
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals);
} // namespace brotli
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_

View file

@@ -0,0 +1,161 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to estimate the bit cost of Huffman trees.
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./types.h"
namespace brotli {
static inline double ShannonEntropy(const uint32_t *population, size_t size,
size_t *total) {
size_t sum = 0;
double retval = 0;
const uint32_t *population_end = population + size;
size_t p;
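// The summation loop below is unrolled two elements per iteration; for an
// odd size we jump into the middle of the first iteration so that exactly
// `size` elements are consumed.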
if (size & 1) {
goto odd_number_of_elements_left;
}
while (population < population_end) {
p = *population++;
sum += p;
retval -= static_cast<double>(p) * FastLog2(p);
odd_number_of_elements_left:
p = *population++;
sum += p;
retval -= static_cast<double>(p) * FastLog2(p);
}
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
*total = sum;
return retval;
}
static inline double BitsEntropy(const uint32_t *population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
// At least one bit per literal is needed.
retval = static_cast<double>(sum);
}
return retval;
}
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
if (histogram.total_count_ == 0) {
return kOneSymbolHistogramCost;
}
int count = 0;
int s[5];
for (int i = 0; i < kSize; ++i) {
if (histogram.data_[i] > 0) {
s[count] = i;
++count;
if (count > 4) break;
}
}
if (count == 1) {
return kOneSymbolHistogramCost;
}
if (count == 2) {
return (kTwoSymbolHistogramCost +
static_cast<double>(histogram.total_count_));
}
if (count == 3) {
const uint32_t histo0 = histogram.data_[s[0]];
const uint32_t histo1 = histogram.data_[s[1]];
const uint32_t histo2 = histogram.data_[s[2]];
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
return (kThreeSymbolHistogramCost +
2 * (histo0 + histo1 + histo2) - histomax);
}
if (count == 4) {
uint32_t histo[4];
for (int i = 0; i < 4; ++i) {
histo[i] = histogram.data_[s[i]];
}
// Sort
for (int i = 0; i < 4; ++i) {
for (int j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
std::swap(histo[j], histo[i]);
}
}
}
const uint32_t h23 = histo[2] + histo[3];
const uint32_t histomax = std::max(h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
}
// In this loop we compute the entropy of the histogram and simultaneously
// build a simplified histogram of the code length codes where we use the
// zero repeat code 17, but we don't use the non-zero repeat code 16.
double bits = 0;
size_t max_depth = 1;
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
const double log2total = FastLog2(histogram.total_count_);
for (size_t i = 0; i < kSize;) {
if (histogram.data_[i] > 0) {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
double log2p = log2total - FastLog2(histogram.data_[i]);
// Approximate the bit depth by round(-log2(P(symbol)))
size_t depth = static_cast<size_t>(log2p + 0.5);
bits += histogram.data_[i] * log2p;
if (depth > 15) {
depth = 15;
}
if (depth > max_depth) {
max_depth = depth;
}
++depth_histo[depth];
++i;
} else {
// Compute the run length of zeros and add the appropriate number of 0 and
// 17 code length codes to the code length code histogram.
uint32_t reps = 1;
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
++reps;
}
i += reps;
if (i == kSize) {
// Don't add any cost for the last zero run, since these are encoded
// only implicitly.
break;
}
if (reps < 3) {
depth_histo[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++depth_histo[17];
// Add the 3 extra bits for the 17 code length code.
bits += 3;
reps >>= 3;
}
}
}
}
// Add the estimated encoding cost of the code length code histogram.
bits += static_cast<double>(18 + 2 * max_depth);
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
return bits;
}
} // namespace brotli
#endif // BROTLI_ENC_BIT_COST_H_
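A quick editorial sanity check of the entropy estimate above; the toy
histogram and the expected value are illustrative only, and FastLog2 is
assumed to be exact on powers of two.

#include <cstdint>
#include <cstdio>
#include "./bit_cost.h"

int main(void) {
  // 16 symbols with probabilities 1/2, 1/4, 1/8, 1/8.
  const uint32_t histogram[4] = {8, 4, 2, 2};
  // Shannon entropy is 1.75 bits/symbol, so we expect 16 * 1.75 = 28 bits.
  double bits = brotli::BitsEntropy(histogram, 4);
  std::printf("estimated cost: %.1f bits\n", bits);
  return 0;
}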

View file

@@ -0,0 +1,505 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Block split point selection utilities.
#include "./block_splitter.h"
#include <assert.h>
#include <math.h>
#include <algorithm>
#include <cstring>
#include <vector>
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
namespace brotli {
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
std::vector<uint8_t>* literals) {
// Count how many we have.
size_t total_length = 0;
for (size_t i = 0; i < num_commands; ++i) {
total_length += cmds[i].insert_len_;
}
if (total_length == 0) {
return;
}
// Allocate.
literals->resize(total_length);
// Loop again, and copy this time.
size_t pos = 0;
size_t from_pos = offset & mask;
for (size_t i = 0; i < num_commands && pos < total_length; ++i) {
size_t insert_len = cmds[i].insert_len_;
if (from_pos + insert_len > mask) {
size_t head_size = mask + 1 - from_pos;
memcpy(&(*literals)[pos], data + from_pos, head_size);
from_pos = 0;
pos += head_size;
insert_len -= head_size;
}
if (insert_len > 0) {
memcpy(&(*literals)[pos], data + from_pos, insert_len);
pos += insert_len;
}
from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
}
}
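// Multiplicative congruential generator with the classic 16807 multiplier,
// reduced mod 2^32 by unsigned wrap-around; statistical quality hardly
// matters here, since it only drives histogram sampling.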
inline static unsigned int MyRand(unsigned int* seed) {
*seed *= 16807U;
if (*seed == 0) {
*seed = 1;
}
return *seed;
}
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
size_t stride,
size_t num_histograms,
HistogramType* histograms) {
for (size_t i = 0; i < num_histograms; ++i) {
histograms[i].Clear();
}
unsigned int seed = 7;
size_t block_length = length / num_histograms;
for (size_t i = 0; i < num_histograms; ++i) {
size_t pos = length * i / num_histograms;
if (i != 0) {
pos += MyRand(&seed) % block_length;
}
if (pos + stride >= length) {
pos = length - stride - 1;
}
histograms[i].Add(data + pos, stride);
}
}
template<typename HistogramType, typename DataType>
void RandomSample(unsigned int* seed,
const DataType* data,
size_t length,
size_t stride,
HistogramType* sample) {
size_t pos = 0;
if (stride >= length) {
pos = 0;
stride = length;
} else {
pos = MyRand(seed) % (length - stride + 1);
}
sample->Add(data + pos, stride);
}
template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length,
size_t stride,
size_t num_histograms,
HistogramType* histograms) {
size_t iters =
kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7;
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
for (size_t iter = 0; iter < iters; ++iter) {
HistogramType sample;
RandomSample(&seed, data, length, stride, &sample);
size_t ix = iter % num_histograms;
histograms[ix].AddHistogram(sample);
}
}
inline static double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count);
}
// Assigns a block id from the range [0, num_histograms) to each data element
// in data[0..length) and fills in block_id[0..length) with the assigned values.
// Returns the number of blocks, i.e. one plus the number of block switches.
template<typename DataType, int kSize>
size_t FindBlocks(const DataType* data, const size_t length,
const double block_switch_bitcost,
const size_t num_histograms,
const Histogram<kSize>* histograms,
double* insert_cost,
double* cost,
uint8_t* switch_signal,
uint8_t *block_id) {
if (num_histograms <= 1) {
for (size_t i = 0; i < length; ++i) {
block_id[i] = 0;
}
return 1;
}
const size_t bitmaplen = (num_histograms + 7) >> 3;
assert(num_histograms <= 256);
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[j] = FastLog2(static_cast<uint32_t>(
histograms[j].total_count_));
}
for (size_t i = kSize; i != 0;) {
--i;
for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[i * num_histograms + j] =
insert_cost[j] - BitCost(histograms[j].data_[i]);
}
}
memset(cost, 0, sizeof(cost[0]) * num_histograms);
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
// After each iteration of this loop, cost[k] will contain the difference
// between the minimum cost of arriving at the current byte position using
// entropy code k, and the minimum cost of arriving at the current byte
// position. This difference is capped at the block switch cost, and if it
// reaches block switch cost, it means that when we trace back from the last
// position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
size_t ix = byte_ix * bitmaplen;
size_t insert_cost_ix = data[byte_ix] * num_histograms;
double min_cost = 1e99;
for (size_t k = 0; k < num_histograms; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
min_cost = cost[k];
block_id[byte_ix] = static_cast<uint8_t>(k);
}
}
double block_switch_cost = block_switch_bitcost;
// Use a lower switch cost near the beginning to produce more blocks.
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
}
for (size_t k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost;
const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
assert((k >> 3) < bitmaplen);
switch_signal[ix + (k >> 3)] |= mask;
}
}
}
// Now trace back from the last position and switch at the marked places.
size_t byte_ix = length - 1;
size_t ix = byte_ix * bitmaplen;
uint8_t cur_id = block_id[byte_ix];
size_t num_blocks = 1;
while (byte_ix > 0) {
--byte_ix;
ix -= bitmaplen;
const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
if (switch_signal[ix + (cur_id >> 3)] & mask) {
if (cur_id != block_id[byte_ix]) {
cur_id = block_id[byte_ix];
++num_blocks;
}
}
block_id[byte_ix] = cur_id;
}
return num_blocks;
}
static size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
uint16_t* new_id, const size_t num_histograms) {
static const uint16_t kInvalidId = 256;
for (size_t i = 0; i < num_histograms; ++i) {
new_id[i] = kInvalidId;
}
uint16_t next_id = 0;
for (size_t i = 0; i < length; ++i) {
assert(block_ids[i] < num_histograms);
if (new_id[block_ids[i]] == kInvalidId) {
new_id[block_ids[i]] = next_id++;
}
}
for (size_t i = 0; i < length; ++i) {
block_ids[i] = static_cast<uint8_t>(new_id[block_ids[i]]);
assert(block_ids[i] < num_histograms);
}
assert(next_id <= num_histograms);
return next_id;
}
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
const uint8_t* block_ids,
const size_t num_histograms,
HistogramType* histograms) {
for (size_t i = 0; i < num_histograms; ++i) {
histograms[i].Clear();
}
for (size_t i = 0; i < length; ++i) {
histograms[block_ids[i]].Add(data[i]);
}
}
template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
const size_t num_blocks,
uint8_t* block_ids,
BlockSplit* split) {
static const size_t kMaxNumberOfBlockTypes = 256;
static const size_t kHistogramsPerBatch = 64;
static const size_t kClustersPerBatch = 16;
std::vector<uint32_t> histogram_symbols(num_blocks);
std::vector<uint32_t> block_lengths(num_blocks);
size_t block_idx = 0;
for (size_t i = 0; i < length; ++i) {
assert(block_idx < num_blocks);
++block_lengths[block_idx];
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
++block_idx;
}
}
assert(block_idx == num_blocks);
const size_t expected_num_clusters =
kClustersPerBatch *
(num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
std::vector<HistogramType> all_histograms;
std::vector<uint32_t> cluster_size;
all_histograms.reserve(expected_num_clusters);
cluster_size.reserve(expected_num_clusters);
size_t num_clusters = 0;
std::vector<HistogramType> histograms(
std::min(num_blocks, kHistogramsPerBatch));
size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
size_t pos = 0;
for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
uint32_t sizes[kHistogramsPerBatch];
uint32_t clusters[kHistogramsPerBatch];
uint32_t symbols[kHistogramsPerBatch];
uint32_t remap[kHistogramsPerBatch];
for (size_t j = 0; j < num_to_combine; ++j) {
histograms[j].Clear();
for (size_t k = 0; k < block_lengths[i + j]; ++k) {
histograms[j].Add(data[pos++]);
}
histograms[j].bit_cost_ = PopulationCost(histograms[j]);
symbols[j] = clusters[j] = static_cast<uint32_t>(j);
sizes[j] = 1;
}
size_t num_new_clusters = HistogramCombine(
&histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
num_to_combine, kHistogramsPerBatch, max_num_pairs);
for (size_t j = 0; j < num_new_clusters; ++j) {
all_histograms.push_back(histograms[clusters[j]]);
cluster_size.push_back(sizes[clusters[j]]);
remap[clusters[j]] = static_cast<uint32_t>(j);
}
for (size_t j = 0; j < num_to_combine; ++j) {
histogram_symbols[i + j] =
static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
}
num_clusters += num_new_clusters;
assert(num_clusters == cluster_size.size());
assert(num_clusters == all_histograms.size());
}
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
std::vector<uint32_t> clusters(num_clusters);
for (size_t i = 0; i < num_clusters; ++i) {
clusters[i] = static_cast<uint32_t>(i);
}
size_t num_final_clusters =
HistogramCombine(&all_histograms[0], &cluster_size[0],
&histogram_symbols[0],
&clusters[0], &pairs[0], num_clusters,
num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
uint32_t next_index = 0;
pos = 0;
for (size_t i = 0; i < num_blocks; ++i) {
HistogramType histo;
for (size_t j = 0; j < block_lengths[i]; ++j) {
histo.Add(data[pos++]);
}
uint32_t best_out =
i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
double best_bits = HistogramBitCostDistance(
histo, all_histograms[best_out]);
for (size_t j = 0; j < num_final_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(
histo, all_histograms[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
histogram_symbols[i] = best_out;
if (new_index[best_out] == kInvalidIndex) {
new_index[best_out] = next_index++;
}
}
uint8_t max_type = 0;
uint32_t cur_length = 0;
block_idx = 0;
split->types.resize(num_blocks);
split->lengths.resize(num_blocks);
for (size_t i = 0; i < num_blocks; ++i) {
cur_length += block_lengths[i];
if (i + 1 == num_blocks ||
histogram_symbols[i] != histogram_symbols[i + 1]) {
const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
split->types[block_idx] = id;
split->lengths[block_idx] = cur_length;
max_type = std::max(max_type, id);
cur_length = 0;
++block_idx;
}
}
split->types.resize(block_idx);
split->lengths.resize(block_idx);
split->num_types = static_cast<size_t>(max_type) + 1;
}
template<int kSize, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
const size_t literals_per_histogram,
const size_t max_histograms,
const size_t sampling_stride_length,
const double block_switch_cost,
BlockSplit* split) {
if (data.empty()) {
split->num_types = 1;
return;
} else if (data.size() < kMinLengthForBlockSplitting) {
split->num_types = 1;
split->types.push_back(0);
split->lengths.push_back(static_cast<uint32_t>(data.size()));
return;
}
size_t num_histograms = data.size() / literals_per_histogram + 1;
if (num_histograms > max_histograms) {
num_histograms = max_histograms;
}
Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
// Find good entropy codes.
InitialEntropyCodes(&data[0], data.size(),
sampling_stride_length,
num_histograms, histograms);
RefineEntropyCodes(&data[0], data.size(),
sampling_stride_length,
num_histograms, histograms);
// Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size());
size_t num_blocks;
const size_t bitmaplen = (num_histograms + 7) >> 3;
double* insert_cost = new double[kSize * num_histograms];
double *cost = new double[num_histograms];
uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
uint16_t* new_id = new uint16_t[num_histograms];
for (size_t i = 0; i < 10; ++i) {
num_blocks = FindBlocks(&data[0], data.size(),
block_switch_cost,
num_histograms, histograms,
insert_cost, cost, switch_signal,
&block_ids[0]);
num_histograms = RemapBlockIds(&block_ids[0], data.size(),
new_id, num_histograms);
BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
num_histograms, histograms);
}
delete[] insert_cost;
delete[] cost;
delete[] switch_signal;
delete[] new_id;
delete[] histograms;
ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
&block_ids[0], split);
}
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t pos,
const size_t mask,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
{
// Create a continuous array of literals.
std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
// Create the block split on the array of literals.
// Literal histograms have alphabet size 256.
SplitByteVector<256>(
literals,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
kLiteralStrideLength, kLiteralBlockSwitchCost,
literal_split);
}
{
// Compute prefix codes for commands.
std::vector<uint16_t> insert_and_copy_codes(num_commands);
for (size_t i = 0; i < num_commands; ++i) {
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
}
// Create the block split on the array of command prefixes.
SplitByteVector<kNumCommandPrefixes>(
insert_and_copy_codes,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
kCommandStrideLength, kCommandBlockSwitchCost,
insert_and_copy_split);
}
{
// Create a continuous array of distance prefixes.
std::vector<uint16_t> distance_prefixes(num_commands);
size_t pos = 0;
for (size_t i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
distance_prefixes[pos++] = cmd.dist_prefix_;
}
}
distance_prefixes.resize(pos);
// Create the block split on the array of distance prefixes.
SplitByteVector<kNumDistancePrefixes>(
distance_prefixes,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
kCommandStrideLength, kDistanceBlockSwitchCost,
dist_split);
}
}
} // namespace brotli

View file

@@ -0,0 +1,61 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Block split point selection utilities.
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <vector>
#include "./command.h"
#include "./metablock.h"
#include "./types.h"
namespace brotli {
struct BlockSplitIterator {
explicit BlockSplitIterator(const BlockSplit& split)
: split_(split), idx_(0), type_(0), length_(0) {
if (!split.lengths.empty()) {
length_ = split.lengths[0];
}
}
void Next(void) {
if (length_ == 0) {
++idx_;
type_ = split_.types[idx_];
length_ = split_.lengths[idx_];
}
--length_;
}
const BlockSplit& split_;
size_t idx_;
size_t type_;
size_t length_;
};
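// Editorial usage sketch (num_symbols illustrative): advance once per
// symbol, then read type_.
//   BlockSplitIterator it(split);
//   for (size_t i = 0; i < num_symbols; ++i) {
//     it.Next();
//     const size_t block_type = it.type_;
//   }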
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
std::vector<uint8_t>* literals);
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
} // namespace brotli
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_

File diff suppressed because it is too large

View file

@@ -0,0 +1,179 @@
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to convert brotli-related data structures into the
// brotli bit stream. The functions here operate under
// assumption that there is enough space in the storage, i.e., there are
// no out-of-range checks anywhere.
//
// These functions do bit addressing into a byte array. The byte array
// is called "storage" and the index to the bit is called storage_ix
// in function arguments.
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
#include <vector>
#include "./entropy_encode.h"
#include "./metablock.h"
#include "./types.h"
namespace brotli {
// All Store functions here will use a storage_ix, which is always the bit
// position for the current storage.
// Stores a number between 0 and 255.
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
// Stores the compressed meta-block header.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
size_t* storage_ix,
uint8_t* storage);
// Stores the uncompressed meta-block header.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlockHeader(size_t length,
size_t* storage_ix,
uint8_t* storage);
// Stores a context map where the histogram type is always the block type.
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
HuffmanTree* tree,
size_t* storage_ix,
uint8_t* storage);
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
size_t *storage_ix,
uint8_t *storage);
void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
size_t *storage_ix, uint8_t *storage);
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
// bits[0:length] and stores the encoded tree to the bit stream.
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
HuffmanTree* tree,
uint8_t* depth,
uint16_t* bits,
size_t* storage_ix,
uint8_t* storage);
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
const size_t histogram_total,
const size_t max_bits,
uint8_t* depth,
uint16_t* bits,
size_t* storage_ix,
uint8_t* storage);
// Encodes the given context map to the bit stream. The number of different
// histogram ids is given by num_clusters.
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage);
// Data structure that stores everything that is needed to encode each block
// switch command.
struct BlockSplitCode {
std::vector<uint32_t> type_code;
std::vector<uint32_t> length_prefix;
std::vector<uint32_t> length_nextra;
std::vector<uint32_t> length_extra;
std::vector<uint8_t> type_depths;
std::vector<uint16_t> type_bits;
uint8_t length_depths[kNumBlockLenPrefixes];
uint16_t length_bits[kNumBlockLenPrefixes];
};
// Builds a BlockSplitCode data structure from the block split given by the
// vector of block types and block lengths and stores it to the bit stream.
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
BlockSplitCode* code,
size_t* storage_ix,
uint8_t* storage);
// Stores the block switch command with index block_ix to the bit stream.
void StoreBlockSwitch(const BlockSplitCode& code,
const size_t block_ix,
size_t* storage_ix,
uint8_t* storage);
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool final_block,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits,
ContextType literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
size_t *storage_ix,
uint8_t *storage);
// Stores the meta-block without doing any block splitting, just collects
// one histogram per block category and uses that for entropy coding.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlockTrivial(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
size_t *storage_ix,
uint8_t *storage);
// Same as above, but uses static prefix codes for histograms with only a few
// symbols, and static code length prefix codes for all other histograms.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlockFast(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
size_t *storage_ix,
uint8_t *storage);
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t* input,
size_t position, size_t mask,
size_t len,
size_t* storage_ix,
uint8_t* storage);
// Stores an empty metadata meta-block and syncs to a byte boundary.
void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
} // namespace brotli
#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_

View file

@@ -0,0 +1,330 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions for clustering similar histograms together.
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
#include <assert.h>
#include <math.h>
#include <string.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./port.h"
#include "./types.h"
namespace brotli {
struct HistogramPair {
uint32_t idx1;
uint32_t idx2;
double cost_combo;
double cost_diff;
};
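// Orders pairs so that the better merge candidate compares greater: the pair
// with the lower cost_diff wins, and ties go to the pair whose indices are
// closer together. HistogramCombine() below keeps the best pair at pairs[0].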
inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
}
// Returns entropy reduction of the context map when we combine two clusters.
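// For example, ClusterCostDiff(2, 2) = 2*1 + 2*1 - 4*2 = -4, i.e. merging two
// clusters of size two saves about four bits of context map encoding.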
inline double ClusterCostDiff(size_t size_a, size_t size_b) {
size_t size_c = size_a + size_b;
return static_cast<double>(size_a) * FastLog2(size_a) +
static_cast<double>(size_b) * FastLog2(size_b) -
static_cast<double>(size_c) * FastLog2(size_c);
}
// Computes the bit cost reduction from combining out[idx1] and out[idx2] and,
// if it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.
template<typename HistogramType>
void CompareAndPushToQueue(const HistogramType* out,
const uint32_t* cluster_size,
uint32_t idx1, uint32_t idx2,
size_t max_num_pairs,
HistogramPair* pairs,
size_t* num_pairs) {
if (idx1 == idx2) {
return;
}
if (idx2 < idx1) {
uint32_t t = idx2;
idx2 = idx1;
idx1 = t;
}
bool store_pair = false;
HistogramPair p;
p.idx1 = idx1;
p.idx2 = idx2;
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
p.cost_diff -= out[idx1].bit_cost_;
p.cost_diff -= out[idx2].bit_cost_;
if (out[idx1].total_count_ == 0) {
p.cost_combo = out[idx2].bit_cost_;
store_pair = true;
} else if (out[idx2].total_count_ == 0) {
p.cost_combo = out[idx1].bit_cost_;
store_pair = true;
} else {
double threshold = *num_pairs == 0 ? 1e99 :
std::max(0.0, pairs[0].cost_diff);
HistogramType combo = out[idx1];
combo.AddHistogram(out[idx2]);
double cost_combo = PopulationCost(combo);
if (cost_combo < threshold - p.cost_diff) {
p.cost_combo = cost_combo;
store_pair = true;
}
}
if (store_pair) {
p.cost_diff += p.cost_combo;
if (*num_pairs > 0 && pairs[0] < p) {
// Replace the top of the queue if needed.
if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = pairs[0];
++(*num_pairs);
}
pairs[0] = p;
} else if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = p;
++(*num_pairs);
}
}
}
template<typename HistogramType>
size_t HistogramCombine(HistogramType* out,
uint32_t* cluster_size,
uint32_t* symbols,
uint32_t* clusters,
HistogramPair* pairs,
size_t num_clusters,
size_t symbols_size,
size_t max_clusters,
size_t max_num_pairs) {
double cost_diff_threshold = 0.0;
size_t min_cluster_size = 1;
// We maintain a vector of histogram pairs, with the property that the pair
// with the maximum bit cost reduction is the first.
size_t num_pairs = 0;
for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
max_num_pairs, &pairs[0], &num_pairs);
}
}
while (num_clusters > min_cluster_size) {
if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99;
min_cluster_size = max_clusters;
continue;
}
// Take the best pair from the top of the heap.
uint32_t best_idx1 = pairs[0].idx1;
uint32_t best_idx2 = pairs[0].idx2;
out[best_idx1].AddHistogram(out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (size_t i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
for (size_t i = 0; i < num_clusters; ++i) {
if (clusters[i] == best_idx2) {
memmove(&clusters[i], &clusters[i + 1],
(num_clusters - i - 1) * sizeof(clusters[0]));
break;
}
}
--num_clusters;
// Remove pairs intersecting the just combined best pair.
size_t copy_to_idx = 0;
for (size_t i = 0; i < num_pairs; ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
// Remove invalid pair from the queue.
continue;
}
if (pairs[0] < p) {
// Replace the top of the queue if needed.
HistogramPair front = pairs[0];
pairs[0] = p;
pairs[copy_to_idx] = front;
} else {
pairs[copy_to_idx] = p;
}
++copy_to_idx;
}
num_pairs = copy_to_idx;
// Push new pairs formed with the combined histogram to the heap.
for (size_t i = 0; i < num_clusters; ++i) {
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
max_num_pairs, &pairs[0], &num_pairs);
}
}
return num_clusters;
}
// -----------------------------------------------------------------------------
// Histogram refinement
// Returns the bit cost of moving "histogram" from its current cluster to the
// "candidate" cluster.
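// I.e. the extra bits incurred when "histogram" is coded together with the
// candidate, relative to coding the candidate alone.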
template<typename HistogramType>
double HistogramBitCostDistance(const HistogramType& histogram,
const HistogramType& candidate) {
if (histogram.total_count_ == 0) {
return 0.0;
}
HistogramType tmp = histogram;
tmp.AddHistogram(candidate);
return PopulationCost(tmp) - candidate.bit_cost_;
}
// Find the best 'out' histogram for each of the 'in' histograms.
// When called, clusters[0..num_clusters) contains the unique values from
// symbols[0..in_size), but this property is not preserved in this function.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, size_t in_size,
const uint32_t* clusters, size_t num_clusters,
HistogramType* out, uint32_t* symbols) {
for (size_t i = 0; i < in_size; ++i) {
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (size_t j = 0; j < num_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
symbols[i] = best_out;
}
// Recompute each out histogram from the 'in' histograms and the new symbols.
for (size_t j = 0; j < num_clusters; ++j) {
out[clusters[j]].Clear();
}
for (size_t i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]);
}
}
// Reorders elements of the out[0..length) array and changes values in
// symbols[0..length) array in the following way:
// * when called, symbols[] contains indexes into out[], and has N unique
// values (possibly N < length)
// * on return, symbols'[i] = f(symbols[i]) and
// out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
// where f is a bijection between the range of symbols[] and [0..N), and
// the first occurrences of values in symbols'[i] come in consecutive
// increasing order.
// Returns N, the number of unique values in symbols[].
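// For example, symbols = {3, 1, 3, 2} becomes {0, 1, 0, 2}: the first
// occurrences 3, 1, 2 get indices 0, 1, 2, out[0..2] receives the old out[3],
// out[1], out[2], and 3 is returned.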
template<typename HistogramType>
size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(length, kInvalidIndex);
uint32_t next_index = 0;
for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == kInvalidIndex) {
new_index[symbols[i]] = next_index;
++next_index;
}
}
std::vector<HistogramType> tmp(next_index);
next_index = 0;
for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == next_index) {
tmp[next_index] = out[symbols[i]];
++next_index;
}
symbols[i] = new_index[symbols[i]];
}
for (size_t i = 0; i < next_index; ++i) {
out[i] = tmp[i];
}
return next_index;
}
// Clusters similar histograms in 'in' together, the selected histograms are
// placed in 'out', and for each index in 'in', *histogram_symbols will
// indicate which of the 'out' histograms is the best approximation.
template<typename HistogramType>
void ClusterHistograms(const std::vector<HistogramType>& in,
size_t num_contexts, size_t num_blocks,
size_t max_histograms,
std::vector<HistogramType>* out,
std::vector<uint32_t>* histogram_symbols) {
const size_t in_size = num_contexts * num_blocks;
assert(in_size == in.size());
std::vector<uint32_t> cluster_size(in_size, 1);
std::vector<uint32_t> clusters(in_size);
size_t num_clusters = 0;
out->resize(in_size);
histogram_symbols->resize(in_size);
for (size_t i = 0; i < in_size; ++i) {
(*out)[i] = in[i];
(*out)[i].bit_cost_ = PopulationCost(in[i]);
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
}
const size_t max_input_histograms = 64;
// For the first pass of clustering, we allow all pairs.
size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
for (size_t i = 0; i < in_size; i += max_input_histograms) {
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
for (size_t j = 0; j < num_to_combine; ++j) {
clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
}
size_t num_new_clusters =
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i],
&clusters[num_clusters], &pairs[0],
num_to_combine, num_to_combine,
max_histograms, max_num_pairs);
num_clusters += num_new_clusters;
}
// For the second pass, we limit the total number of histogram pairs.
// After this limit is reached, we only keep searching for the best pair.
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
// Collapse similar histograms.
num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[0], &clusters[0],
&pairs[0], num_clusters, in_size,
max_histograms, max_num_pairs);
// Find the optimal map from original histograms to the final ones.
HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
&(*out)[0], &(*histogram_symbols)[0]);
// Convert the context map to a canonical form.
size_t num_histograms =
HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
out->resize(num_histograms);
}
} // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_

View file

@@ -0,0 +1,156 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// This class models a sequence of literals and a backward reference copy.
#ifndef BROTLI_ENC_COMMAND_H_
#define BROTLI_ENC_COMMAND_H_
#include "./fast_log.h"
#include "./prefix.h"
#include "./types.h"
namespace brotli {
static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
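// Maps an insert length to its prefix code. For example,
// GetInsertLengthCode(10) = 8: 10 - 2 = 8, nbits = 2, so the code is
// (2 << 1) + (8 >> 2) + 2 = 8, matching kInsBase[8] = 10 and kInsExtra[8] = 2.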
static inline uint16_t GetInsertLengthCode(size_t insertlen) {
if (insertlen < 6) {
return static_cast<uint16_t>(insertlen);
} else if (insertlen < 130) {
insertlen -= 2;
uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
} else if (insertlen < 2114) {
return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
} else if (insertlen < 6210) {
return 21u;
} else if (insertlen < 22594) {
return 22u;
} else {
return 23u;
}
}
static inline uint16_t GetCopyLengthCode(size_t copylen) {
if (copylen < 10) {
return static_cast<uint16_t>(copylen - 2);
} else if (copylen < 134) {
copylen -= 6;
uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
} else if (copylen < 2118) {
return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
} else {
return 23u;
}
}
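// Packs copycode into the low 3 bits and inscode into the next 3 bits; the
// high bits come from cells[] (or encode use of the last distance). For
// example, inscode = 8, copycode = 8 without last distance gives
// cells[1 + 3 * 1] | 0 = 320.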
static inline uint16_t CombineLengthCodes(
uint16_t inscode, uint16_t copycode, bool use_last_distance) {
uint16_t bits64 =
static_cast<uint16_t>((copycode & 0x7u) | ((inscode & 0x7u) << 3));
if (use_last_distance && inscode < 8 && copycode < 16) {
return (copycode < 8) ? bits64 : (bits64 | 64);
} else {
// "To convert an insert-and-copy length code to an insert length code and
// a copy length code, the following table can be used"
static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
448u, 576u, 640u };
return cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64;
}
}
static inline void GetLengthCode(size_t insertlen, size_t copylen,
bool use_last_distance,
uint16_t* code) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
}
static inline uint32_t GetInsertBase(uint16_t inscode) {
return kInsBase[inscode];
}
static inline uint32_t GetInsertExtra(uint16_t inscode) {
return kInsExtra[inscode];
}
static inline uint32_t GetCopyBase(uint16_t copycode) {
return kCopyBase[copycode];
}
static inline uint32_t GetCopyExtra(uint16_t copycode) {
return kCopyExtra[copycode];
}
struct Command {
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
Command(size_t insertlen, size_t copylen, size_t copylen_code,
size_t distance_code)
: insert_len_(static_cast<uint32_t>(insertlen)) {
copy_len_ = static_cast<uint32_t>(
copylen | ((copylen_code ^ copylen) << 24));
// The distance prefix and extra bits are stored in this Command as if
// npostfix and ndirect were 0; they are only recomputed later, after the
// clustering if needed.
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
&cmd_prefix_);
}
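// Creates an insert-only command: copy_len() is 0, while copy_len_code() is a
// dummy value of 4 that keeps GetLengthCode() within its valid range.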
explicit Command(size_t insertlen)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
}
uint32_t DistanceCode(void) const {
if (dist_prefix_ < 16) {
return dist_prefix_;
}
uint32_t nbits = dist_extra_ >> 24;
uint32_t extra = dist_extra_ & 0xffffff;
uint32_t prefix = dist_prefix_ - 12 - 2 * nbits;
return (prefix << nbits) + extra + 12;
}
uint32_t DistanceContext(void) const {
uint32_t r = cmd_prefix_ >> 6;
uint32_t c = cmd_prefix_ & 7;
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
return c;
}
return 3;
}
inline uint32_t copy_len(void) const {
return copy_len_ & 0xFFFFFF;
}
inline uint32_t copy_len_code(void) const {
return (copy_len_ & 0xFFFFFF) ^ (copy_len_ >> 24);
}
uint32_t insert_len_;
/* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bits. */
uint32_t copy_len_;
uint32_t dist_extra_;
uint16_t cmd_prefix_;
uint16_t dist_prefix_;
};
} // namespace brotli
#endif // BROTLI_ENC_COMMAND_H_

View file

@@ -0,0 +1,701 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.
//
// Adapted from the CompressFragment() function in
// https://github.com/google/snappy/blob/master/snappy.cc
#include "./compress_fragment.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./types.h"
#include "./write_bits.h"
namespace brotli {
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime; the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
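// Hashes the five bytes at p: the << 24 shifts bytes p[5..7] out of the 64-bit
// load, so the hash depends on exactly the bytes that IsMatch() compares.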
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
}
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 3);
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
}
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4]);
}
// Builds a literal prefix code into "depths" and "bits" based on the statistics
// of the "input" string and stores it into the bit stream.
// Note that the prefix code here is built from the pre-LZ77 input, therefore
// we can only approximate the statistics of the actual literal stream.
// Moreover, for long inputs we build a histogram from a sample of the input
// and thus have to assign a non-zero depth for each literal.
static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
const size_t input_size,
uint8_t depths[256],
uint16_t bits[256],
size_t* storage_ix,
uint8_t* storage) {
uint32_t histogram[256] = { 0 };
size_t histogram_total;
if (input_size < (1 << 15)) {
for (size_t i = 0; i < input_size; ++i) {
++histogram[input[i]];
}
histogram_total = input_size;
for (size_t i = 0; i < 256; ++i) {
// We weigh the first 11 samples with weight 3 to account for the
// balancing effect of the LZ77 phase on the histogram.
const uint32_t adjust = 2 * std::min(histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
} else {
static const size_t kSampleRate = 29;
for (size_t i = 0; i < input_size; i += kSampleRate) {
++histogram[input[i]];
}
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
for (size_t i = 0; i < 256; ++i) {
// We add 1 to each population count to avoid 0 bit depths (since this is
// only a sample and we don't know if the symbol appears or not), and we
// weigh the first 11 samples with weight 3 to account for the balancing
// effect of the LZ77 phase on the histogram (more frequent symbols are
// more likely to be in backward references instead as literals).
const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
}
BuildAndStoreHuffmanTreeFast(histogram, histogram_total,
/* max_bits = */ 8,
depths, bits, storage_ix, storage);
}
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
uint8_t depth[128],
uint16_t bits[128],
size_t* storage_ix,
uint8_t* storage) {
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoops here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
// in this order in the command bits saves a few branches in the Emit*
// functions.
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth, 24);
memcpy(cmd_depth + 24, depth + 40, 8);
memcpy(cmd_depth + 32, depth + 24, 8);
memcpy(cmd_depth + 40, depth + 48, 8);
memcpy(cmd_depth + 48, depth + 32, 8);
memcpy(cmd_depth + 56, depth + 56, 8);
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits, 48);
memcpy(bits + 24, cmd_bits + 32, 16);
memcpy(bits + 32, cmd_bits + 48, 16);
memcpy(bits + 40, cmd_bits + 24, 16);
memcpy(bits + 48, cmd_bits + 40, 16);
memcpy(bits + 56, cmd_bits + 56, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
// Create the bit length array for the full command alphabet.
uint8_t cmd_depth[704] = { 0 };
memcpy(cmd_depth, depth, 8);
memcpy(cmd_depth + 64, depth + 8, 8);
memcpy(cmd_depth + 128, depth + 16, 8);
memcpy(cmd_depth + 192, depth + 24, 8);
memcpy(cmd_depth + 384, depth + 32, 8);
for (size_t i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[40 + i];
cmd_depth[256 + 8 * i] = depth[48 + i];
cmd_depth[448 + 8 * i] = depth[56 + i];
}
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
}
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
// REQUIRES: insertlen < 6210
inline void EmitInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 6) {
const size_t code = insertlen + 40;
WriteBits(depth[code], bits[code], storage_ix, storage);
++histo[code];
} else if (insertlen < 130) {
insertlen -= 2;
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
const size_t prefix = insertlen >> nbits;
const size_t inscode = (nbits << 1) + prefix + 42;
WriteBits(depth[inscode], bits[inscode], storage_ix, storage);
WriteBits(nbits, insertlen - (prefix << nbits), storage_ix, storage);
++histo[inscode];
} else if (insertlen < 2114) {
insertlen -= 66;
const uint32_t nbits = Log2FloorNonZero(insertlen);
const size_t code = nbits + 50;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, insertlen - (1 << nbits), storage_ix, storage);
++histo[code];
} else {
WriteBits(depth[61], bits[61], storage_ix, storage);
WriteBits(12, insertlen - 2114, storage_ix, storage);
++histo[21];
}
}
inline void EmitLongInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 22594) {
WriteBits(depth[62], bits[62], storage_ix, storage);
WriteBits(14, insertlen - 6210, storage_ix, storage);
++histo[22];
} else {
WriteBits(depth[63], bits[63], storage_ix, storage);
WriteBits(24, insertlen - 22594, storage_ix, storage);
++histo[23];
}
}
inline void EmitCopyLen(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 10) {
WriteBits(depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
++histo[copylen + 14];
} else if (copylen < 134) {
copylen -= 6;
const uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
const size_t prefix = copylen >> nbits;
const size_t code = (nbits << 1) + prefix + 20;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 2118) {
copylen -= 70;
const uint32_t nbits = Log2FloorNonZero(copylen);
const size_t code = nbits + 28;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
++histo[code];
} else {
WriteBits(depth[39], bits[39], storage_ix, storage);
WriteBits(24, copylen - 2118, storage_ix, storage);
++histo[47];
}
}
inline void EmitCopyLenLastDistance(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 12) {
WriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
++histo[copylen - 4];
} else if (copylen < 72) {
copylen -= 8;
const uint32_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t code = (nbits << 1) + prefix + 4;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 136) {
copylen -= 8;
const size_t code = (copylen >> 5) + 30;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(5, copylen & 31, storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else if (copylen < 2120) {
copylen -= 72;
const uint32_t nbits = Log2FloorNonZero(copylen);
const size_t code = nbits + 28;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else {
WriteBits(depth[39], bits[39], storage_ix, storage);
WriteBits(24, copylen - 2120, storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
++histo[47];
++histo[64];
}
}
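// Emits a distance with the distance codes 80..127. For example, distance 1 is
// biased to 4, giving nbits = 1 and prefix = 0, so distance code 80 is written
// followed by a single zero-valued extra bit.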
inline void EmitDistance(size_t distance,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix, uint8_t* storage) {
distance += 3;
const uint32_t nbits = Log2FloorNonZero(distance) - 1u;
const size_t prefix = (distance >> nbits) & 1;
const size_t offset = (2 + prefix) << nbits;
const size_t distcode = 2 * (nbits - 1) + prefix + 80;
WriteBits(depth[distcode], bits[distcode], storage_ix, storage);
WriteBits(nbits, distance - offset, storage_ix, storage);
++histo[distcode];
}
inline void EmitLiterals(const uint8_t* input, const size_t len,
const uint8_t depth[256], const uint16_t bits[256],
size_t* storage_ix, uint8_t* storage) {
for (size_t j = 0; j < len; j++) {
const uint8_t lit = input[j];
WriteBits(depth[lit], bits[lit], storage_ix, storage);
}
}
// REQUIRES: len <= 1 << 20.
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
// ISLAST
WriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
// MNIBBLES is 4
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
} else {
// MNIBBLES is 5
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
}
// ISUNCOMPRESSED
WriteBits(1, is_uncompressed, storage_ix, storage);
}
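// Overwrites n_bits bits of "array" at bit position pos, one byte at a time,
// preserving the surrounding bits. Used below to patch the MLEN field of an
// already-emitted meta-block header when the meta-block is extended.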
static void UpdateBits(size_t n_bits,
uint32_t bits,
size_t pos,
uint8_t *array) {
while (n_bits > 0) {
size_t byte_pos = pos >> 3;
size_t n_unchanged_bits = pos & 7;
size_t n_changed_bits = std::min(n_bits, 8 - n_unchanged_bits);
size_t total_bits = n_unchanged_bits + n_changed_bits;
uint32_t mask = (~((1 << total_bits) - 1)) | ((1 << n_unchanged_bits) - 1);
uint32_t unchanged_bits = array[byte_pos] & mask;
uint32_t changed_bits = bits & ((1 << n_changed_bits) - 1);
array[byte_pos] =
static_cast<uint8_t>((changed_bits << n_unchanged_bits) |
unchanged_bits);
n_bits -= n_changed_bits;
bits >>= n_changed_bits;
pos += n_changed_bits;
}
}
static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bitpos = new_storage_ix & 7;
const size_t mask = (1u << bitpos) - 1;
storage[new_storage_ix >> 3] &= static_cast<uint8_t>(mask);
*storage_ix = new_storage_ix;
}
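// Samples every 43rd byte and returns true if the current literal depths code
// the sample within about half a bit per byte (plus 200 bits of slack) of the
// sample's own entropy, i.e. the block can be merged under the existing code.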
static bool ShouldMergeBlock(const uint8_t* data, size_t len,
const uint8_t* depths) {
size_t histo[256] = { 0 };
static const size_t kSampleRate = 43;
for (size_t i = 0; i < len; i += kSampleRate) {
++histo[data[i]];
}
const size_t total = (len + kSampleRate - 1) / kSampleRate;
double r = (FastLog2(total) + 0.5) * static_cast<double>(total) + 200;
for (size_t i = 0; i < 256; ++i) {
r -= static_cast<double>(histo[i]) * (depths[i] + FastLog2(histo[i]));
}
return r >= 0.0;
}
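// Treating a depth of n as a symbol probability of 2^-n, the sum below is
// 2^15 times the expected bits per literal; above kMinEntropy (7.84 bits) the
// pending insert is considered incompressible.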
inline bool ShouldUseUncompressedMode(const uint8_t* metablock_start,
const uint8_t* next_emit,
const size_t insertlen,
const uint8_t literal_depths[256]) {
const size_t compressed = static_cast<size_t>(next_emit - metablock_start);
if (compressed * 50 > insertlen) {
return false;
}
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
static const double kMinEntropy =
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
uint32_t sum = 0;
for (int i = 0; i < 256; ++i) {
const uint32_t n = literal_depths[i];
sum += n << (15 - n);
}
return sum > static_cast<uint32_t>((1 << 15) * kMinEntropy);
}
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
const size_t storage_ix_start,
size_t* storage_ix, uint8_t* storage) {
const size_t len = static_cast<size_t>(end - begin);
RewindBitPosition(storage_ix_start, storage_ix, storage);
StoreMetaBlockHeader(len, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], begin, len);
*storage_ix += len << 3;
storage[*storage_ix >> 3] = 0;
}
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
bool is_last,
int* table, size_t table_size,
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage) {
if (input_size == 0) {
assert(is_last);
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
// "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or
// the end of the input will be emitted as literal bytes.
const uint8_t* next_emit = input;
// Save the start of the first block for position and distance computations.
const uint8_t* base_ip = input;
static const size_t kFirstBlockSize = 3 << 15;
static const size_t kMergeBlockSize = 1 << 16;
const uint8_t* metablock_start = input;
size_t block_size = std::min(input_size, kFirstBlockSize);
size_t total_block_size = block_size;
// Save the bit position of the MLEN field of the meta-block header, so that
// we can update it later if we decide to extend this meta-block.
size_t mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
WriteBits(13, 0, storage_ix, storage);
uint8_t lit_depth[256] = { 0 };
uint16_t lit_bits[256] = { 0 };
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
storage_ix, storage);
// Store the pre-compressed command and distance prefix codes.
for (size_t i = 0; i + 7 < *cmd_code_numbits; i += 8) {
WriteBits(8, cmd_code[i >> 3], storage_ix, storage);
}
WriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
storage_ix, storage);
emit_commands:
// Initialize the command and distance histograms. We will gather
// statistics of command and distance codes during the processing
// of this block and use it to update the command and distance
// prefix codes for the next block.
uint32_t cmd_histo[128] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0,
};
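// The nonzero seeds correspond to codes the emitters below can produce, so the
// prefix code rebuilt from cmd_histo never assigns them zero depth.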
// "ip" is the input pointer.
const uint8_t* ip = input;
assert(table_size);
assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
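// With table_size == (1 << k), shift == 64 - k, so Hash() yields k-bit
// indices into "table".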
const uint8_t* ip_end = input + block_size;
int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 5;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
// For the last block, we need to keep a 16 bytes margin so that we can be
// sure that all distances are at most window size - 16.
// For all other blocks, we only need to keep a margin of 5 bytes so that
// we don't go over the block size with a copy.
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
// Step 1: Scan forward in the input looking for a 5-byte-long match.
// If we get close to exhausting the input then goto emit_remainder.
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned, look at every third byte, etc. When a match is found,
// immediately go back to looking at every byte. This is a small loss
// (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
// win since the compressor quickly "realizes" the data is incompressible
// and doesn't bother looking for matches everywhere.
//
// The "skip" variable keeps track of how many bytes there are since the
// last match; dividing it by 32 (i.e. right-shifting by five) gives the
// number of bytes to move ahead for each iteration.
uint32_t skip = 32;
const uint8_t* next_ip = ip;
const uint8_t* candidate;
do {
ip = next_ip;
uint32_t hash = next_hash;
assert(hash == Hash(ip, shift));
uint32_t bytes_between_hash_lookups = skip++ >> 5;
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
}
next_hash = Hash(next_ip, shift);
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (PREDICT_TRUE(candidate < ip)) {
table[hash] = static_cast<int>(ip - base_ip);
break;
}
}
candidate = base_ip + table[hash];
assert(candidate >= base_ip);
assert(candidate < ip);
table[hash] = static_cast<int>(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
// Step 2: Emit the found match together with the literal bytes from
// "next_emit" to the bit stream, and then see if we can find a next macth
// immediately afterwards. Repeat until we find no match for the input
// without emitting some literal bytes.
uint64_t input_bytes;
{
// We have a 5-byte match at ip, and we need to emit bytes in
// [next_emit, ip).
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
ip += matched;
int distance = static_cast<int>(base - candidate); /* > 0 */
size_t insert = static_cast<size_t>(base - next_emit);
assert(0 == memcmp(base, candidate, matched));
if (PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
lit_depth)) {
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
storage_ix, storage);
input_size -= static_cast<size_t>(base - input);
input = base;
next_emit = input;
goto next_block;
} else {
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
}
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
if (distance == last_distance) {
WriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
++cmd_histo[64];
} else {
EmitDistance(static_cast<size_t>(distance), cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
}
while (IsMatch(ip, candidate)) {
// We have a 5-byte match at ip, and no need to emit any literal bytes
// prior to ip.
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
ip += matched;
last_distance = static_cast<int>(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitDistance(static_cast<size_t>(last_distance), cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
}
next_hash = Hash(++ip, shift);
}
}
emit_remainder:
assert(next_emit <= ip_end);
input += block_size;
input_size -= block_size;
block_size = std::min(input_size, kMergeBlockSize);
// Decide if we want to continue this meta-block instead of emitting the
// last insert-only command.
if (input_size > 0 &&
total_block_size + block_size <= (1 << 20) &&
ShouldMergeBlock(input, block_size, lit_depth)) {
assert(total_block_size > (1 << 16));
// Update the size of the current meta-block and continue emitting commands.
// We can do this because the current size and the new size both have 5
// nibbles.
total_block_size += block_size;
UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
mlen_storage_ix, storage);
goto emit_commands;
}
// Emit the remaining bytes as literals.
if (next_emit < ip_end) {
const size_t insert = static_cast<size_t>(ip_end - next_emit);
if (PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
lit_depth)) {
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
storage_ix, storage);
} else {
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
}
}
next_emit = ip_end;
next_block:
// If we have more data, write a new meta-block header and prefix codes and
// then continue emitting commands.
if (input_size > 0) {
metablock_start = input;
block_size = std::min(input_size, kFirstBlockSize);
total_block_size = block_size;
// Save the bit position of the MLEN field of the meta-block header, so that
// we can update it later if we decide to extend this meta-block.
mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
WriteBits(13, 0, storage_ix, storage);
memset(lit_depth, 0, sizeof(lit_depth));
memset(lit_bits, 0, sizeof(lit_bits));
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
storage_ix, storage);
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
storage_ix, storage);
goto emit_commands;
}
if (is_last) {
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
*storage_ix = (*storage_ix + 7u) & ~7u;
} else {
// If this is not the last block, update the command and distance prefix
// codes for the next block and store the compressed forms.
cmd_code[0] = 0;
*cmd_code_numbits = 0;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
cmd_code_numbits, cmd_code);
}
}
} // namespace brotli

View file

@@ -0,0 +1,47 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
#include "./types.h"
namespace brotli {
// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
// (see comment in encode.h) used for the encoding of this input fragment.
// If "is_last" is false, they are updated to reflect the statistics
// of this input fragment, to be used for the encoding of the next fragment.
//
// "*cmd_code_numbits" is the number of bits of the compressed representation
// of the command and distance prefix codes, and "cmd_code" is an array of
// at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
// command and distance prefix codes. If "is_last" is false, these are also
// updated to represent the updated "cmd_depth" and "cmd_bits".
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
bool is_last,
int* table, size_t table_size,
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage);
} // namespace brotli
#endif // BROTLI_ENC_COMPRESS_FRAGMENT_H_

View file

@@ -0,0 +1,524 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.
#include "./compress_fragment_two_pass.h"
#include <algorithm>
#include <cassert>
#include <cstring>
#include "./brotli_bit_stream.h"
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./types.h"
#include "./write_bits.h"
namespace brotli {
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime; the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
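// Unlike the one-pass variant, the hash keeps the low six bytes of the load
// (<< 16 instead of << 24), matching the 6-byte IsMatch() below.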
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
}
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 2);
const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
}
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4] &&
p1[5] == p2[5]);
}
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
static void BuildAndStoreCommandPrefixCode(
const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoops here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
// in this order in the command bits saves a few branches in the Emit*
// functions.
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth + 24, 24);
memcpy(cmd_depth + 24, depth, 8);
memcpy(cmd_depth + 32, depth + 48, 8);
memcpy(cmd_depth + 40, depth + 8, 8);
memcpy(cmd_depth + 48, depth + 56, 8);
memcpy(cmd_depth + 56, depth + 16, 8);
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits + 24, 16);
memcpy(bits + 8, cmd_bits + 40, 16);
memcpy(bits + 16, cmd_bits + 56, 16);
memcpy(bits + 24, cmd_bits, 48);
memcpy(bits + 48, cmd_bits + 32, 16);
memcpy(bits + 56, cmd_bits + 48, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
// Create the bit length array for the full command alphabet.
uint8_t cmd_depth[704] = { 0 };
memcpy(cmd_depth, depth + 24, 8);
memcpy(cmd_depth + 64, depth + 32, 8);
memcpy(cmd_depth + 128, depth + 40, 8);
memcpy(cmd_depth + 192, depth + 48, 8);
memcpy(cmd_depth + 384, depth + 56, 8);
for (size_t i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[i];
cmd_depth[256 + 8 * i] = depth[8 + i];
cmd_depth[448 + 8 * i] = depth[16 + i];
}
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
}
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
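// First-pass commands are packed one per uint32_t: the command prefix code in
// the low 8 bits and the value of its extra bits in the high 24 bits.
// StoreCommands() unpacks and entropy-codes them in the second pass.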
inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
if (insertlen < 6) {
**commands = insertlen;
} else if (insertlen < 130) {
insertlen -= 2;
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
const uint32_t prefix = insertlen >> nbits;
const uint32_t inscode = (nbits << 1) + prefix + 2;
const uint32_t extra = insertlen - (prefix << nbits);
**commands = inscode | (extra << 8);
} else if (insertlen < 2114) {
insertlen -= 66;
const uint32_t nbits = Log2FloorNonZero(insertlen);
const uint32_t code = nbits + 10;
const uint32_t extra = insertlen - (1 << nbits);
**commands = code | (extra << 8);
} else if (insertlen < 6210) {
const uint32_t extra = insertlen - 2114;
**commands = 21 | (extra << 8);
} else if (insertlen < 22594) {
const uint32_t extra = insertlen - 6210;
**commands = 22 | (extra << 8);
} else {
const uint32_t extra = insertlen - 22594;
**commands = 23 | (extra << 8);
}
++(*commands);
}
inline void EmitCopyLen(size_t copylen, uint32_t** commands) {
if (copylen < 10) {
**commands = static_cast<uint32_t>(copylen + 38);
} else if (copylen < 134) {
copylen -= 6;
const size_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t code = (nbits << 1) + prefix + 44;
const size_t extra = copylen - (prefix << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
} else if (copylen < 2118) {
copylen -= 70;
const size_t nbits = Log2FloorNonZero(copylen);
const size_t code = nbits + 52;
const size_t extra = copylen - (1 << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
} else {
const size_t extra = copylen - 2118;
**commands = static_cast<uint32_t>(63 | (extra << 8));
}
++(*commands);
}
inline void EmitCopyLenLastDistance(size_t copylen, uint32_t** commands) {
if (copylen < 12) {
**commands = static_cast<uint32_t>(copylen + 20);
++(*commands);
} else if (copylen < 72) {
copylen -= 8;
const size_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t code = (nbits << 1) + prefix + 28;
const size_t extra = copylen - (prefix << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
++(*commands);
} else if (copylen < 136) {
copylen -= 8;
const size_t code = (copylen >> 5) + 54;
const size_t extra = copylen & 31;
**commands = static_cast<uint32_t>(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else if (copylen < 2120) {
copylen -= 72;
const size_t nbits = Log2FloorNonZero(copylen);
const size_t code = nbits + 52;
const size_t extra = copylen - (1 << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else {
const size_t extra = copylen - 2120;
**commands = static_cast<uint32_t>(63 | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
}
}
inline void EmitDistance(uint32_t distance, uint32_t** commands) {
distance += 3;
uint32_t nbits = Log2FloorNonZero(distance) - 1;
const uint32_t prefix = (distance >> nbits) & 1;
const uint32_t offset = (2 + prefix) << nbits;
const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
uint32_t extra = distance - offset;
**commands = distcode | (extra << 8);
++(*commands);
}
// REQUIRES: len <= 1 << 20.
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
// ISLAST
WriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
// MNIBBLES is 4
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
} else {
// MNIBBLES is 5
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
}
// ISUNCOMPRESSED
WriteBits(1, is_uncompressed, storage_ix, storage);
}
static void CreateCommands(const uint8_t* input, size_t block_size,
size_t input_size, const uint8_t* base_ip,
int* table, size_t table_size,
uint8_t** literals, uint32_t** commands) {
// "ip" is the input pointer.
const uint8_t* ip = input;
assert(table_size);
assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
// "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or
// the end of the input will be emitted as literal bytes.
const uint8_t* next_emit = input;
int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 6;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
// For the last block, we need to keep a 16 bytes margin so that we can be
// sure that all distances are at most window size - 16.
// For all other blocks, we only need to keep a margin of 6 bytes so that
// we don't go over the block size with a copy.
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
// Step 1: Scan forward in the input looking for a 6-byte-long match.
// If we get close to exhausting the input then goto emit_remainder.
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned, look at every third byte, etc. When a match is found,
// immediately go back to looking at every byte. This is a small loss
// (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
// win since the compressor quickly "realizes" the data is incompressible
// and doesn't bother looking for matches everywhere.
//
// The "skip" variable keeps track of how many bytes there are since the
// last match; dividing it by 32 (i.e. right-shifting by five) gives the
// number of bytes to move ahead for each iteration.
uint32_t skip = 32;
const uint8_t* next_ip = ip;
const uint8_t* candidate;
do {
ip = next_ip;
uint32_t hash = next_hash;
assert(hash == Hash(ip, shift));
uint32_t bytes_between_hash_lookups = skip++ >> 5;
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
}
next_hash = Hash(next_ip, shift);
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (PREDICT_TRUE(candidate < ip)) {
table[hash] = static_cast<int>(ip - base_ip);
break;
}
}
candidate = base_ip + table[hash];
assert(candidate >= base_ip);
assert(candidate < ip);
table[hash] = static_cast<int>(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
// Step 2: Emit the found match together with the literal bytes from
// "next_emit", and then see if we can find a next macth immediately
// afterwards. Repeat until we find no match for the input
// without emitting some literal bytes.
uint64_t input_bytes;
{
// We have a 6-byte match at ip, and we need to emit bytes in
// [next_emit, ip).
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
ip += matched;
int distance = static_cast<int>(base - candidate); /* > 0 */
int insert = static_cast<int>(base - next_emit);
assert(0 == memcmp(base, candidate, matched));
EmitInsertLen(static_cast<uint32_t>(insert), commands);
memcpy(*literals, next_emit, static_cast<size_t>(insert));
*literals += insert;
if (distance == last_distance) {
**commands = 64;
++(*commands);
} else {
EmitDistance(static_cast<uint32_t>(distance), commands);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, commands);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
}
while (IsMatch(ip, candidate)) {
// We have a 6-byte match at ip, and no need to emit any
// literal bytes prior to ip.
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
ip += matched;
last_distance = static_cast<int>(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, commands);
EmitDistance(static_cast<uint32_t>(last_distance), commands);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
}
next_hash = Hash(++ip, shift);
}
}
emit_remainder:
assert(next_emit <= ip_end);
// Emit the remaining bytes as literals.
if (next_emit < ip_end) {
const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
EmitInsertLen(insert, commands);
memcpy(*literals, next_emit, insert);
*literals += insert;
}
}
static void StoreCommands(const uint8_t* literals, const size_t num_literals,
const uint32_t* commands, const size_t num_commands,
size_t* storage_ix, uint8_t* storage) {
uint8_t lit_depths[256] = { 0 };
uint16_t lit_bits[256] = { 0 };
uint32_t lit_histo[256] = { 0 };
for (size_t i = 0; i < num_literals; ++i) {
++lit_histo[literals[i]];
}
BuildAndStoreHuffmanTreeFast(lit_histo, num_literals,
/* max_bits = */ 8,
lit_depths, lit_bits,
storage_ix, storage);
uint8_t cmd_depths[128] = { 0 };
uint16_t cmd_bits[128] = { 0 };
uint32_t cmd_histo[128] = { 0 };
for (size_t i = 0; i < num_commands; ++i) {
++cmd_histo[commands[i] & 0xff];
}
cmd_histo[1] += 1;
cmd_histo[2] += 1;
cmd_histo[64] += 1;
cmd_histo[84] += 1;
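// The four increments above guarantee at least two used symbols in each half
// of the alphabet (command codes 0..63, distance codes 64..127), keeping both
// prefix trees well-formed even for degenerate command streams.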
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
storage_ix, storage);
static const uint32_t kNumExtraBits[128] = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
};
static const uint32_t kInsertOffset[24] = {
0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
1090, 2114, 6210, 22594,
};
for (size_t i = 0; i < num_commands; ++i) {
const uint32_t cmd = commands[i];
const uint32_t code = cmd & 0xff;
const uint32_t extra = cmd >> 8;
WriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
WriteBits(kNumExtraBits[code], extra, storage_ix, storage);
if (code < 24) {
const uint32_t insert = kInsertOffset[code] + extra;
for (uint32_t j = 0; j < insert; ++j) {
const uint8_t lit = *literals;
WriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
++literals;
}
}
}
}
static bool ShouldCompress(const uint8_t* input, size_t input_size,
size_t num_literals) {
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
static const double kMaxRatioOfLiterals =
1.0 - kAcceptableLossForUncompressibleSpeedup;
if (num_literals < kMaxRatioOfLiterals * static_cast<double>(input_size)) {
return true;
}
uint32_t literal_histo[256] = { 0 };
static const uint32_t kSampleRate = 43;
static const double kMaxEntropy =
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
const double max_total_bit_cost =
static_cast<double>(input_size) * kMaxEntropy / kSampleRate;
for (size_t i = 0; i < input_size; i += kSampleRate) {
++literal_histo[input[i]];
}
return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
}
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
bool is_last,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage) {
// Save the start of the first block for position and distance computations.
const uint8_t* base_ip = input;
while (input_size > 0) {
size_t block_size = std::min(input_size, kCompressFragmentTwoPassBlockSize);
uint32_t* commands = command_buf;
uint8_t* literals = literal_buf;
CreateCommands(input, block_size, input_size, base_ip, table, table_size,
&literals, &commands);
const size_t num_literals = static_cast<size_t>(literals - literal_buf);
const size_t num_commands = static_cast<size_t>(commands - command_buf);
if (ShouldCompress(input, block_size, num_literals)) {
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
WriteBits(13, 0, storage_ix, storage);
StoreCommands(literal_buf, num_literals, command_buf, num_commands,
storage_ix, storage);
} else {
// Since we did not find many backward references and the entropy of
// the data is close to 8 bits, we can simply emit an uncompressed block.
// This makes the compression speed on incompressible data about 3x faster.
StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, block_size);
*storage_ix += block_size << 3;
storage[*storage_ix >> 3] = 0;
}
input += block_size;
input_size -= block_size;
}
if (is_last) {
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
} // namespace brotli


@@ -0,0 +1,40 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#include "./types.h"
namespace brotli {
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: "command_buf" and "literal_buf" point to at least
// kCompressFragmentTwoPassBlockSize long arrays.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
bool is_last,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage);
} // namespace brotli
#endif // BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
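To make the contract above concrete, here is a minimal, hypothetical driver (not part of the imported sources): the buffer sizes follow the REQUIRES comments, while the hash table size (1 << 14 buckets) and the output bound are assumptions chosen for illustration.

#include <cstdint>
#include <vector>
#include "./compress_fragment_two_pass.h"

size_t TwoPassCompress(const uint8_t* input, size_t input_size,
                       std::vector<uint8_t>* out) {
  using namespace brotli;
  std::vector<uint32_t> command_buf(kCompressFragmentTwoPassBlockSize);
  std::vector<uint8_t> literal_buf(kCompressFragmentTwoPassBlockSize);
  std::vector<int> table(1 << 14, 0);  // power-of-two size, zero-initialized
  out->assign(2 * input_size + 500, 0);  // assumed worst-case output bound
  size_t storage_ix = 0;  // bit position; a real caller would first emit the
                          // stream header before the meta-blocks
  BrotliCompressFragmentTwoPass(input, input_size, /* is_last = */ true,
                                &command_buf[0], &literal_buf[0],
                                &table[0], table.size(),
                                &storage_ix, &(*out)[0]);
  out->resize((storage_ix + 7) >> 3);  // round the bit count up to bytes
  return out->size();
}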


@@ -0,0 +1,15 @@
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* C++ API for Brotli compression. */
#ifndef BROTLI_ENC_COMPRESSOR_H_
#define BROTLI_ENC_COMPRESSOR_H_
#include "./encode.h"
#include "./streams.h"
#endif /* BROTLI_ENC_COMPRESSOR_H_ */


@@ -0,0 +1,178 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to map previous bytes into a context id.
#ifndef BROTLI_ENC_CONTEXT_H_
#define BROTLI_ENC_CONTEXT_H_
#include "./types.h"
namespace brotli {
// Second-order context lookup table for UTF8 byte streams.
//
// If p1 and p2 are the previous two bytes, we calculate the context as
//
// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
//
// If the previous two bytes are ASCII characters (i.e. < 128), this will be
// equivalent to
//
// context = 4 * context1(p1) + context2(p2),
//
// where context1 is based on the previous byte in the following way:
//
// 0 : non-ASCII control
// 1 : \t, \n, \r
// 2 : space
// 3 : other punctuation
// 4 : " '
// 5 : %
// 6 : ( < [ {
// 7 : ) > ] }
// 8 : , ; :
// 9 : .
// 10 : =
// 11 : number
// 12 : upper-case vowel
// 13 : upper-case consonant
// 14 : lower-case vowel
// 15 : lower-case consonant
//
// and context2 is based on the second last byte:
//
// 0 : control, space
// 1 : punctuation
// 2 : upper-case letter, number
// 3 : lower-case letter
//
// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
// stream it will be a continuation byte, value between 128 and 191), the
// context is the same as if the second last byte was an ASCII control or space.
//
// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
// be a continuation byte and the context id is 2 or 3 depending on the LSB of
// the last byte and to a lesser extent on the second last byte if it is ASCII.
//
// If the last byte is a UTF8 continuation byte, the second last byte can be:
// - continuation byte: the next byte is probably ASCII or lead byte (assuming
// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
//
// The possible value combinations of the previous two bytes, the range of
// context ids and the type of the next byte is summarized in the table below:
//
// |--------\-----------------------------------------------------------------|
// | \ Last byte |
// | Second \---------------------------------------------------------------|
// | last byte \ ASCII | cont. byte | lead byte |
// | \ (0-127) | (128-191) | (192-) |
// |=============|===================|=====================|==================|
// | ASCII | next: ASCII/lead | not valid | next: cont. |
// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
// |-------------|-------------------|---------------------|------------------|
// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
// |-------------|-------------------|---------------------|------------------|
// | lead byte | not valid | next: ASCII/lead | not valid |
// | (192-207) | | context: 0 - 1 | |
// |-------------|-------------------|---------------------|------------------|
// | lead byte | not valid | next: cont. | not valid |
// | (208-) | | context: 2 - 3 | |
// |-------------|-------------------|---------------------|------------------|
static const uint8_t kUTF8ContextLookup[512] = {
// Last byte.
//
// ASCII range.
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
// UTF8 continuation byte range.
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
// UTF8 lead byte range.
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
// Second last byte.
//
// ASCII range.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
// UTF8 continuation byte range.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// UTF8 lead byte range.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
// Context lookup table for small signed integers.
static const uint8_t kSigned3BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
enum ContextType {
CONTEXT_LSB6 = 0,
CONTEXT_MSB6 = 1,
CONTEXT_UTF8 = 2,
CONTEXT_SIGNED = 3
};
static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
switch (mode) {
case CONTEXT_LSB6:
return p1 & 0x3f;
case CONTEXT_MSB6:
return static_cast<uint8_t>(p1 >> 2);
case CONTEXT_UTF8:
return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
case CONTEXT_SIGNED:
return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +
kSigned3BitContextLookup[p2]);
default:
return 0;
}
}
} // namespace brotli
#endif // BROTLI_ENC_CONTEXT_H_
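A quick sanity check of the tables above (a hypothetical test, not part of the imported sources): for the last two bytes of "the", p1 = 'e' is a lower-case vowel (context1 = 14) and p2 = 'h' is a lower-case letter (context2 = 3), so the UTF-8 context id is 4 * 14 + 3 = 59.

#include <cassert>
#include "./context.h"

int main() {
  // Computed through the helper...
  assert(brotli::Context('e', 'h', brotli::CONTEXT_UTF8) == 59);
  // ...and read directly from the two halves of the lookup table.
  assert((brotli::kUTF8ContextLookup['e'] |
          brotli::kUTF8ContextLookup['h' + 256]) == 59);
  return 0;
}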

Diff not shown because of its large size.


@@ -0,0 +1,41 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Collection of static dictionary words.
#ifndef BROTLI_ENC_DICTIONARY_H_
#define BROTLI_ENC_DICTIONARY_H_
#include "./types.h"
// No namespace, use same identifier as for the C decoder.
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
extern const uint8_t kBrotliDictionary[122784];
static const uint32_t kBrotliDictionaryOffsetsByLength[] = {
0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032,
53248, 63488, 74752, 87040, 93696, 100864, 104704, 106752, 108928, 113536,
115968, 118528, 119872, 121280, 122016,
};
static const uint8_t kBrotliDictionarySizeBitsByLength[] = {
0, 0, 0, 0, 10, 10, 11, 11, 10, 10,
10, 10, 10, 9, 9, 8, 7, 7, 8, 7,
7, 6, 6, 5, 5,
};
static const int kBrotliMinDictionaryWordLength = 4;
static const int kBrotliMaxDictionaryWordLength = 24;
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif // BROTLI_ENC_DICTIONARY_H_
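The arrays above describe a flat layout: the words of a given length are stored back to back, 1 << kBrotliDictionarySizeBitsByLength[length] of them, starting at byte offset kBrotliDictionaryOffsetsByLength[length]. The offsets are consistent with that: e.g. the 1 << 10 four-byte words occupy bytes 0..4095, and the offset for length 5 is 4096. A hypothetical lookup helper illustrating the layout:

#include <cstdint>
#include "./dictionary.h"

// Returns a pointer to word "word_id" among the words of length "len".
// Assumes kBrotliMinDictionaryWordLength <= len <=
// kBrotliMaxDictionaryWordLength and
// word_id < (1u << kBrotliDictionarySizeBitsByLength[len]).
static const uint8_t* DictionaryWord(int len, uint32_t word_id) {
  const uint32_t offset = kBrotliDictionaryOffsetsByLength[len] +
                          static_cast<uint32_t>(len) * word_id;
  return &kBrotliDictionary[offset];
}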

Diff not shown because of its large size.

modules/brotli/enc/encode.cc (new file, 1180 lines): diff not shown because of its large size.

modules/brotli/enc/encode.h (new file, 209 lines):

@@ -0,0 +1,209 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// API for Brotli compression
#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
#include <string>
#include <vector>
#include "./command.h"
#include "./hash.h"
#include "./ringbuffer.h"
#include "./static_dict.h"
#include "./streams.h"
#include "./types.h"
namespace brotli {
static const int kMaxWindowBits = 24;
static const int kMinWindowBits = 10;
static const int kMinInputBlockBits = 16;
static const int kMaxInputBlockBits = 24;
struct BrotliParams {
BrotliParams(void)
: mode(MODE_GENERIC),
quality(11),
lgwin(22),
lgblock(0),
enable_dictionary(true),
enable_transforms(false),
greedy_block_split(false),
enable_context_modeling(true) {}
enum Mode {
// Default compression mode. The compressor does not know anything in
// advance about the properties of the input.
MODE_GENERIC = 0,
// Compression mode for UTF-8 format text input.
MODE_TEXT = 1,
// Compression mode used in WOFF 2.0.
MODE_FONT = 2
};
Mode mode;
// Controls the compression-speed vs compression-density tradeoffs. The higher
// the quality, the slower the compression. Range is 0 to 11.
int quality;
// Base 2 logarithm of the sliding window size. Range is 10 to 24.
int lgwin;
// Base 2 logarithm of the maximum input block size. Range is 16 to 24.
// If set to 0, the value will be set based on the quality.
int lgblock;
// These settings are deprecated and will be ignored.
// All speed vs. size compromises are controlled by the quality param.
bool enable_dictionary;
bool enable_transforms;
bool greedy_block_split;
bool enable_context_modeling;
};
// An instance cannot be reused for multiple brotli streams.
class BrotliCompressor {
public:
explicit BrotliCompressor(BrotliParams params);
~BrotliCompressor(void);
// The maximum input size that can be processed at once.
size_t input_block_size(void) const { return size_t(1) << params_.lgblock; }
// Encodes the data in input_buffer as a meta-block and writes it to
// encoded_buffer (*encoded_size should be set to the size of
// encoded_buffer) and sets *encoded_size to the number of bytes that
// was written. The input_size must be <= input_block_size().
// Returns 0 if there was an error and 1 otherwise.
bool WriteMetaBlock(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Writes a metadata meta-block containing the given input to encoded_buffer.
// *encoded_size should be set to the size of the encoded_buffer.
// Sets *encoded_size to the number of bytes that were written.
// Note that the given input data will not be part of the sliding window and
// thus no backward references can be made to this data from subsequent
// metablocks.
bool WriteMetadata(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Writes a zero-length meta-block with end-of-input bit set to the
// internal output buffer and copies the output buffer to encoded_buffer
// (*encoded_size should be set to the size of encoded_buffer) and sets
// *encoded_size to the number of bytes written. Returns false if there was
// an error and true otherwise.
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
// Copies the given input data to the internal ring buffer of the compressor.
// No processing of the data occurs at this time and this function can be
// called multiple times before calling WriteBrotliData() to process the
// accumulated input. At most input_block_size() bytes of input data can be
// copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
void CopyInputToRingBuffer(const size_t input_size,
const uint8_t* input_buffer);
// Processes the accumulated input data and sets *out_size to the length of
// the new output meta-block, or to zero if no new output meta-block was
// created (in this case the processed input data is buffered internally).
// If *out_size is positive, *output points to the start of the output data.
// If is_last or force_flush is true, an output meta-block is always created.
// Returns false if the size of the input data is larger than
// input_block_size().
bool WriteBrotliData(const bool is_last, const bool force_flush,
size_t* out_size, uint8_t** output);
// Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
// e.g. for custom static dictionaries for data formats.
// Not to be confused with the built-in transformable dictionary of Brotli.
// To decode, use BrotliSetCustomDictionary of the decoder with the same
// dictionary.
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
// No-op, but we keep it here for API backward-compatibility.
void WriteStreamHeader(void) {}
private:
uint8_t* GetBrotliStorage(size_t size);
// Allocates and clears a hash table using memory in "*this",
// stores the number of buckets in "*table_size" and returns a pointer to
// the base of the hash table.
int* GetHashTable(int quality,
size_t input_size, size_t* table_size);
BrotliParams params_;
Hashers* hashers_;
int hash_type_;
uint64_t input_pos_;
RingBuffer* ringbuffer_;
size_t cmd_alloc_size_;
Command* commands_;
size_t num_commands_;
size_t num_literals_;
size_t last_insert_len_;
uint64_t last_flush_pos_;
uint64_t last_processed_pos_;
int dist_cache_[4];
int saved_dist_cache_[4];
uint8_t last_byte_;
uint8_t last_byte_bits_;
uint8_t prev_byte_;
uint8_t prev_byte2_;
size_t storage_size_;
uint8_t* storage_;
// Hash table for quality 0 mode.
int small_table_[1 << 10]; // 2KB
int* large_table_; // Allocated only when needed
// Command and distance prefix codes (each 64 symbols, stored back-to-back)
// used for the next block in quality 0. The command prefix code is over a
// smaller alphabet with the following 64 symbols:
// 0 - 15: insert length code 0, copy length code 0 - 15, same distance
// 16 - 39: insert length code 0, copy length code 0 - 23
// 40 - 63: insert length code 0 - 23, copy length code 0
// Note that symbols 16 and 40 represent the same code in the full alphabet,
// but we do not use either of them in quality 0.
uint8_t cmd_depths_[128];
uint16_t cmd_bits_[128];
// The compressed form of the command and distance prefix codes for the next
// block in quality 0.
uint8_t cmd_code_[512];
size_t cmd_code_numbits_;
// Command and literal buffers for quality 1.
uint32_t* command_buf_;
uint8_t* literal_buf_;
int is_last_block_emitted_;
};
// Compresses the data in input_buffer into encoded_buffer, and sets
// *encoded_size to the compressed length.
// Returns 0 if there was an error and 1 otherwise.
int BrotliCompressBuffer(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
// Same as above, but uses the specified input and output classes instead
// of reading from and writing to pre-allocated memory buffers.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
// Before compressing the data, sets a custom LZ77 dictionary with
// BrotliCompressor::BrotliSetCustomDictionary.
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliParams params,
BrotliIn* in, BrotliOut* out);
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_H_
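A minimal one-shot usage sketch for the buffer API declared above (hypothetical, not part of the imported sources); the output bound is an assumption, and real callers should size the buffer generously:

#include <string>
#include <vector>
#include "./encode.h"

// Compresses "in" into "out" with the default parameters (quality 11).
bool CompressString(const std::string& in, std::string* out) {
  brotli::BrotliParams params;
  size_t encoded_size = in.size() + 1024;  // assumed worst-case bound
  std::vector<uint8_t> encoded(encoded_size);
  if (!brotli::BrotliCompressBuffer(
          params, in.size(),
          reinterpret_cast<const uint8_t*>(in.data()),
          &encoded_size, &encoded[0])) {
    return false;  // BrotliCompressBuffer returns 0 on error
  }
  out->assign(reinterpret_cast<const char*>(&encoded[0]), encoded_size);
  return true;
}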


@@ -0,0 +1,279 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Implementation of parallel Brotli compressor.
#include "./encode_parallel.h"
#include <algorithm>
#include <limits>
#include "./backward_references.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./brotli_bit_stream.h"
#include "./cluster.h"
#include "./context.h"
#include "./metablock.h"
#include "./transform.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
#include "./histogram.h"
#include "./prefix.h"
#include "./utf8_util.h"
#include "./write_bits.h"
namespace brotli {
namespace {
void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
}
for (size_t i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes,
distance_postfix_bits,
&cmd->dist_prefix_,
&cmd->dist_extra_);
}
}
}
bool WriteMetaBlockParallel(const BrotliParams& params,
const uint32_t input_size,
const uint8_t* input_buffer,
const uint32_t prefix_size,
const uint8_t* prefix_buffer,
const bool is_first,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (input_size == 0) {
return false;
}
// Copy prefix + next input block into a contiguous area.
uint32_t input_pos = prefix_size;
// CreateBackwardReferences reads up to 3 bytes past the end of input if the
// mask points past the end of input.
// FindMatchLengthWithLimit could do another 8 bytes look-forward.
std::vector<uint8_t> input(prefix_size + input_size + 4 + 8);
memcpy(&input[0], prefix_buffer, prefix_size);
memcpy(&input[input_pos], input_buffer, input_size);
// Since we don't have a ringbuffer, masking is a no-op.
// We use one less bit than the full range because some of the code uses
// mask + 1 as the size of the ringbuffer.
const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
// Decide about UTF8 mode.
static const double kMinUTF8Ratio = 0.75;
bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
kMinUTF8Ratio);
// Initialize hashers.
int hash_type = std::min(10, params.quality);
Hashers* hashers = new Hashers();
hashers->Init(hash_type);
// Compute backward references.
size_t last_insert_len = 0;
size_t num_commands = 0;
size_t num_literals = 0;
int dist_cache[4] = { -4, -4, -4, -4 };
Command* commands = static_cast<Command*>(
malloc(sizeof(Command) * ((input_size + 1) >> 1)));
if (commands == 0) {
delete hashers;
return false;
}
CreateBackwardReferences(
input_size, input_pos, is_last,
&input[0], mask,
params.quality,
params.lgwin,
hashers,
hash_type,
dist_cache,
&last_insert_len,
commands,
&num_commands,
&num_literals);
delete hashers;
if (last_insert_len > 0) {
commands[num_commands++] = Command(last_insert_len);
num_literals += last_insert_len;
}
assert(num_commands != 0);
// Build the meta-block.
MetaBlockSplit mb;
uint32_t num_direct_distance_codes =
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
uint32_t distance_postfix_bits =
params.mode == BrotliParams::MODE_FONT ? 1 : 0;
ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
RecomputeDistancePrefixes(commands, num_commands,
num_direct_distance_codes,
distance_postfix_bits);
if (params.quality <= 9) {
BuildMetaBlockGreedy(&input[0], input_pos, mask,
commands, num_commands,
&mb);
} else {
BuildMetaBlock(&input[0], input_pos, mask,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
}
// Set up the temporary output storage.
const size_t max_out_size = 2 * input_size + 500;
std::vector<uint8_t> storage(max_out_size);
uint8_t first_byte = 0;
size_t first_byte_bits = 0;
if (is_first) {
if (params.lgwin == 16) {
first_byte = 0;
first_byte_bits = 1;
} else if (params.lgwin == 17) {
first_byte = 1;
first_byte_bits = 7;
} else {
first_byte = static_cast<uint8_t>(((params.lgwin - 17) << 1) | 1);
first_byte_bits = 4;
}
}
storage[0] = static_cast<uint8_t>(first_byte);
size_t storage_ix = first_byte_bits;
// Store the meta-block to the temporary output.
StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
&storage_ix, &storage[0]);
free(commands);
// If this is not the last meta-block, store an empty metadata
// meta-block so that the meta-block will end at a byte boundary.
if (!is_last) {
StoreSyncMetaBlock(&storage_ix, &storage[0]);
}
// If the compressed data is too large, fall back to an uncompressed
// meta-block.
size_t output_size = storage_ix >> 3;
if (input_size + 4 < output_size) {
storage[0] = static_cast<uint8_t>(first_byte);
storage_ix = first_byte_bits;
StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0]);
output_size = storage_ix >> 3;
}
// Copy the temporary output with size-check to the output.
if (output_size > *encoded_size) {
return false;
}
memcpy(encoded_buffer, &storage[0], output_size);
*encoded_size = output_size;
return true;
}
} // namespace
int BrotliCompressBufferParallel(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (*encoded_size == 0) {
// Output buffer needs at least one byte.
return 0;
} else if (input_size == 0) {
encoded_buffer[0] = 6;
*encoded_size = 1;
return 1;
}
// Sanitize params.
if (params.lgwin < kMinWindowBits) {
params.lgwin = kMinWindowBits;
} else if (params.lgwin > kMaxWindowBits) {
params.lgwin = kMaxWindowBits;
}
if (params.lgblock == 0) {
params.lgblock = 16;
if (params.quality >= 9 && params.lgwin > params.lgblock) {
params.lgblock = std::min(21, params.lgwin);
}
} else if (params.lgblock < kMinInputBlockBits) {
params.lgblock = kMinInputBlockBits;
} else if (params.lgblock > kMaxInputBlockBits) {
params.lgblock = kMaxInputBlockBits;
}
size_t max_input_block_size = 1 << params.lgblock;
size_t max_prefix_size = 1u << params.lgwin;
std::vector<std::vector<uint8_t> > compressed_pieces;
// Compress block-by-block independently.
for (size_t pos = 0; pos < input_size; ) {
uint32_t input_block_size =
static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
uint32_t prefix_size =
static_cast<uint32_t>(std::min(max_prefix_size, pos));
size_t out_size = input_block_size + (input_block_size >> 3) + 1024;
std::vector<uint8_t> out(out_size);
if (!WriteMetaBlockParallel(params,
input_block_size,
&input_buffer[pos],
prefix_size,
&input_buffer[pos - prefix_size],
pos == 0,
pos + input_block_size == input_size,
&out_size,
&out[0])) {
return false;
}
out.resize(out_size);
compressed_pieces.push_back(out);
pos += input_block_size;
}
// Piece together the output.
size_t out_pos = 0;
for (size_t i = 0; i < compressed_pieces.size(); ++i) {
const std::vector<uint8_t>& out = compressed_pieces[i];
if (out_pos + out.size() > *encoded_size) {
return false;
}
memcpy(&encoded_buffer[out_pos], &out[0], out.size());
out_pos += out.size();
}
*encoded_size = out_pos;
return true;
}
} // namespace brotli


@@ -0,0 +1,28 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// API for parallel Brotli compression
// Note that this is only a proof of concept currently and not part of the
// final API yet.
#ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
#define BROTLI_ENC_ENCODE_PARALLEL_H_
#include "./encode.h"
#include "./types.h"
namespace brotli {
int BrotliCompressBufferParallel(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_PARALLEL_H_
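The calling convention matches BrotliCompressBuffer: *encoded_size carries the output buffer capacity on entry and the compressed length on return. A short sketch (the capacity bound is an assumption):

#include <cstdint>
#include <vector>
#include "./encode_parallel.h"

bool CompressParallel(const uint8_t* in, size_t in_size,
                      std::vector<uint8_t>* out) {
  brotli::BrotliParams params;
  size_t encoded_size = 2 * in_size + 500;  // assumed capacity bound
  out->resize(encoded_size);
  if (!brotli::BrotliCompressBufferParallel(params, in_size, in,
                                            &encoded_size, &(*out)[0])) {
    return false;
  }
  out->resize(encoded_size);  // shrink to the actual compressed length
  return true;
}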


@@ -0,0 +1,480 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Entropy encoding (Huffman) utilities.
#include "./entropy_encode.h"
#include <algorithm>
#include <limits>
#include <cstdlib>
#include "./histogram.h"
#include "./port.h"
#include "./types.h"
namespace brotli {
void SetDepth(const HuffmanTree &p,
HuffmanTree *pool,
uint8_t *depth,
uint8_t level) {
if (p.index_left_ >= 0) {
++level;
SetDepth(pool[p.index_left_], pool, depth, level);
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
} else {
depth[p.index_right_or_value_] = level;
}
}
// Sort the root nodes, least popular first.
static inline bool SortHuffmanTree(const HuffmanTree& v0,
const HuffmanTree& v1) {
if (v0.total_count_ != v1.total_count_) {
return v0.total_count_ < v1.total_count_;
}
return v0.index_right_or_value_ > v1.index_right_or_value_;
}
// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
// Brotli specifies a maximum depth of 15 bits for "code trees"
// and 7 bits for "code length code trees."
//
// count_limit is the value that is to be faked as the minimum value
// and this minimum value is raised until the tree matches the
// maximum length requirement.
//
// This algorithm does not perform well on very long data blocks,
// especially when population counts are larger than 2**tree_limit, but
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm.
for (uint32_t count_limit = 1; ; count_limit *= 2) {
size_t n = 0;
for (size_t i = length; i != 0;) {
--i;
if (data[i]) {
const uint32_t count = std::max(data[i], count_limit);
tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
}
}
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element.
break;
}
std::sort(tree, tree + n, SortHuffmanTree);
// The nodes are:
// [0, n): the sorted leaf nodes that we start with.
// [n]: we add a sentinel here.
// [n + 1, 2n): new parent nodes are added here, starting from
// (n+1). These are naturally in ascending order.
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree[n] = sentinel;
tree[n + 1] = sentinel;
size_t i = 0; // Points to the next leaf node.
size_t j = n + 1; // Points to the next non-leaf node.
for (size_t k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
++i;
} else {
left = j;
++j;
}
if (tree[i].total_count_ <= tree[j].total_count_) {
right = i;
++i;
} else {
right = j;
++j;
}
// The sentinel node becomes the parent node.
size_t j_end = 2 * n - k;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = static_cast<int16_t>(left);
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
// Add back the last sentinel node.
tree[j_end + 1] = sentinel;
}
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits.
// If this was not successful, add fake entities to the lowest values
// and retry.
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
break;
}
}
}
static void Reverse(uint8_t* v, size_t start, size_t end) {
--end;
while (start < end) {
uint8_t tmp = v[start];
v[start] = v[end];
v[end] = tmp;
++start;
--end;
}
}
static void WriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
assert(repetitions > 0);
if (previous_value != value) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions == 7) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
repetitions -= 3;
size_t start = *tree_size;
while (true) {
tree[*tree_size] = 16;
extra_bits_data[*tree_size] = repetitions & 0x3;
++(*tree_size);
repetitions >>= 2;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
static void WriteHuffmanTreeRepetitionsZeros(
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
if (repetitions == 11) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
repetitions -= 3;
size_t start = *tree_size;
while (true) {
tree[*tree_size] = 17;
extra_bits_data[*tree_size] = repetitions & 0x7;
++(*tree_size);
repetitions >>= 3;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
size_t sum;
const size_t streak_limit = 1240;
// Let's make the Huffman code more compatible with rle encoding.
size_t i;
for (i = 0; i < length; i++) {
if (counts[i]) {
++nonzero_count;
}
}
if (nonzero_count < 16) {
return;
}
while (length != 0 && counts[length - 1] == 0) {
--length;
}
if (length == 0) {
return; // All zeros.
}
// Now counts[0..length - 1] does not have trailing zeros.
{
size_t nonzeros = 0;
uint32_t smallest_nonzero = 1 << 30;
for (i = 0; i < length; ++i) {
if (counts[i] != 0) {
++nonzeros;
if (smallest_nonzero > counts[i]) {
smallest_nonzero = counts[i];
}
}
}
if (nonzeros < 5) {
// Small histogram will model it well.
return;
}
size_t zeros = length - nonzeros;
if (smallest_nonzero < 4) {
if (zeros < 6) {
for (i = 1; i < length - 1; ++i) {
if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
counts[i] = 1;
}
}
}
}
if (nonzeros < 28) {
return;
}
}
// 2) Let's mark all population counts that already can be encoded
// with an rle code.
memset(good_for_rle, 0, length);
{
// Let's not spoil any of the existing good rle codes.
// Mark any seq of 0's that is longer than 5 as good_for_rle.
// Mark any seq of non-0's that is longer than 7 as good_for_rle.
uint32_t symbol = counts[0];
size_t step = 0;
for (i = 0; i <= length; ++i) {
if (i == length || counts[i] != symbol) {
if ((symbol == 0 && step >= 5) ||
(symbol != 0 && step >= 7)) {
size_t k;
for (k = 0; k < step; ++k) {
good_for_rle[i - k - 1] = 1;
}
}
step = 1;
if (i != length) {
symbol = counts[i];
}
} else {
++step;
}
}
}
// 3) Let's replace those population counts that lead to more rle codes.
// Math here is in 24.8 fixed point representation.
stride = 0;
limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
sum = 0;
for (i = 0; i <= length; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
(256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
size_t k;
// The stride must end, collapse what we have, if we have enough (4).
size_t count = (sum + stride / 2) / stride;
if (count == 0) {
count = 1;
}
if (sum == 0) {
// Don't let an all-zeros stride be upgraded to ones.
count = 0;
}
for (k = 0; k < stride; ++k) {
// We don't want to change the value at counts[i],
// which already belongs to the next stride. Thus the - 1.
counts[i - k - 1] = static_cast<uint32_t>(count);
}
}
stride = 0;
sum = 0;
if (i < length - 2) {
// All interesting strides have a count of at least 4,
// at least when non-zero.
limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
} else if (i < length) {
limit = 256 * counts[i];
} else {
limit = 0;
}
}
++stride;
if (i != length) {
sum += counts[i];
if (stride >= 4) {
limit = (256 * sum + stride / 2) / stride;
}
if (stride == 4) {
limit += 120;
}
}
}
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
bool *use_rle_for_non_zero,
bool *use_rle_for_zero) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
size_t count_reps_non_zero = 1;
for (size_t i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (reps >= 3 && value == 0) {
total_reps_zero += reps;
++count_reps_zero;
}
if (reps >= 4 && value != 0) {
total_reps_non_zero += reps;
++count_reps_non_zero;
}
i += reps;
}
*use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
*use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
}
void WriteHuffmanTree(const uint8_t* depth,
size_t length,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
uint8_t previous_value = 8;
// Throw away trailing zeros.
size_t new_length = length;
for (size_t i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
--new_length;
} else {
break;
}
}
// First gather statistics on whether it is a good idea to use rle.
bool use_rle_for_non_zero = false;
bool use_rle_for_zero = false;
if (length > 50) {
// Find rle coding for longer codes.
// Shorter codes seem not to benefit from rle.
DecideOverRleUse(depth, new_length,
&use_rle_for_non_zero, &use_rle_for_zero);
}
// Actual rle coding.
for (size_t i = 0; i < new_length;) {
const uint8_t value = depth[i];
size_t reps = 1;
if ((value != 0 && use_rle_for_non_zero) ||
(value == 0 && use_rle_for_zero)) {
for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
++reps;
}
}
if (value == 0) {
WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
} else {
WriteHuffmanTreeRepetitions(previous_value,
value, reps, tree_size,
tree, extra_bits_data);
previous_value = value;
}
i += reps;
}
}
namespace {
uint16_t ReverseBits(int num_bits, uint16_t bits) {
static const size_t kLut[16] = { // Pre-reversed 4-bit values.
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
};
size_t retval = kLut[bits & 0xf];
for (int i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits = static_cast<uint16_t>(bits >> 4);
retval |= kLut[bits & 0xf];
}
retval >>= (-num_bits & 0x3);
return static_cast<uint16_t>(retval);
}
} // namespace
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits) {
// In Brotli, all bit depths are [1..15]
// 0 bit depth means that the symbol does not exist.
const int kMaxBits = 16; // 0..15 are values for bits
uint16_t bl_count[kMaxBits] = { 0 };
{
for (size_t i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
}
uint16_t next_code[kMaxBits];
next_code[0] = 0;
{
int code = 0;
for (int bits = 1; bits < kMaxBits; ++bits) {
code = (code + bl_count[bits - 1]) << 1;
next_code[bits] = static_cast<uint16_t>(code);
}
}
for (size_t i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
}
}
}
} // namespace brotli
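A tiny worked example of the canonical code assignment in ConvertBitDepthsToSymbols above (a hypothetical check, not part of the imported sources): for depths {1, 2, 3, 3}, bl_count is {0, 1, 1, 2}, next_code starts at 0, 2 and 6 for lengths 1, 2 and 3, and the canonical codes 0, 10, 110, 111 are stored bit-reversed (LSB first) as 0, 1, 3, 7.

#include <cassert>
#include <cstdint>
#include "./entropy_encode.h"

int main() {
  const uint8_t depth[4] = { 1, 2, 3, 3 };  // code length per symbol
  uint16_t bits[4] = { 0 };
  brotli::ConvertBitDepthsToSymbols(depth, 4, bits);
  assert(bits[0] == 0);  // 0   -> 0
  assert(bits[1] == 1);  // 10  -> reversed: 01
  assert(bits[2] == 3);  // 110 -> reversed: 011
  assert(bits[3] == 7);  // 111 -> reversed: 111
  return 0;
}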


@@ -0,0 +1,104 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Entropy encoding (Huffman) utilities.
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <string.h>
#include "./histogram.h"
#include "./prefix.h"
#include "./types.h"
namespace brotli {
// A node of a Huffman tree.
struct HuffmanTree {
HuffmanTree() {}
HuffmanTree(uint32_t count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
uint32_t total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
uint8_t *depth, uint8_t level);
// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
// The tree_limit is the maximum bit depth of the Huffman codes.
//
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// The actual Huffman tree is constructed in the tree[] array, which has to
// be at least 2 * length + 1 long.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth);
// Change the population counts so that the subsequent Huffman tree
// compression, especially its rle-part, is more likely to compress
// this data efficiently.
//
// length contains the size of the histogram.
// counts contains the population counts.
// good_for_rle is a buffer of at least length size
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle);
// Write a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree.
void WriteHuffmanTree(const uint8_t* depth,
size_t num,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data);
// Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits);
template<int kSize>
struct EntropyCode {
// How many bits for symbol.
uint8_t depth_[kSize];
// Actual bits used to represent the symbol.
uint16_t bits_[kSize];
// How many non-zero depth.
int count_;
// First four symbols with non-zero depth.
int symbols_[4];
};
static const int kCodeLengthCodes = 18;
// Literal entropy code.
typedef EntropyCode<256> EntropyCodeLiteral;
// Prefix entropy codes.
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
typedef EntropyCode<272> EntropyCodeContextMap;
// Block type entropy code, 256 block types + 2 special symbols.
typedef EntropyCode<258> EntropyCodeBlockType;
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
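Putting the two helpers together, a minimal sketch of building a depth-limited code for a histogram (assumptions: a non-empty histogram and the 15-bit depth limit; the tree buffer size follows the 2 * length + 1 requirement stated above):

#include <cstdint>
#include <vector>
#include "./entropy_encode.h"

// Builds code lengths (<= 15 bits) and canonical bit patterns for "counts".
void BuildCode(const uint32_t* counts, size_t length,
               std::vector<uint8_t>* depth, std::vector<uint16_t>* bits) {
  std::vector<brotli::HuffmanTree> tree(2 * length + 1);
  depth->assign(length, 0);
  bits->assign(length, 0);
  brotli::CreateHuffmanTree(counts, length, /* tree_limit = */ 15,
                            &tree[0], &(*depth)[0]);
  brotli::ConvertBitDepthsToSymbols(&(*depth)[0], length, &(*bits)[0]);
}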


@@ -0,0 +1,572 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Static entropy codes used for faster meta-block encoding.
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#include "./prefix.h"
#include "./types.h"
#include "./write_bits.h"
namespace brotli {
static const uint8_t kCodeLengthDepth[18] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
};
static const uint8_t kStaticCommandCodeDepth[kNumCommandPrefixes] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
};
static const uint8_t kStaticDistanceCodeDepth[64] = {
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
};
static const uint32_t kCodeLengthBits[18] = {
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
};
inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
WriteBits(40, MAKE_UINT64_T(0xff, 0x55555554), storage_ix, storage);
}
static const uint64_t kZeroRepsBits[704] = {
0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
0x00003b87, 0x00000397, 0x00000b97, 0x00001397, 0x00001b97, 0x00002397,
0x00002b97, 0x00003397, 0x00003b97, 0x000003a7, 0x00000ba7, 0x000013a7,
0x00001ba7, 0x000023a7, 0x00002ba7, 0x000033a7, 0x00003ba7, 0x000003b7,
0x00000bb7, 0x000013b7, 0x00001bb7, 0x000023b7, 0x00002bb7, 0x000033b7,
0x00003bb7, 0x000003c7, 0x00000bc7, 0x000013c7, 0x00001bc7, 0x000023c7,
0x00002bc7, 0x000033c7, 0x00003bc7, 0x000003d7, 0x00000bd7, 0x000013d7,
0x00001bd7, 0x000023d7, 0x00002bd7, 0x000033d7, 0x00003bd7, 0x000003e7,
0x00000be7, 0x000013e7, 0x00001be7, 0x000023e7, 0x00002be7, 0x000033e7,
0x00003be7, 0x000003f7, 0x00000bf7, 0x000013f7, 0x00001bf7, 0x000023f7,
0x00002bf7, 0x000033f7, 0x00003bf7, 0x0001c387, 0x0005c387, 0x0009c387,
0x000dc387, 0x0011c387, 0x0015c387, 0x0019c387, 0x001dc387, 0x0001cb87,
0x0005cb87, 0x0009cb87, 0x000dcb87, 0x0011cb87, 0x0015cb87, 0x0019cb87,
0x001dcb87, 0x0001d387, 0x0005d387, 0x0009d387, 0x000dd387, 0x0011d387,
0x0015d387, 0x0019d387, 0x001dd387, 0x0001db87, 0x0005db87, 0x0009db87,
0x000ddb87, 0x0011db87, 0x0015db87, 0x0019db87, 0x001ddb87, 0x0001e387,
0x0005e387, 0x0009e387, 0x000de387, 0x0011e387, 0x0015e387, 0x0019e387,
0x001de387, 0x0001eb87, 0x0005eb87, 0x0009eb87, 0x000deb87, 0x0011eb87,
0x0015eb87, 0x0019eb87, 0x001deb87, 0x0001f387, 0x0005f387, 0x0009f387,
0x000df387, 0x0011f387, 0x0015f387, 0x0019f387, 0x001df387, 0x0001fb87,
0x0005fb87, 0x0009fb87, 0x000dfb87, 0x0011fb87, 0x0015fb87, 0x0019fb87,
0x001dfb87, 0x0001c397, 0x0005c397, 0x0009c397, 0x000dc397, 0x0011c397,
0x0015c397, 0x0019c397, 0x001dc397, 0x0001cb97, 0x0005cb97, 0x0009cb97,
0x000dcb97, 0x0011cb97, 0x0015cb97, 0x0019cb97, 0x001dcb97, 0x0001d397,
0x0005d397, 0x0009d397, 0x000dd397, 0x0011d397, 0x0015d397, 0x0019d397,
0x001dd397, 0x0001db97, 0x0005db97, 0x0009db97, 0x000ddb97, 0x0011db97,
0x0015db97, 0x0019db97, 0x001ddb97, 0x0001e397, 0x0005e397, 0x0009e397,
0x000de397, 0x0011e397, 0x0015e397, 0x0019e397, 0x001de397, 0x0001eb97,
0x0005eb97, 0x0009eb97, 0x000deb97, 0x0011eb97, 0x0015eb97, 0x0019eb97,
0x001deb97, 0x0001f397, 0x0005f397, 0x0009f397, 0x000df397, 0x0011f397,
0x0015f397, 0x0019f397, 0x001df397, 0x0001fb97, 0x0005fb97, 0x0009fb97,
0x000dfb97, 0x0011fb97, 0x0015fb97, 0x0019fb97, 0x001dfb97, 0x0001c3a7,
0x0005c3a7, 0x0009c3a7, 0x000dc3a7, 0x0011c3a7, 0x0015c3a7, 0x0019c3a7,
0x001dc3a7, 0x0001cba7, 0x0005cba7, 0x0009cba7, 0x000dcba7, 0x0011cba7,
0x0015cba7, 0x0019cba7, 0x001dcba7, 0x0001d3a7, 0x0005d3a7, 0x0009d3a7,
0x000dd3a7, 0x0011d3a7, 0x0015d3a7, 0x0019d3a7, 0x001dd3a7, 0x0001dba7,
0x0005dba7, 0x0009dba7, 0x000ddba7, 0x0011dba7, 0x0015dba7, 0x0019dba7,
0x001ddba7, 0x0001e3a7, 0x0005e3a7, 0x0009e3a7, 0x000de3a7, 0x0011e3a7,
0x0015e3a7, 0x0019e3a7, 0x001de3a7, 0x0001eba7, 0x0005eba7, 0x0009eba7,
0x000deba7, 0x0011eba7, 0x0015eba7, 0x0019eba7, 0x001deba7, 0x0001f3a7,
0x0005f3a7, 0x0009f3a7, 0x000df3a7, 0x0011f3a7, 0x0015f3a7, 0x0019f3a7,
0x001df3a7, 0x0001fba7, 0x0005fba7, 0x0009fba7, 0x000dfba7, 0x0011fba7,
0x0015fba7, 0x0019fba7, 0x001dfba7, 0x0001c3b7, 0x0005c3b7, 0x0009c3b7,
0x000dc3b7, 0x0011c3b7, 0x0015c3b7, 0x0019c3b7, 0x001dc3b7, 0x0001cbb7,
0x0005cbb7, 0x0009cbb7, 0x000dcbb7, 0x0011cbb7, 0x0015cbb7, 0x0019cbb7,
0x001dcbb7, 0x0001d3b7, 0x0005d3b7, 0x0009d3b7, 0x000dd3b7, 0x0011d3b7,
0x0015d3b7, 0x0019d3b7, 0x001dd3b7, 0x0001dbb7, 0x0005dbb7, 0x0009dbb7,
0x000ddbb7, 0x0011dbb7, 0x0015dbb7, 0x0019dbb7, 0x001ddbb7, 0x0001e3b7,
0x0005e3b7, 0x0009e3b7, 0x000de3b7, 0x0011e3b7, 0x0015e3b7, 0x0019e3b7,
0x001de3b7, 0x0001ebb7, 0x0005ebb7, 0x0009ebb7, 0x000debb7, 0x0011ebb7,
0x0015ebb7, 0x0019ebb7, 0x001debb7, 0x0001f3b7, 0x0005f3b7, 0x0009f3b7,
0x000df3b7, 0x0011f3b7, 0x0015f3b7, 0x0019f3b7, 0x001df3b7, 0x0001fbb7,
0x0005fbb7, 0x0009fbb7, 0x000dfbb7, 0x0011fbb7, 0x0015fbb7, 0x0019fbb7,
0x001dfbb7, 0x0001c3c7, 0x0005c3c7, 0x0009c3c7, 0x000dc3c7, 0x0011c3c7,
0x0015c3c7, 0x0019c3c7, 0x001dc3c7, 0x0001cbc7, 0x0005cbc7, 0x0009cbc7,
0x000dcbc7, 0x0011cbc7, 0x0015cbc7, 0x0019cbc7, 0x001dcbc7, 0x0001d3c7,
0x0005d3c7, 0x0009d3c7, 0x000dd3c7, 0x0011d3c7, 0x0015d3c7, 0x0019d3c7,
0x001dd3c7, 0x0001dbc7, 0x0005dbc7, 0x0009dbc7, 0x000ddbc7, 0x0011dbc7,
0x0015dbc7, 0x0019dbc7, 0x001ddbc7, 0x0001e3c7, 0x0005e3c7, 0x0009e3c7,
0x000de3c7, 0x0011e3c7, 0x0015e3c7, 0x0019e3c7, 0x001de3c7, 0x0001ebc7,
0x0005ebc7, 0x0009ebc7, 0x000debc7, 0x0011ebc7, 0x0015ebc7, 0x0019ebc7,
0x001debc7, 0x0001f3c7, 0x0005f3c7, 0x0009f3c7, 0x000df3c7, 0x0011f3c7,
0x0015f3c7, 0x0019f3c7, 0x001df3c7, 0x0001fbc7, 0x0005fbc7, 0x0009fbc7,
0x000dfbc7, 0x0011fbc7, 0x0015fbc7, 0x0019fbc7, 0x001dfbc7, 0x0001c3d7,
0x0005c3d7, 0x0009c3d7, 0x000dc3d7, 0x0011c3d7, 0x0015c3d7, 0x0019c3d7,
0x001dc3d7, 0x0001cbd7, 0x0005cbd7, 0x0009cbd7, 0x000dcbd7, 0x0011cbd7,
0x0015cbd7, 0x0019cbd7, 0x001dcbd7, 0x0001d3d7, 0x0005d3d7, 0x0009d3d7,
0x000dd3d7, 0x0011d3d7, 0x0015d3d7, 0x0019d3d7, 0x001dd3d7, 0x0001dbd7,
0x0005dbd7, 0x0009dbd7, 0x000ddbd7, 0x0011dbd7, 0x0015dbd7, 0x0019dbd7,
0x001ddbd7, 0x0001e3d7, 0x0005e3d7, 0x0009e3d7, 0x000de3d7, 0x0011e3d7,
0x0015e3d7, 0x0019e3d7, 0x001de3d7, 0x0001ebd7, 0x0005ebd7, 0x0009ebd7,
0x000debd7, 0x0011ebd7, 0x0015ebd7, 0x0019ebd7, 0x001debd7, 0x0001f3d7,
0x0005f3d7, 0x0009f3d7, 0x000df3d7, 0x0011f3d7, 0x0015f3d7, 0x0019f3d7,
0x001df3d7, 0x0001fbd7, 0x0005fbd7, 0x0009fbd7, 0x000dfbd7, 0x0011fbd7,
0x0015fbd7, 0x0019fbd7, 0x001dfbd7, 0x0001c3e7, 0x0005c3e7, 0x0009c3e7,
0x000dc3e7, 0x0011c3e7, 0x0015c3e7, 0x0019c3e7, 0x001dc3e7, 0x0001cbe7,
0x0005cbe7, 0x0009cbe7, 0x000dcbe7, 0x0011cbe7, 0x0015cbe7, 0x0019cbe7,
0x001dcbe7, 0x0001d3e7, 0x0005d3e7, 0x0009d3e7, 0x000dd3e7, 0x0011d3e7,
0x0015d3e7, 0x0019d3e7, 0x001dd3e7, 0x0001dbe7, 0x0005dbe7, 0x0009dbe7,
0x000ddbe7, 0x0011dbe7, 0x0015dbe7, 0x0019dbe7, 0x001ddbe7, 0x0001e3e7,
0x0005e3e7, 0x0009e3e7, 0x000de3e7, 0x0011e3e7, 0x0015e3e7, 0x0019e3e7,
0x001de3e7, 0x0001ebe7, 0x0005ebe7, 0x0009ebe7, 0x000debe7, 0x0011ebe7,
0x0015ebe7, 0x0019ebe7, 0x001debe7, 0x0001f3e7, 0x0005f3e7, 0x0009f3e7,
0x000df3e7, 0x0011f3e7, 0x0015f3e7, 0x0019f3e7, 0x001df3e7, 0x0001fbe7,
0x0005fbe7, 0x0009fbe7, 0x000dfbe7, 0x0011fbe7, 0x0015fbe7, 0x0019fbe7,
0x001dfbe7, 0x0001c3f7, 0x0005c3f7, 0x0009c3f7, 0x000dc3f7, 0x0011c3f7,
0x0015c3f7, 0x0019c3f7, 0x001dc3f7, 0x0001cbf7, 0x0005cbf7, 0x0009cbf7,
0x000dcbf7, 0x0011cbf7, 0x0015cbf7, 0x0019cbf7, 0x001dcbf7, 0x0001d3f7,
0x0005d3f7, 0x0009d3f7, 0x000dd3f7, 0x0011d3f7, 0x0015d3f7, 0x0019d3f7,
0x001dd3f7, 0x0001dbf7, 0x0005dbf7, 0x0009dbf7, 0x000ddbf7, 0x0011dbf7,
0x0015dbf7, 0x0019dbf7, 0x001ddbf7, 0x0001e3f7, 0x0005e3f7, 0x0009e3f7,
0x000de3f7, 0x0011e3f7, 0x0015e3f7, 0x0019e3f7, 0x001de3f7, 0x0001ebf7,
0x0005ebf7, 0x0009ebf7, 0x000debf7, 0x0011ebf7, 0x0015ebf7, 0x0019ebf7,
0x001debf7, 0x0001f3f7, 0x0005f3f7, 0x0009f3f7, 0x000df3f7, 0x0011f3f7,
0x0015f3f7, 0x0019f3f7, 0x001df3f7, 0x0001fbf7, 0x0005fbf7, 0x0009fbf7,
0x000dfbf7, 0x0011fbf7, 0x0015fbf7, 0x0019fbf7, 0x001dfbf7, 0x00e1c387,
0x02e1c387, 0x04e1c387, 0x06e1c387, 0x08e1c387, 0x0ae1c387, 0x0ce1c387,
0x0ee1c387, 0x00e5c387, 0x02e5c387, 0x04e5c387, 0x06e5c387, 0x08e5c387,
0x0ae5c387, 0x0ce5c387, 0x0ee5c387, 0x00e9c387, 0x02e9c387, 0x04e9c387,
0x06e9c387, 0x08e9c387, 0x0ae9c387, 0x0ce9c387, 0x0ee9c387, 0x00edc387,
0x02edc387, 0x04edc387, 0x06edc387, 0x08edc387, 0x0aedc387, 0x0cedc387,
0x0eedc387, 0x00f1c387, 0x02f1c387, 0x04f1c387, 0x06f1c387, 0x08f1c387,
0x0af1c387, 0x0cf1c387, 0x0ef1c387, 0x00f5c387, 0x02f5c387, 0x04f5c387,
0x06f5c387, 0x08f5c387, 0x0af5c387, 0x0cf5c387, 0x0ef5c387, 0x00f9c387,
0x02f9c387, 0x04f9c387, 0x06f9c387, 0x08f9c387, 0x0af9c387, 0x0cf9c387,
0x0ef9c387, 0x00fdc387, 0x02fdc387, 0x04fdc387, 0x06fdc387, 0x08fdc387,
0x0afdc387, 0x0cfdc387, 0x0efdc387, 0x00e1cb87, 0x02e1cb87, 0x04e1cb87,
0x06e1cb87, 0x08e1cb87, 0x0ae1cb87, 0x0ce1cb87, 0x0ee1cb87, 0x00e5cb87,
0x02e5cb87, 0x04e5cb87, 0x06e5cb87, 0x08e5cb87, 0x0ae5cb87, 0x0ce5cb87,
0x0ee5cb87, 0x00e9cb87, 0x02e9cb87, 0x04e9cb87, 0x06e9cb87, 0x08e9cb87,
0x0ae9cb87, 0x0ce9cb87, 0x0ee9cb87, 0x00edcb87, 0x02edcb87, 0x04edcb87,
0x06edcb87, 0x08edcb87, 0x0aedcb87, 0x0cedcb87, 0x0eedcb87, 0x00f1cb87,
0x02f1cb87, 0x04f1cb87, 0x06f1cb87, 0x08f1cb87, 0x0af1cb87, 0x0cf1cb87,
0x0ef1cb87, 0x00f5cb87, 0x02f5cb87, 0x04f5cb87, 0x06f5cb87, 0x08f5cb87,
0x0af5cb87, 0x0cf5cb87, 0x0ef5cb87, 0x00f9cb87, 0x02f9cb87, 0x04f9cb87,
0x06f9cb87, 0x08f9cb87,
};
static const uint32_t kZeroRepsDepth[704] = {
0, 4, 8, 7, 7, 7, 7, 7, 7, 7, 7, 11, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
};
static const uint64_t kNonZeroRepsBits[704] = {
0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
0x00000afb, 0x00000efb, 0x0000b2cb, 0x0001b2cb, 0x0002b2cb, 0x0003b2cb,
0x0000b6cb, 0x0001b6cb, 0x0002b6cb, 0x0003b6cb, 0x0000bacb, 0x0001bacb,
0x0002bacb, 0x0003bacb, 0x0000becb, 0x0001becb, 0x0002becb, 0x0003becb,
0x0000b2db, 0x0001b2db, 0x0002b2db, 0x0003b2db, 0x0000b6db, 0x0001b6db,
0x0002b6db, 0x0003b6db, 0x0000badb, 0x0001badb, 0x0002badb, 0x0003badb,
0x0000bedb, 0x0001bedb, 0x0002bedb, 0x0003bedb, 0x0000b2eb, 0x0001b2eb,
0x0002b2eb, 0x0003b2eb, 0x0000b6eb, 0x0001b6eb, 0x0002b6eb, 0x0003b6eb,
0x0000baeb, 0x0001baeb, 0x0002baeb, 0x0003baeb, 0x0000beeb, 0x0001beeb,
0x0002beeb, 0x0003beeb, 0x0000b2fb, 0x0001b2fb, 0x0002b2fb, 0x0003b2fb,
0x0000b6fb, 0x0001b6fb, 0x0002b6fb, 0x0003b6fb, 0x0000bafb, 0x0001bafb,
0x0002bafb, 0x0003bafb, 0x0000befb, 0x0001befb, 0x0002befb, 0x0003befb,
0x002cb2cb, 0x006cb2cb, 0x00acb2cb, 0x00ecb2cb, 0x002db2cb, 0x006db2cb,
0x00adb2cb, 0x00edb2cb, 0x002eb2cb, 0x006eb2cb, 0x00aeb2cb, 0x00eeb2cb,
0x002fb2cb, 0x006fb2cb, 0x00afb2cb, 0x00efb2cb, 0x002cb6cb, 0x006cb6cb,
0x00acb6cb, 0x00ecb6cb, 0x002db6cb, 0x006db6cb, 0x00adb6cb, 0x00edb6cb,
0x002eb6cb, 0x006eb6cb, 0x00aeb6cb, 0x00eeb6cb, 0x002fb6cb, 0x006fb6cb,
0x00afb6cb, 0x00efb6cb, 0x002cbacb, 0x006cbacb, 0x00acbacb, 0x00ecbacb,
0x002dbacb, 0x006dbacb, 0x00adbacb, 0x00edbacb, 0x002ebacb, 0x006ebacb,
0x00aebacb, 0x00eebacb, 0x002fbacb, 0x006fbacb, 0x00afbacb, 0x00efbacb,
0x002cbecb, 0x006cbecb, 0x00acbecb, 0x00ecbecb, 0x002dbecb, 0x006dbecb,
0x00adbecb, 0x00edbecb, 0x002ebecb, 0x006ebecb, 0x00aebecb, 0x00eebecb,
0x002fbecb, 0x006fbecb, 0x00afbecb, 0x00efbecb, 0x002cb2db, 0x006cb2db,
0x00acb2db, 0x00ecb2db, 0x002db2db, 0x006db2db, 0x00adb2db, 0x00edb2db,
0x002eb2db, 0x006eb2db, 0x00aeb2db, 0x00eeb2db, 0x002fb2db, 0x006fb2db,
0x00afb2db, 0x00efb2db, 0x002cb6db, 0x006cb6db, 0x00acb6db, 0x00ecb6db,
0x002db6db, 0x006db6db, 0x00adb6db, 0x00edb6db, 0x002eb6db, 0x006eb6db,
0x00aeb6db, 0x00eeb6db, 0x002fb6db, 0x006fb6db, 0x00afb6db, 0x00efb6db,
0x002cbadb, 0x006cbadb, 0x00acbadb, 0x00ecbadb, 0x002dbadb, 0x006dbadb,
0x00adbadb, 0x00edbadb, 0x002ebadb, 0x006ebadb, 0x00aebadb, 0x00eebadb,
0x002fbadb, 0x006fbadb, 0x00afbadb, 0x00efbadb, 0x002cbedb, 0x006cbedb,
0x00acbedb, 0x00ecbedb, 0x002dbedb, 0x006dbedb, 0x00adbedb, 0x00edbedb,
0x002ebedb, 0x006ebedb, 0x00aebedb, 0x00eebedb, 0x002fbedb, 0x006fbedb,
0x00afbedb, 0x00efbedb, 0x002cb2eb, 0x006cb2eb, 0x00acb2eb, 0x00ecb2eb,
0x002db2eb, 0x006db2eb, 0x00adb2eb, 0x00edb2eb, 0x002eb2eb, 0x006eb2eb,
0x00aeb2eb, 0x00eeb2eb, 0x002fb2eb, 0x006fb2eb, 0x00afb2eb, 0x00efb2eb,
0x002cb6eb, 0x006cb6eb, 0x00acb6eb, 0x00ecb6eb, 0x002db6eb, 0x006db6eb,
0x00adb6eb, 0x00edb6eb, 0x002eb6eb, 0x006eb6eb, 0x00aeb6eb, 0x00eeb6eb,
0x002fb6eb, 0x006fb6eb, 0x00afb6eb, 0x00efb6eb, 0x002cbaeb, 0x006cbaeb,
0x00acbaeb, 0x00ecbaeb, 0x002dbaeb, 0x006dbaeb, 0x00adbaeb, 0x00edbaeb,
0x002ebaeb, 0x006ebaeb, 0x00aebaeb, 0x00eebaeb, 0x002fbaeb, 0x006fbaeb,
0x00afbaeb, 0x00efbaeb, 0x002cbeeb, 0x006cbeeb, 0x00acbeeb, 0x00ecbeeb,
0x002dbeeb, 0x006dbeeb, 0x00adbeeb, 0x00edbeeb, 0x002ebeeb, 0x006ebeeb,
0x00aebeeb, 0x00eebeeb, 0x002fbeeb, 0x006fbeeb, 0x00afbeeb, 0x00efbeeb,
0x002cb2fb, 0x006cb2fb, 0x00acb2fb, 0x00ecb2fb, 0x002db2fb, 0x006db2fb,
0x00adb2fb, 0x00edb2fb, 0x002eb2fb, 0x006eb2fb, 0x00aeb2fb, 0x00eeb2fb,
0x002fb2fb, 0x006fb2fb, 0x00afb2fb, 0x00efb2fb, 0x002cb6fb, 0x006cb6fb,
0x00acb6fb, 0x00ecb6fb, 0x002db6fb, 0x006db6fb, 0x00adb6fb, 0x00edb6fb,
0x002eb6fb, 0x006eb6fb, 0x00aeb6fb, 0x00eeb6fb, 0x002fb6fb, 0x006fb6fb,
0x00afb6fb, 0x00efb6fb, 0x002cbafb, 0x006cbafb, 0x00acbafb, 0x00ecbafb,
0x002dbafb, 0x006dbafb, 0x00adbafb, 0x00edbafb, 0x002ebafb, 0x006ebafb,
0x00aebafb, 0x00eebafb, 0x002fbafb, 0x006fbafb, 0x00afbafb, 0x00efbafb,
0x002cbefb, 0x006cbefb, 0x00acbefb, 0x00ecbefb, 0x002dbefb, 0x006dbefb,
0x00adbefb, 0x00edbefb, 0x002ebefb, 0x006ebefb, 0x00aebefb, 0x00eebefb,
0x002fbefb, 0x006fbefb, 0x00afbefb, 0x00efbefb, 0x0b2cb2cb, 0x1b2cb2cb,
0x2b2cb2cb, 0x3b2cb2cb, 0x0b6cb2cb, 0x1b6cb2cb, 0x2b6cb2cb, 0x3b6cb2cb,
0x0bacb2cb, 0x1bacb2cb, 0x2bacb2cb, 0x3bacb2cb, 0x0becb2cb, 0x1becb2cb,
0x2becb2cb, 0x3becb2cb, 0x0b2db2cb, 0x1b2db2cb, 0x2b2db2cb, 0x3b2db2cb,
0x0b6db2cb, 0x1b6db2cb, 0x2b6db2cb, 0x3b6db2cb, 0x0badb2cb, 0x1badb2cb,
0x2badb2cb, 0x3badb2cb, 0x0bedb2cb, 0x1bedb2cb, 0x2bedb2cb, 0x3bedb2cb,
0x0b2eb2cb, 0x1b2eb2cb, 0x2b2eb2cb, 0x3b2eb2cb, 0x0b6eb2cb, 0x1b6eb2cb,
0x2b6eb2cb, 0x3b6eb2cb, 0x0baeb2cb, 0x1baeb2cb, 0x2baeb2cb, 0x3baeb2cb,
0x0beeb2cb, 0x1beeb2cb, 0x2beeb2cb, 0x3beeb2cb, 0x0b2fb2cb, 0x1b2fb2cb,
0x2b2fb2cb, 0x3b2fb2cb, 0x0b6fb2cb, 0x1b6fb2cb, 0x2b6fb2cb, 0x3b6fb2cb,
0x0bafb2cb, 0x1bafb2cb, 0x2bafb2cb, 0x3bafb2cb, 0x0befb2cb, 0x1befb2cb,
0x2befb2cb, 0x3befb2cb, 0x0b2cb6cb, 0x1b2cb6cb, 0x2b2cb6cb, 0x3b2cb6cb,
0x0b6cb6cb, 0x1b6cb6cb, 0x2b6cb6cb, 0x3b6cb6cb, 0x0bacb6cb, 0x1bacb6cb,
0x2bacb6cb, 0x3bacb6cb, 0x0becb6cb, 0x1becb6cb, 0x2becb6cb, 0x3becb6cb,
0x0b2db6cb, 0x1b2db6cb, 0x2b2db6cb, 0x3b2db6cb, 0x0b6db6cb, 0x1b6db6cb,
0x2b6db6cb, 0x3b6db6cb, 0x0badb6cb, 0x1badb6cb, 0x2badb6cb, 0x3badb6cb,
0x0bedb6cb, 0x1bedb6cb, 0x2bedb6cb, 0x3bedb6cb, 0x0b2eb6cb, 0x1b2eb6cb,
0x2b2eb6cb, 0x3b2eb6cb, 0x0b6eb6cb, 0x1b6eb6cb, 0x2b6eb6cb, 0x3b6eb6cb,
0x0baeb6cb, 0x1baeb6cb, 0x2baeb6cb, 0x3baeb6cb, 0x0beeb6cb, 0x1beeb6cb,
0x2beeb6cb, 0x3beeb6cb, 0x0b2fb6cb, 0x1b2fb6cb, 0x2b2fb6cb, 0x3b2fb6cb,
0x0b6fb6cb, 0x1b6fb6cb, 0x2b6fb6cb, 0x3b6fb6cb, 0x0bafb6cb, 0x1bafb6cb,
0x2bafb6cb, 0x3bafb6cb, 0x0befb6cb, 0x1befb6cb, 0x2befb6cb, 0x3befb6cb,
0x0b2cbacb, 0x1b2cbacb, 0x2b2cbacb, 0x3b2cbacb, 0x0b6cbacb, 0x1b6cbacb,
0x2b6cbacb, 0x3b6cbacb, 0x0bacbacb, 0x1bacbacb, 0x2bacbacb, 0x3bacbacb,
0x0becbacb, 0x1becbacb, 0x2becbacb, 0x3becbacb, 0x0b2dbacb, 0x1b2dbacb,
0x2b2dbacb, 0x3b2dbacb, 0x0b6dbacb, 0x1b6dbacb, 0x2b6dbacb, 0x3b6dbacb,
0x0badbacb, 0x1badbacb, 0x2badbacb, 0x3badbacb, 0x0bedbacb, 0x1bedbacb,
0x2bedbacb, 0x3bedbacb, 0x0b2ebacb, 0x1b2ebacb, 0x2b2ebacb, 0x3b2ebacb,
0x0b6ebacb, 0x1b6ebacb, 0x2b6ebacb, 0x3b6ebacb, 0x0baebacb, 0x1baebacb,
0x2baebacb, 0x3baebacb, 0x0beebacb, 0x1beebacb, 0x2beebacb, 0x3beebacb,
0x0b2fbacb, 0x1b2fbacb, 0x2b2fbacb, 0x3b2fbacb, 0x0b6fbacb, 0x1b6fbacb,
0x2b6fbacb, 0x3b6fbacb, 0x0bafbacb, 0x1bafbacb, 0x2bafbacb, 0x3bafbacb,
0x0befbacb, 0x1befbacb, 0x2befbacb, 0x3befbacb, 0x0b2cbecb, 0x1b2cbecb,
0x2b2cbecb, 0x3b2cbecb, 0x0b6cbecb, 0x1b6cbecb, 0x2b6cbecb, 0x3b6cbecb,
0x0bacbecb, 0x1bacbecb, 0x2bacbecb, 0x3bacbecb, 0x0becbecb, 0x1becbecb,
0x2becbecb, 0x3becbecb, 0x0b2dbecb, 0x1b2dbecb, 0x2b2dbecb, 0x3b2dbecb,
0x0b6dbecb, 0x1b6dbecb, 0x2b6dbecb, 0x3b6dbecb, 0x0badbecb, 0x1badbecb,
0x2badbecb, 0x3badbecb, 0x0bedbecb, 0x1bedbecb, 0x2bedbecb, 0x3bedbecb,
0x0b2ebecb, 0x1b2ebecb, 0x2b2ebecb, 0x3b2ebecb, 0x0b6ebecb, 0x1b6ebecb,
0x2b6ebecb, 0x3b6ebecb, 0x0baebecb, 0x1baebecb, 0x2baebecb, 0x3baebecb,
0x0beebecb, 0x1beebecb, 0x2beebecb, 0x3beebecb, 0x0b2fbecb, 0x1b2fbecb,
0x2b2fbecb, 0x3b2fbecb, 0x0b6fbecb, 0x1b6fbecb, 0x2b6fbecb, 0x3b6fbecb,
0x0bafbecb, 0x1bafbecb, 0x2bafbecb, 0x3bafbecb, 0x0befbecb, 0x1befbecb,
0x2befbecb, 0x3befbecb, 0x0b2cb2db, 0x1b2cb2db, 0x2b2cb2db, 0x3b2cb2db,
0x0b6cb2db, 0x1b6cb2db, 0x2b6cb2db, 0x3b6cb2db, 0x0bacb2db, 0x1bacb2db,
0x2bacb2db, 0x3bacb2db, 0x0becb2db, 0x1becb2db, 0x2becb2db, 0x3becb2db,
0x0b2db2db, 0x1b2db2db, 0x2b2db2db, 0x3b2db2db, 0x0b6db2db, 0x1b6db2db,
0x2b6db2db, 0x3b6db2db, 0x0badb2db, 0x1badb2db, 0x2badb2db, 0x3badb2db,
0x0bedb2db, 0x1bedb2db, 0x2bedb2db, 0x3bedb2db, 0x0b2eb2db, 0x1b2eb2db,
0x2b2eb2db, 0x3b2eb2db, 0x0b6eb2db, 0x1b6eb2db, 0x2b6eb2db, 0x3b6eb2db,
0x0baeb2db, 0x1baeb2db, 0x2baeb2db, 0x3baeb2db, 0x0beeb2db, 0x1beeb2db,
0x2beeb2db, 0x3beeb2db, 0x0b2fb2db, 0x1b2fb2db, 0x2b2fb2db, 0x3b2fb2db,
0x0b6fb2db, 0x1b6fb2db, 0x2b6fb2db, 0x3b6fb2db, 0x0bafb2db, 0x1bafb2db,
0x2bafb2db, 0x3bafb2db, 0x0befb2db, 0x1befb2db, 0x2befb2db, 0x3befb2db,
0x0b2cb6db, 0x1b2cb6db, 0x2b2cb6db, 0x3b2cb6db, 0x0b6cb6db, 0x1b6cb6db,
0x2b6cb6db, 0x3b6cb6db, 0x0bacb6db, 0x1bacb6db, 0x2bacb6db, 0x3bacb6db,
0x0becb6db, 0x1becb6db, 0x2becb6db, 0x3becb6db, 0x0b2db6db, 0x1b2db6db,
0x2b2db6db, 0x3b2db6db, 0x0b6db6db, 0x1b6db6db, 0x2b6db6db, 0x3b6db6db,
0x0badb6db, 0x1badb6db, 0x2badb6db, 0x3badb6db, 0x0bedb6db, 0x1bedb6db,
0x2bedb6db, 0x3bedb6db, 0x0b2eb6db, 0x1b2eb6db, 0x2b2eb6db, 0x3b2eb6db,
0x0b6eb6db, 0x1b6eb6db, 0x2b6eb6db, 0x3b6eb6db, 0x0baeb6db, 0x1baeb6db,
0x2baeb6db, 0x3baeb6db,
};
static const uint32_t kNonZeroRepsDepth[704] = {
6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
};
static const uint16_t kStaticLiteralCodeBits[256] = {
0, 128, 64, 192, 32, 160, 96, 224,
16, 144, 80, 208, 48, 176, 112, 240,
8, 136, 72, 200, 40, 168, 104, 232,
24, 152, 88, 216, 56, 184, 120, 248,
4, 132, 68, 196, 36, 164, 100, 228,
20, 148, 84, 212, 52, 180, 116, 244,
12, 140, 76, 204, 44, 172, 108, 236,
28, 156, 92, 220, 60, 188, 124, 252,
2, 130, 66, 194, 34, 162, 98, 226,
18, 146, 82, 210, 50, 178, 114, 242,
10, 138, 74, 202, 42, 170, 106, 234,
26, 154, 90, 218, 58, 186, 122, 250,
6, 134, 70, 198, 38, 166, 102, 230,
22, 150, 86, 214, 54, 182, 118, 246,
14, 142, 78, 206, 46, 174, 110, 238,
30, 158, 94, 222, 62, 190, 126, 254,
1, 129, 65, 193, 33, 161, 97, 225,
17, 145, 81, 209, 49, 177, 113, 241,
9, 137, 73, 201, 41, 169, 105, 233,
25, 153, 89, 217, 57, 185, 121, 249,
5, 133, 69, 197, 37, 165, 101, 229,
21, 149, 85, 213, 53, 181, 117, 245,
13, 141, 77, 205, 45, 173, 109, 237,
29, 157, 93, 221, 61, 189, 125, 253,
3, 131, 67, 195, 35, 163, 99, 227,
19, 147, 83, 211, 51, 179, 115, 243,
11, 139, 75, 203, 43, 171, 107, 235,
27, 155, 91, 219, 59, 187, 123, 251,
7, 135, 71, 199, 39, 167, 103, 231,
23, 151, 87, 215, 55, 183, 119, 247,
15, 143, 79, 207, 47, 175, 111, 239,
31, 159, 95, 223, 63, 191, 127, 255,
};
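// Illustrative note (editor's sketch, not part of the imported source): every
// literal gets an 8-bit code here, and entry i is simply i bit-reversed in
// 8 bits, because WriteBits emits the low bit of a code first. For example,
// literal 0x03 (00000011b) is stored as kStaticLiteralCodeBits[3] == 192
// (11000000b).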
inline void StoreStaticLiteralHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(32, 0x00010003U, storage_ix, storage);
}
static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
0, 256, 128, 384, 64, 320, 192, 448,
32, 288, 160, 416, 96, 352, 224, 480,
16, 272, 144, 400, 80, 336, 208, 464,
48, 304, 176, 432, 112, 368, 240, 496,
8, 264, 136, 392, 72, 328, 200, 456,
40, 296, 168, 424, 104, 360, 232, 488,
24, 280, 152, 408, 88, 344, 216, 472,
56, 312, 184, 440, 120, 376, 248, 504,
4, 260, 132, 388, 68, 324, 196, 452,
36, 292, 164, 420, 100, 356, 228, 484,
20, 276, 148, 404, 84, 340, 212, 468,
52, 308, 180, 436, 116, 372, 244, 500,
12, 268, 140, 396, 76, 332, 204, 460,
44, 300, 172, 428, 108, 364, 236, 492,
28, 284, 156, 412, 92, 348, 220, 476,
60, 316, 188, 444, 124, 380, 252, 508,
2, 258, 130, 386, 66, 322, 194, 450,
34, 290, 162, 418, 98, 354, 226, 482,
18, 274, 146, 402, 82, 338, 210, 466,
50, 306, 178, 434, 114, 370, 242, 498,
10, 266, 138, 394, 74, 330, 202, 458,
42, 298, 170, 426, 106, 362, 234, 490,
26, 282, 154, 410, 90, 346, 218, 474,
58, 314, 186, 442, 122, 378, 250, 506,
6, 262, 134, 390, 70, 326, 198, 454,
38, 294, 166, 422, 102, 358, 230, 486,
22, 278, 150, 406, 86, 342, 214, 470,
54, 310, 182, 438, 118, 374, 246, 502,
14, 270, 142, 398, 78, 334, 206, 462,
46, 302, 174, 430, 110, 366, 238, 494,
30, 286, 158, 414, 94, 350, 222, 478,
62, 318, 190, 446, 126, 382, 254, 510,
1, 257, 129, 385, 65, 321, 193, 449,
33, 289, 161, 417, 97, 353, 225, 481,
17, 273, 145, 401, 81, 337, 209, 465,
49, 305, 177, 433, 113, 369, 241, 497,
9, 265, 137, 393, 73, 329, 201, 457,
41, 297, 169, 425, 105, 361, 233, 489,
25, 281, 153, 409, 89, 345, 217, 473,
57, 313, 185, 441, 121, 377, 249, 505,
5, 261, 133, 389, 69, 325, 197, 453,
37, 293, 165, 421, 101, 357, 229, 485,
21, 277, 149, 405, 85, 341, 213, 469,
53, 309, 181, 437, 117, 373, 245, 501,
13, 269, 141, 397, 77, 333, 205, 461,
45, 301, 173, 429, 109, 365, 237, 493,
29, 285, 157, 413, 93, 349, 221, 477,
61, 317, 189, 445, 125, 381, 253, 509,
3, 259, 131, 387, 67, 323, 195, 451,
35, 291, 163, 419, 99, 355, 227, 483,
19, 275, 147, 403, 83, 339, 211, 467,
51, 307, 179, 435, 115, 371, 243, 499,
11, 267, 139, 395, 75, 331, 203, 459,
43, 299, 171, 427, 107, 363, 235, 491,
27, 283, 155, 411, 91, 347, 219, 475,
59, 315, 187, 443, 123, 379, 251, 507,
7, 1031, 519, 1543, 263, 1287, 775, 1799,
135, 1159, 647, 1671, 391, 1415, 903, 1927,
71, 1095, 583, 1607, 327, 1351, 839, 1863,
199, 1223, 711, 1735, 455, 1479, 967, 1991,
39, 1063, 551, 1575, 295, 1319, 807, 1831,
167, 1191, 679, 1703, 423, 1447, 935, 1959,
103, 1127, 615, 1639, 359, 1383, 871, 1895,
231, 1255, 743, 1767, 487, 1511, 999, 2023,
23, 1047, 535, 1559, 279, 1303, 791, 1815,
151, 1175, 663, 1687, 407, 1431, 919, 1943,
87, 1111, 599, 1623, 343, 1367, 855, 1879,
215, 1239, 727, 1751, 471, 1495, 983, 2007,
55, 1079, 567, 1591, 311, 1335, 823, 1847,
183, 1207, 695, 1719, 439, 1463, 951, 1975,
119, 1143, 631, 1655, 375, 1399, 887, 1911,
247, 1271, 759, 1783, 503, 1527, 1015, 2039,
15, 1039, 527, 1551, 271, 1295, 783, 1807,
143, 1167, 655, 1679, 399, 1423, 911, 1935,
79, 1103, 591, 1615, 335, 1359, 847, 1871,
207, 1231, 719, 1743, 463, 1487, 975, 1999,
47, 1071, 559, 1583, 303, 1327, 815, 1839,
175, 1199, 687, 1711, 431, 1455, 943, 1967,
111, 1135, 623, 1647, 367, 1391, 879, 1903,
239, 1263, 751, 1775, 495, 1519, 1007, 2031,
31, 1055, 543, 1567, 287, 1311, 799, 1823,
159, 1183, 671, 1695, 415, 1439, 927, 1951,
95, 1119, 607, 1631, 351, 1375, 863, 1887,
223, 1247, 735, 1759, 479, 1503, 991, 2015,
63, 1087, 575, 1599, 319, 1343, 831, 1855,
191, 1215, 703, 1727, 447, 1471, 959, 1983,
127, 1151, 639, 1663, 383, 1407, 895, 1919,
255, 1279, 767, 1791, 511, 1535, 1023, 2047,
};
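// Illustrative note (editor's sketch): this static command code gives 9-bit
// codes to the first 448 symbols and 11-bit codes to the remaining 256, which
// exactly fills the code space: 448/2^9 + 256/2^11 = 0.875 + 0.125 = 1.
// The entries are again bit-reversed, e.g. kStaticCommandCodeBits[1] == 256
// (the 9-bit reversal of 1) and kStaticCommandCodeBits[448] == 7 (the 11-bit
// reversal of the first 11-bit code, 11100000000b).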
inline void StoreStaticCommandHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(28, 0x0000000006307003U, storage_ix, storage);
WriteBits(31, 0x0000000009262441U, storage_ix, storage);
}
static const uint16_t kStaticDistanceCodeBits[64] = {
0, 32, 16, 48, 8, 40, 24, 56, 4, 36, 20, 52, 12, 44, 28, 60,
2, 34, 18, 50, 10, 42, 26, 58, 6, 38, 22, 54, 14, 46, 30, 62,
1, 33, 17, 49, 9, 41, 25, 57, 5, 37, 21, 53, 13, 45, 29, 61,
3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63,
};
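// Illustrative note (editor's sketch): all 64 distance codes are 6 bits long;
// entry i is i bit-reversed in 6 bits, e.g. kStaticDistanceCodeBits[3] == 48
// (110000b is 000011b reversed).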
inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(18, 0x000000000001dc03U, storage_ix, storage);
WriteBits(10, 0x00000000000000daU, storage_ix, storage);
}
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_


@@ -0,0 +1,139 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Utilities for fast computation of logarithms.
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
#include <assert.h>
#include <math.h>
#include "./types.h"
namespace brotli {
static inline uint32_t Log2FloorNonZero(size_t n) {
#ifdef __GNUC__
return 31u ^ static_cast<uint32_t>(__builtin_clz(static_cast<uint32_t>(n)));
#else
uint32_t result = 0;
while (n >>= 1) result++;
return result;
#endif
}
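// Worked example (illustrative, not part of the imported source): for n == 5
// (101b), __builtin_clz counts 29 leading zeros and 31 ^ 29 == 2 ==
// floor(log2(5)). The XOR is equivalent to 31 - clz here, since clz is always
// in [0, 31] for nonzero n. The portable fallback shifts 5 -> 2 -> 1 -> 0 and
// counts 2 shifts, giving the same answer.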
// A lookup table for small values of log2(int) to be used in entropy
// computation.
//
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
static const float kLog2Table[] = {
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
7.9943534368588578f
};
// Faster logarithm for small integers, with the property of log2(0) == 0.
static inline double FastLog2(size_t v) {
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
return kLog2Table[v];
}
#if defined(_MSC_VER) && _MSC_VER <= 1700
// Visual Studio 2012 does not have the log2() function defined, so we use
// log() and a multiplication instead.
static const double kLog2Inv = 1.4426950408889634f;
return log(static_cast<double>(v)) * kLog2Inv;
#else
return log2(static_cast<double>(v));
#endif
}
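// Worked example (illustrative): FastLog2(8) hits the table path and returns
// kLog2Table[8] == 3.0 exactly; FastLog2(1 << 20) falls outside the 256-entry
// table and goes through log2() (or log() * kLog2Inv on old MSVC), returning
// 20.0.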
} // namespace brotli
#endif // BROTLI_ENC_FAST_LOG_H_


@@ -0,0 +1,77 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find maximal matching prefixes of strings.
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
#include "./port.h"
#include "./types.h"
namespace brotli {
// Separate implementation for little-endian 64-bit targets, for speed.
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
while (PREDICT_TRUE(--limit2)) {
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
s2 += 8;
matched += 8;
} else {
uint64_t x =
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
size_t matching_bits = static_cast<size_t>(__builtin_ctzll(x));
matched += matching_bits >> 3;
return matched;
}
}
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
while (--limit) {
if (PREDICT_TRUE(s1[matched] == *s2)) {
++s2;
++matched;
} else {
return matched;
}
}
return matched;
}
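// Worked example (illustrative): if the first mismatch inside an 8-byte block
// is at byte offset 3, then bytes 0..2 of x are zero, the lowest set bit of x
// lies in bits 24..31 of the little-endian load, __builtin_ctzll(x) is in
// [24, 31], and matching_bits >> 3 == 3 -- exactly the number of equal
// leading bytes.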
#else
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
// Find out how long the match is. We loop over the data 32 bits at a
// time until we find a 32-bit block that doesn't match; then we find
// the first non-matching bit and use that to calculate the total
// length of the match.
while (s2_ptr <= s2_limit - 4 &&
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
s2_ptr += 4;
matched += 4;
}
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
++s2_ptr;
++matched;
}
return matched;
}
#endif
} // namespace brotli
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_

modules/brotli/enc/hash.h

@@ -0,0 +1,974 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// A (forgetful) hash table into the data seen by the compressor, used to
// help create backward references to previous data.
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
#include <sys/types.h>
#include <algorithm>
#include <cstring>
#include <limits>
#include "./dictionary_hash.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./prefix.h"
#include "./static_dict.h"
#include "./transform.h"
#include "./types.h"
namespace brotli {
static const size_t kMaxTreeSearchDepth = 64;
static const size_t kMaxTreeCompLength = 128;
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
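// Illustrative reading (editor's note): together these two tables enumerate
// the 16 "short code" candidates tried against the distance cache: entries
// 0..3 are the last four distances verbatim, entries 4..9 are
// distance_cache[0] with offsets -1, +1, -2, +2, -3, +3, and entries 10..15
// apply the same six offsets to distance_cache[1].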
static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime; oddness is enough
//   for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
template<int kShiftBits>
inline uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - kShiftBits);
}
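// Worked example (illustrative): with kShiftBits == 14 this returns h >> 18,
// the top 14 bits of the 32-bit product, i.e. a bucket index in [0, 16384).
// Only the shift width varies between hashers; the multiplier is shared.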
// Usually, we always choose the longest backward reference. This function
// allows for the exception of that rule.
//
// If we choose a backward reference that is further away, it will
// usually be coded with more bits. We approximate this by assuming
// log2(distance). If the distance can be expressed in terms of the
// last four distances, we use some heuristic constants to estimate
// the bits cost. For the first up to four literals we use the bit
// cost of the literals from the literal cost model, after that we
// use the average bit cost of the cost model.
//
// This function is used to sometimes discard a longer backward reference
// when it is not much longer and the bit cost for encoding it is more
// than the saved literals.
//
// backward_reference_offset MUST be positive.
inline double BackwardReferenceScore(size_t copy_length,
size_t backward_reference_offset) {
return 5.4 * static_cast<double>(copy_length) -
1.20 * Log2FloorNonZero(backward_reference_offset);
}
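// Worked example (illustrative): a copy of length 8 at distance 4096 scores
// 5.4 * 8 - 1.2 * Log2FloorNonZero(4096) = 43.2 - 1.2 * 12 = 28.8, while the
// same length at distance 16 scores 43.2 - 4.8 = 38.4. Each doubling of the
// distance costs only 1.2, so one extra matched byte (+5.4) outweighs about
// 4.5 distance doublings. The variant below swaps the log2 term for a small
// per-code constant; reusing the last distance (code 0) even gets a -0.6
// "cost", i.e. a bonus.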
inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
size_t distance_short_code) {
static const double kDistanceShortCodeBitCost[16] = {
-0.6, 0.95, 1.17, 1.27,
0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
1.05, 1.05, 1.15, 1.15, 1.25, 1.25
};
return 5.4 * static_cast<double>(copy_length) -
kDistanceShortCodeBitCost[distance_short_code];
}
struct BackwardMatch {
BackwardMatch(void) : distance(0), length_and_code(0) {}
BackwardMatch(size_t dist, size_t len)
: distance(static_cast<uint32_t>(dist))
, length_and_code(static_cast<uint32_t>(len << 5)) {}
BackwardMatch(size_t dist, size_t len, size_t len_code)
: distance(static_cast<uint32_t>(dist))
, length_and_code(static_cast<uint32_t>(
(len << 5) | (len == len_code ? 0 : len_code))) {}
size_t length(void) const {
return length_and_code >> 5;
}
size_t length_code(void) const {
size_t code = length_and_code & 31;
return code ? code : length();
}
uint32_t distance;
uint32_t length_and_code;
};
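// Worked example (illustrative): BackwardMatch(100, 10) stores distance 100
// and length_and_code == 10 << 5 == 320; its low 5 bits are 0, so
// length_code() falls back to length() == 10. BackwardMatch(100, 10, 12)
// stores 320 | 12 == 332, so length() == 332 >> 5 == 10 but
// length_code() == 12 -- the dictionary-match case where the coded length
// differs from the matched length.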
// A (forgetful) hash table into the data seen by the compressor, used to
// help create backward references to previous data.
//
// This is a hash map of fixed size (kBucketSize). Starting from the
// given index, kBucketSweep buckets are used to store values of a key.
template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
class HashLongestMatchQuickly {
public:
HashLongestMatchQuickly(void) {
Reset();
}
void Reset(void) {
need_init_ = true;
num_dict_lookups_ = 0;
num_dict_matches_ = 0;
}
void Init(void) {
if (need_init_) {
// It is not strictly necessary to fill this buffer here, but
// not filling it would make the results of the compression stochastic
// (though still correct). This is because random data would cause the
// system to find accidentally good backward references here and there.
memset(&buckets_[0], 0, sizeof(buckets_));
need_init_ = false;
}
}
void InitForData(const uint8_t* data, size_t num) {
for (size_t i = 0; i < num; ++i) {
const uint32_t key = HashBytes(&data[i]);
memset(&buckets_[key], 0, kBucketSweep * sizeof(buckets_[0]));
need_init_ = false;
}
}
// Look at 5 bytes at data (kHashLength).
// Compute a hash from these, and store the position ix in one of the
// kBucketSweep buckets [key .. key + kBucketSweep - 1].
inline void Store(const uint8_t *data, const uint32_t ix) {
const uint32_t key = HashBytes(data);
// Wiggle the value with the bucket sweep range.
const uint32_t off = (ix >> 3) % kBucketSweep;
buckets_[key + off] = ix;
}
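// Worked example (illustrative): with kBucketSweep == 4, positions 0..7
// hashing to the same key land in buckets_[key + 0], positions 8..15 in
// buckets_[key + 1], 16..23 in buckets_[key + 2], 24..31 in buckets_[key + 3],
// and 32..39 wrap back to buckets_[key + 0], so nearby positions do not
// immediately evict each other.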
// Finds the longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
// up to the length of max_length and stores the position cur_ix in the
// hash table.
//
// Does not look for matches longer than max_length.
// Does not look for matches further away than max_backward.
// Writes the best found match length into best_len_out.
// Writes the backward distance of the best match into best_distance_out.
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
const size_t ring_buffer_mask,
const int* __restrict distance_cache,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double* __restrict best_score_out) {
const size_t best_len_in = *best_len_out;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
double best_score = *best_score_out;
size_t best_len = best_len_in;
size_t cached_backward = static_cast<size_t>(distance_cache[0]);
size_t prev_ix = cur_ix - cached_backward;
bool match_found = false;
if (prev_ix < cur_ix) {
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char == ring_buffer[prev_ix + best_len]) {
size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
best_len = len;
*best_len_out = len;
*best_len_code_out = len;
*best_distance_out = cached_backward;
*best_score_out = best_score;
compare_char = ring_buffer[cur_ix_masked + best_len];
if (kBucketSweep == 1) {
buckets_[key] = static_cast<uint32_t>(cur_ix);
return true;
} else {
match_found = true;
}
}
}
}
if (kBucketSweep == 1) {
// Only one to look for, don't bother to prepare for a loop.
prev_ix = buckets_[key];
buckets_[key] = static_cast<uint32_t>(cur_ix);
size_t backward = cur_ix - prev_ix;
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
return false;
}
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
return false;
}
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
*best_len_out = len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = BackwardReferenceScore(len, backward);
return true;
}
} else {
uint32_t *bucket = buckets_ + key;
prev_ix = *bucket++;
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
const size_t backward = cur_ix - prev_ix;
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char != ring_buffer[prev_ix + best_len]) {
continue;
}
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
continue;
}
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
const double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = score;
compare_char = ring_buffer[cur_ix_masked + best_len];
match_found = true;
}
}
}
}
if (kUseDictionary && !match_found &&
num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
++num_dict_lookups_;
const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
const uint16_t v = kStaticDictionaryHash[dict_key];
if (v > 0) {
const uint32_t len = v & 31;
const uint32_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const size_t matchlen =
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const size_t backward = max_backward + word_id + 1;
const double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++num_dict_matches_;
best_score = score;
best_len = matchlen;
*best_len_out = best_len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
}
}
const uint32_t off = (cur_ix >> 3) % kBucketSweep;
buckets_[key + off] = static_cast<uint32_t>(cur_ix);
return match_found;
}
enum { kHashLength = 5 };
enum { kHashTypeLength = 8 };
// HashBytes is the function that chooses the bucket to place
// the address in. The HashLongestMatch and HashLongestMatchQuickly
// classes have separate, different implementations of hashing.
static uint32_t HashBytes(const uint8_t *data) {
// Computing a hash based on 5 bytes works much better for
// qualities 1 and 3, where the next hash value is likely to replace
// the current one before it is ever used.
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return static_cast<uint32_t>(h >> (64 - kBucketBits));
}
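// Illustrative note (editor's sketch): on a little-endian target the << 24
// pushes bytes 5..7 of the load out of the 64-bit value, so only data[0..4]
// influence the product -- which is what makes kHashLength == 5 above
// accurate -- and the top kBucketBits of the product become the bucket index.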
enum { kHashMapSize = 4 << kBucketBits };
private:
static const uint32_t kBucketSize = 1 << kBucketBits;
uint32_t buckets_[kBucketSize + kBucketSweep];
// True if buckets_ array needs to be initialized.
bool need_init_;
size_t num_dict_lookups_;
size_t num_dict_matches_;
};
// A (forgetful) hash table into the data seen by the compressor, used to
// help create backward references to previous data.
//
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
// index positions of the given hash key in the compressed data.
template <int kBucketBits,
int kBlockBits,
int kNumLastDistancesToCheck>
class HashLongestMatch {
public:
HashLongestMatch(void) {
Reset();
}
void Reset(void) {
need_init_ = true;
num_dict_lookups_ = 0;
num_dict_matches_ = 0;
}
void Init(void) {
if (need_init_) {
memset(&num_[0], 0, sizeof(num_));
need_init_ = false;
}
}
void InitForData(const uint8_t* data, size_t num) {
for (size_t i = 0; i < num; ++i) {
const uint32_t key = HashBytes(&data[i]);
num_[key] = 0;
need_init_ = false;
}
}
// Look at 3 bytes at data.
// Compute a hash from these, and store the value of ix at that position.
inline void Store(const uint8_t *data, const uint32_t ix) {
const uint32_t key = HashBytes(data);
const int minor_ix = num_[key] & kBlockMask;
buckets_[key][minor_ix] = ix;
++num_[key];
}
// Finds the longest backward match of &data[cur_ix] up to the length of
// max_length and stores the position cur_ix in the hash table.
//
// Does not look for matches longer than max_length.
// Does not look for matches further away than max_backward.
// Writes the best found match length into best_len_out.
// Writes the backward distance of the best match into best_distance_out.
// Writes the score of the best match into best_score_out.
bool FindLongestMatch(const uint8_t * __restrict data,
const size_t ring_buffer_mask,
const int* __restrict distance_cache,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double * __restrict best_score_out) {
*best_len_code_out = 0;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
bool match_found = false;
// Don't accept a short copy from far away.
double best_score = *best_score_out;
size_t best_len = *best_len_out;
*best_len_out = 0;
// Try last distance first.
for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
const size_t idx = kDistanceCacheIndex[i];
const size_t backward =
static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
size_t prev_ix = static_cast<size_t>(cur_ix - backward);
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > max_backward)) {
continue;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 3 || (len == 2 && i < 2)) {
// Comparing for >= 2 would not change the semantics, but just saves
// a few unnecessary binary logarithms in backward reference score,
// since we are not interested in such short matches.
double score = BackwardReferenceScoreUsingLastDistance(len, i);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (size_t i = num_[key]; i > down;) {
--i;
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 4) {
// Comparing for >= 3 would not change the semantics, but just saves
// a few unnecessary binary logarithms in the backward reference
// score, since we are not interested in such short matches.
double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
++num_[key];
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
for (int k = 0; k < 2; ++k, ++dict_key) {
++num_dict_lookups_;
const uint16_t v = kStaticDictionaryHash[dict_key];
if (v > 0) {
const size_t len = v & 31;
const size_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const size_t matchlen =
FindMatchLengthWithLimit(&data[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const size_t backward = max_backward + word_id + 1;
double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++num_dict_matches_;
best_score = score;
best_len = matchlen;
*best_len_out = best_len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
}
}
}
return match_found;
}
// Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
// length of max_length and stores the position cur_ix in the hash table.
//
// Returns the number of matches found, and stores the found matches in
// matches[0] to matches[num_matches - 1]. The matches will be
// sorted by strictly increasing length and (non-strictly) increasing
// distance.
size_t FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
BackwardMatch* matches) {
BackwardMatch* const orig_matches = matches;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
size_t best_len = 1;
size_t stop = cur_ix - 64;
if (cur_ix < 64) { stop = 0; }
for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (data[cur_ix_masked] != data[prev_ix] ||
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
*matches++ = BackwardMatch(backward, len);
}
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (size_t i = num_[key]; i > down;) {
--i;
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
*matches++ = BackwardMatch(backward, len);
}
}
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
++num_[key];
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
dict_matches[i] = kInvalidMatch;
}
size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) {
size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
for (size_t l = minlen; l <= maxlen; ++l) {
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
dict_id & 31);
}
}
}
return static_cast<size_t>(matches - orig_matches);
}
enum { kHashLength = 4 };
enum { kHashTypeLength = 4 };
// HashBytes is the function that chooses the bucket to place
// the address in. The HashLongestMatch and HashLongestMatchQuickly
// classes have separate, different implementations of hashing.
static uint32_t HashBytes(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - kBucketBits);
}
enum { kHashMapSize = 2 << kBucketBits };
static const size_t kMaxNumMatches = 64 + (1 << kBlockBits);
private:
// Number of hash buckets.
static const uint32_t kBucketSize = 1 << kBucketBits;
// Only kBlockSize newest backward references are kept,
// and the older are forgotten.
static const uint32_t kBlockSize = 1 << kBlockBits;
// Mask for accessing entries in a block (in a ringbuffer manner).
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
// Number of entries in a particular bucket.
uint16_t num_[kBucketSize];
// Buckets containing kBlockSize of backward references.
uint32_t buckets_[kBucketSize][kBlockSize];
// True if num_ array needs to be initialized.
bool need_init_;
size_t num_dict_lookups_;
size_t num_dict_matches_;
};
// A (forgetful) hash table where each hash bucket contains a binary tree of
// sequences whose first 4 bytes share the same hash code.
// Each sequence is kMaxTreeCompLength long and is identified by its starting
// position in the input data. The binary tree is sorted by the lexicographic
// order of the sequences, and it is also a max-heap with respect to the
// starting positions.
class HashToBinaryTree {
public:
HashToBinaryTree() : forest_(NULL) {
Reset();
}
~HashToBinaryTree() {
delete[] forest_;
}
void Reset() {
need_init_ = true;
}
void Init(int lgwin, size_t position, size_t bytes, bool is_last) {
if (need_init_) {
window_mask_ = (1u << lgwin) - 1u;
invalid_pos_ = static_cast<uint32_t>(-window_mask_);
for (uint32_t i = 0; i < kBucketSize; i++) {
buckets_[i] = invalid_pos_;
}
size_t num_nodes = (position == 0 && is_last) ? bytes : window_mask_ + 1;
forest_ = new uint32_t[2 * num_nodes];
need_init_ = false;
}
}
// Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
// length of max_length and stores the position cur_ix in the hash table.
//
// Returns the number of matches found, and stores the found matches in
// matches[0] to matches[num_matches - 1]. The matches will be
// sorted by strictly increasing length and (non-strictly) increasing
// distance.
size_t FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
BackwardMatch* matches) {
BackwardMatch* const orig_matches = matches;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
size_t best_len = 1;
size_t stop = cur_ix - 64;
if (cur_ix < 64) { stop = 0; }
for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (data[cur_ix_masked] != data[prev_ix] ||
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
*matches++ = BackwardMatch(backward, len);
}
}
if (best_len < max_length) {
matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
max_length, &best_len, matches);
}
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
dict_matches[i] = kInvalidMatch;
}
size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) {
size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
for (size_t l = minlen; l <= maxlen; ++l) {
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
dict_id & 31);
}
}
}
return static_cast<size_t>(matches - orig_matches);
}
// Stores the hash of the next 4 bytes and re-roots the binary tree at the
// current sequence, without returning any matches.
// REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
void Store(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix) {
size_t best_len = 0;
StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
&best_len, NULL);
}
void StitchToPreviousBlock(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
// Store the last `kMaxTreeCompLength - 1` positions in the hasher.
// These could not be calculated before, since they require knowledge
// of both the previous and the current block.
const size_t i_start = position - kMaxTreeCompLength + 1;
const size_t i_end = std::min(position, i_start + num_bytes);
for (size_t i = i_start; i < i_end; ++i) {
// We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
// end of the current block and that we have at least
// kMaxTreeCompLength tail in the ringbuffer.
Store(ringbuffer, ringbuffer_mask, i);
}
}
}
static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
private:
// Stores the hash of the next 4 bytes and in a single tree-traversal, the
// hash bucket's binary tree is searched for matches and is re-rooted at the
// current position.
//
// If less than kMaxTreeCompLength data is available, the hash bucket of the
// current position is searched for matches, but the state of the hash table
// is not changed, since we can not know the final sorting order of the
// current (incomplete) sequence.
//
// This function must be called with increasing cur_ix positions.
BackwardMatch* StoreAndFindMatches(const uint8_t* const __restrict data,
const size_t cur_ix,
const size_t ring_buffer_mask,
const size_t max_length,
size_t* const __restrict best_len,
BackwardMatch* __restrict matches) {
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
const size_t max_backward = window_mask_ - 15;
const size_t max_comp_len = std::min(max_length, kMaxTreeCompLength);
const bool reroot_tree = max_length >= kMaxTreeCompLength;
const uint32_t key = HashBytes(&data[cur_ix_masked]);
size_t prev_ix = buckets_[key];
// The forest index of the rightmost node of the left subtree of the new
// root, updated as we traverse and reroot the tree of the hash bucket.
size_t node_left = LeftChildIndex(cur_ix);
// The forest index of the leftmost node of the right subtree of the new
// root, updated as we traverse and reroot the tree of the hash bucket.
size_t node_right = RightChildIndex(cur_ix);
// The match length of the rightmost node of the left subtree of the new
// root, updated as we traverse and reroot the tree of the hash bucket.
size_t best_len_left = 0;
// The match length of the leftmost node of the right subtree of the new
// root, updated as we traverse and reroot the tree of the hash bucket.
size_t best_len_right = 0;
if (reroot_tree) {
buckets_[key] = static_cast<uint32_t>(cur_ix);
}
for (size_t depth_remaining = kMaxTreeSearchDepth; ; --depth_remaining) {
const size_t backward = cur_ix - prev_ix;
const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
if (backward == 0 || backward > max_backward || depth_remaining == 0) {
if (reroot_tree) {
forest_[node_left] = invalid_pos_;
forest_[node_right] = invalid_pos_;
}
break;
}
const size_t cur_len = std::min(best_len_left, best_len_right);
const size_t len = cur_len +
FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
&data[prev_ix_masked + cur_len],
max_length - cur_len);
if (len > *best_len) {
*best_len = len;
if (matches) {
*matches++ = BackwardMatch(backward, len);
}
if (len >= max_comp_len) {
if (reroot_tree) {
forest_[node_left] = forest_[LeftChildIndex(prev_ix)];
forest_[node_right] = forest_[RightChildIndex(prev_ix)];
}
break;
}
}
if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
best_len_left = len;
if (reroot_tree) {
forest_[node_left] = static_cast<uint32_t>(prev_ix);
}
node_left = RightChildIndex(prev_ix);
prev_ix = forest_[node_left];
} else {
best_len_right = len;
if (reroot_tree) {
forest_[node_right] = static_cast<uint32_t>(prev_ix);
}
node_right = LeftChildIndex(prev_ix);
prev_ix = forest_[node_right];
}
}
return matches;
}
inline size_t LeftChildIndex(const size_t pos) {
return 2 * (pos & window_mask_);
}
inline size_t RightChildIndex(const size_t pos) {
return 2 * (pos & window_mask_) + 1;
}
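// Illustrative note (editor's sketch): the binary tree is stored implicitly in
// forest_, indexed by position: the node for position pos keeps its left child
// in forest_[2 * (pos & window_mask_)] and its right child in the next slot,
// which is why Init() allocates forest_ with 2 * num_nodes entries.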
static uint32_t HashBytes(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - kBucketBits);
}
static const int kBucketBits = 17;
static const size_t kBucketSize = 1 << kBucketBits;
// The window size minus 1.
size_t window_mask_;
// Hash table that maps the 4-byte hashes of the sequence to the last
// position where this hash was found, which is the root of the binary
// tree of sequences that share this hash bucket.
uint32_t buckets_[kBucketSize];
// The union of the binary trees of each hash bucket. The root of the tree
// corresponding to a hash is a sequence starting at buckets_[hash] and
// the left and right children of a sequence starting at pos are
// forest_[2 * pos] and forest_[2 * pos + 1].
uint32_t* forest_;
// A position used to mark a non-existent sequence, i.e. a tree is empty if
// its root is at invalid_pos_ and a node is a leaf if both its children
// are at invalid_pos_.
uint32_t invalid_pos_;
bool need_init_;
};
struct Hashers {
// For kBucketSweep == 1, enabling the dictionary lookup makes compression
// a little faster (0.5% - 1%) and it compresses 0.15% better on small text
// and html inputs.
typedef HashLongestMatchQuickly<16, 1, true> H2;
typedef HashLongestMatchQuickly<16, 2, false> H3;
typedef HashLongestMatchQuickly<17, 4, true> H4;
typedef HashLongestMatch<14, 4, 4> H5;
typedef HashLongestMatch<14, 5, 4> H6;
typedef HashLongestMatch<15, 6, 10> H7;
typedef HashLongestMatch<15, 7, 10> H8;
typedef HashLongestMatch<15, 8, 16> H9;
typedef HashToBinaryTree H10;
Hashers(void) : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0), hash_h10(0) {}
~Hashers(void) {
delete hash_h2;
delete hash_h3;
delete hash_h4;
delete hash_h5;
delete hash_h6;
delete hash_h7;
delete hash_h8;
delete hash_h9;
delete hash_h10;
}
void Init(int type) {
switch (type) {
case 2: hash_h2 = new H2; break;
case 3: hash_h3 = new H3; break;
case 4: hash_h4 = new H4; break;
case 5: hash_h5 = new H5; break;
case 6: hash_h6 = new H6; break;
case 7: hash_h7 = new H7; break;
case 8: hash_h8 = new H8; break;
case 9: hash_h9 = new H9; break;
case 10: hash_h10 = new H10; break;
default: break;
}
}
template<typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
hasher->Init();
for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
hasher->Store(&dict[i], static_cast<uint32_t>(i));
}
}
// Custom LZ77 window.
void PrependCustomDictionary(
int type, int lgwin, const size_t size, const uint8_t* dict) {
switch (type) {
case 2: WarmupHash(size, dict, hash_h2); break;
case 3: WarmupHash(size, dict, hash_h3); break;
case 4: WarmupHash(size, dict, hash_h4); break;
case 5: WarmupHash(size, dict, hash_h5); break;
case 6: WarmupHash(size, dict, hash_h6); break;
case 7: WarmupHash(size, dict, hash_h7); break;
case 8: WarmupHash(size, dict, hash_h8); break;
case 9: WarmupHash(size, dict, hash_h9); break;
case 10:
hash_h10->Init(lgwin, 0, size, false);
for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
}
break;
default: break;
}
}
H2* hash_h2;
H3* hash_h3;
H4* hash_h4;
H5* hash_h5;
H6* hash_h6;
H7* hash_h7;
H8* hash_h8;
H9* hash_h9;
H10* hash_h10;
};
} // namespace brotli
#endif // BROTLI_ENC_HASH_H_
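
The bucket index above is a plain multiplicative hash, easy to sanity-check in isolation. A minimal standalone sketch of the same computation, assuming kHashMul32 == 0x1e35a7bd (its value earlier in this file) and using memcpy in place of BROTLI_UNALIGNED_LOAD32:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Standalone copy of HashBytes: the multiply mixes all four input bytes
// into the high bits, and the shift keeps the top kBucketBits of them.
static uint32_t HashBytes(const uint8_t* data) {
  uint32_t v;
  memcpy(&v, data, sizeof(v));   // portable unaligned load
  uint32_t h = v * 0x1e35a7bdu;  // kHashMul32 (assumed value)
  return h >> (32 - 17);         // kBucketBits == 17
}

int main() {
  const uint8_t a[4] = {'a', 'b', 'c', 'd'};
  const uint8_t b[4] = {'a', 'b', 'c', 'e'};
  // A one-byte change should almost always land in a different bucket.
  printf("%u %u\n", HashBytes(a), HashBytes(b));
}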


@ -0,0 +1,67 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Build per-context histograms of literals, commands and distance codes.
#include "./histogram.h"
#include <cmath>
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#include "./prefix.h"
namespace brotli {
void BuildHistograms(
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* ringbuffer,
size_t start_pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms) {
size_t pos = start_pos;
BlockSplitIterator literal_it(literal_split);
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
BlockSplitIterator dist_it(dist_split);
for (size_t i = 0; i < num_commands; ++i) {
const Command &cmd = cmds[i];
insert_and_copy_it.Next();
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_it.Next();
size_t context = (literal_it.type_ << kLiteralContextBits) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
prev_byte = ringbuffer[pos & mask];
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len()) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
dist_it.Next();
size_t context = (dist_it.type_ << kDistanceContextBits) +
cmd.DistanceContext();
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
}
}
}
}
} // namespace brotli
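
BuildHistograms packs the block type and the per-byte context into a single histogram index, so with kLiteralContextBits == 6 each literal block type owns a contiguous run of 64 histograms. A small sketch of that layout, with the real Context() replaced by a clearly hypothetical stand-in:

#include <cstddef>
#include <cstdio>

static const size_t kLiteralContextBits = 6;

// Hypothetical stand-in for Context(prev_byte, prev_byte2, mode); the real
// function maps the previous two bytes to a context id in [0, 64).
static size_t FakeContext(unsigned char p1, unsigned char p2) {
  return ((p1 >> 2) ^ p2) & 63;  // illustration only
}

int main() {
  size_t block_type = 3;
  size_t ctx = FakeContext('e', 'h');
  // Histograms for literal block type 3 occupy indexes [192, 256).
  size_t index = (block_type << kLiteralContextBits) + ctx;
  printf("histogram index = %zu\n", index);
}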


@ -0,0 +1,94 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Models the histograms of literals, commands and distance codes.
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <cstring>
#include <limits>
#include <vector>
#include "./context.h"
#include "./command.h"
#include "./fast_log.h"
#include "./prefix.h"
#include "./types.h"
namespace brotli {
struct BlockSplit;
// A simple container for histograms of data in blocks.
template<int kDataSize>
struct Histogram {
Histogram(void) {
Clear();
}
void Clear(void) {
memset(data_, 0, sizeof(data_));
total_count_ = 0;
bit_cost_ = std::numeric_limits<double>::infinity();
}
void Add(size_t val) {
++data_[val];
++total_count_;
}
void Remove(size_t val) {
--data_[val];
--total_count_;
}
template<typename DataType>
void Add(const DataType *p, size_t n) {
total_count_ += n;
while (n--) ++data_[*p++];
}
void AddHistogram(const Histogram& v) {
total_count_ += v.total_count_;
for (size_t i = 0; i < kDataSize; ++i) {
data_[i] += v.data_[i];
}
}
uint32_t data_[kDataSize];
size_t total_count_;
double bit_cost_;
};
// Literal histogram.
typedef Histogram<256> HistogramLiteral;
// Prefix histograms.
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
typedef Histogram<272> HistogramContextMap;
// Block type histogram, 256 block types + 2 special symbols.
typedef Histogram<258> HistogramBlockType;
static const size_t kLiteralContextBits = 6;
static const size_t kDistanceContextBits = 2;
void BuildHistograms(
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms);
} // namespace brotli
#endif // BROTLI_ENC_HISTOGRAM_H_
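
A quick usage sketch of the container above (a trimmed copy, not the header itself), showing Add and the AddHistogram merge that the block splitters rely on:

#include <cstdint>
#include <cstdio>
#include <cstring>

template<int kDataSize>
struct Histogram {
  Histogram() : total_count_(0) { memset(data_, 0, sizeof(data_)); }
  void Add(size_t val) { ++data_[val]; ++total_count_; }
  void AddHistogram(const Histogram& v) {
    total_count_ += v.total_count_;
    for (int i = 0; i < kDataSize; ++i) data_[i] += v.data_[i];
  }
  uint32_t data_[kDataSize];
  size_t total_count_;
};

int main() {
  Histogram<256> a, b;
  for (const char* p = "hello"; *p; ++p) a.Add(static_cast<uint8_t>(*p));
  for (const char* p = "world"; *p; ++p) b.Add(static_cast<uint8_t>(*p));
  a.AddHistogram(b);  // merge, as FinishBlock does when combining blocks
  printf("count('l') = %u, total = %zu\n", a.data_['l'], a.total_count_);  // 3, 10
}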


@ -0,0 +1,165 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Literal cost model to allow backward reference replacement to be efficient.
#include "./literal_cost.h"
#include <math.h>
#include <algorithm>
#include "./fast_log.h"
#include "./types.h"
#include "./utf8_util.h"
namespace brotli {
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; // Next one is the 'Byte 1' again.
} else if (c >= 192) { // Next one is the 'Byte 2' of utf-8 encoding.
return std::min<size_t>(1, clamp);
} else {
// Let's decide over the last byte if this ends the sequence.
if (last < 0xe0) {
return 0; // Completed two or three byte coding.
} else { // Next one is the 'Byte 3' of utf-8 encoding.
return std::min<size_t>(2, clamp);
}
}
}
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t *data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; // should be 2, but 1 compresses better.
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < len; ++i) {
size_t c = data[(pos + i) & mask];
utf8_pos = UTF8Position(last_c, c, 2);
++counts[utf8_pos];
last_c = c;
}
if (counts[2] < 500) {
max_utf8 = 1;
}
if (counts[1] + counts[2] < 25) {
max_utf8 = 0;
}
return max_utf8;
}
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
// max_utf8 is 0 (normal ascii single byte modeling),
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t histogram[3][256] = { { 0 } };
size_t window_half = 495;
size_t in_window = std::min(window_half, len);
size_t in_window_utf8[3] = { 0 };
// Bootstrap histograms.
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < in_window; ++i) {
size_t c = data[(pos + i) & mask];
++histogram[utf8_pos][c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
last_c = c;
}
// Compute bit costs with sliding window.
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
size_t c = i < window_half + 1 ?
0 : data[(pos + i - window_half - 1) & mask];
size_t last_c = i < window_half + 2 ?
0 : data[(pos + i - window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
}
if (i + window_half < len) {
// Add a byte in the future.
size_t c = data[(pos + i + window_half - 1) & mask];
size_t last_c = data[(pos + i + window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[utf8_pos][data[masked_pos]];
if (histo == 0) {
histo = 1;
}
double lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
lit_cost += 0.02905;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
// Make the first bytes more expensive -- seems to help, not sure why.
// Perhaps because the entropy source is changing its properties
// rapidly in the beginning of the file, perhaps because the beginning
// of the data is a statistical "anomaly".
if (i < 2000) {
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
}
cost[i] = static_cast<float>(lit_cost);
}
}
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
}
size_t histogram[256] = { 0 };
size_t window_half = 2000;
size_t in_window = std::min(window_half, len);
// Bootstrap histogram.
for (size_t i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
// Compute bit costs with sliding window.
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < len) {
// Add a byte in the future.
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}
size_t histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}
double lit_cost = FastLog2(in_window) - FastLog2(histo);
lit_cost += 0.029;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
cost[i] = static_cast<float>(lit_cost);
}
}
} // namespace brotli
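
Both code paths price a literal at its self-information under the sliding-window distribution, FastLog2(in_window) - FastLog2(histo), plus a small tuning constant (0.029 here, 0.02905 in the UTF-8 path). A worked sketch of the core term with hypothetical counts:

#include <cmath>
#include <cstdio>

int main() {
  // Hypothetical numbers: a 2000-byte window in which the current byte
  // value occurred 125 times.
  double in_window = 2000.0, histo = 125.0;
  double lit_cost = std::log2(in_window) - std::log2(histo);  // 4 bits
  lit_cost += 0.029;     // modeling-cost constant from above
  if (lit_cost < 1.0) {  // soften estimates below one bit
    lit_cost = lit_cost * 0.5 + 0.5;
  }
  printf("estimated cost: %.3f bits\n", lit_cost);  // ~4.029
}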


@ -0,0 +1,24 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Literal cost model to allow backward reference replacement to be efficient.
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
#include "./types.h"
namespace brotli {
// Estimates how many bits the literals in the interval [pos, pos + len) in the
// ringbuffer (data, mask) will take when entropy coded, and writes these
// estimates to the cost[0..len) array.
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost);
} // namespace brotli
#endif // BROTLI_ENC_LITERAL_COST_H_


@ -0,0 +1,539 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
#include "./metablock.h"
#include "./block_splitter.h"
#include "./context.h"
#include "./cluster.h"
#include "./histogram.h"
namespace brotli {
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb) {
SplitBlock(cmds, num_commands,
ringbuffer, pos, mask,
&mb->literal_split,
&mb->command_split,
&mb->distance_split);
std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
literal_context_mode);
size_t num_literal_contexts =
mb->literal_split.num_types << kLiteralContextBits;
size_t num_distance_contexts =
mb->distance_split.num_types << kDistanceContextBits;
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types);
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
BuildHistograms(cmds, num_commands,
mb->literal_split,
mb->command_split,
mb->distance_split,
ringbuffer,
pos,
mask,
prev_byte,
prev_byte2,
literal_context_modes,
&literal_histograms,
&mb->command_histograms,
&distance_histograms);
// Histogram ids need to fit in one byte.
static const size_t kMaxNumberOfHistograms = 256;
ClusterHistograms(literal_histograms,
1u << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
ClusterHistograms(distance_histograms,
1u << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
}
// Greedy block splitter for one block category (literal, command or distance).
template<typename HistogramType>
class BlockSplitter {
public:
BlockSplitter(size_t alphabet_size,
size_t min_block_size,
double split_threshold,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
min_block_size_(min_block_size),
split_threshold_(split_threshold),
num_blocks_(0),
split_(split),
histograms_(histograms),
target_block_size_(min_block_size),
block_size_(0),
curr_histogram_ix_(0),
merge_last_count_(0) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types);
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
}
// Adds the next symbol to the current histogram. When the current block
// reaches the target size, decides on merging the block.
void AddSymbol(size_t symbol) {
(*histograms_)[curr_histogram_ix_].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
FinishBlock(/* is_final = */ false);
}
}
// Does one of three things:
// (1) emits the current block with a new block type;
// (2) emits the current block with the type of the second last block;
// (3) merges the current block with the last block.
void FinishBlock(bool is_final) {
if (block_size_ < min_block_size_) {
block_size_ = min_block_size_;
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
last_entropy_[0] =
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
last_entropy_[1] = last_entropy_[0];
++num_blocks_;
++split_->num_types;
++curr_histogram_ix_;
block_size_ = 0;
} else if (block_size_ > 0) {
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
alphabet_size_);
HistogramType combined_histo[2];
double combined_entropy[2];
double diff[2];
for (size_t j = 0; j < 2; ++j) {
size_t last_histogram_ix = last_histogram_ix_[j];
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[j] = BitsEntropy(
&combined_histo[j].data_[0], alphabet_size_);
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
}
if (split_->num_types < kMaxBlockTypes &&
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = entropy;
++num_blocks_;
++split_->num_types;
++curr_histogram_ix_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = combined_entropy[1];
++num_blocks_;
block_size_ = 0;
(*histograms_)[curr_histogram_ix_].Clear();
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
last_entropy_[0] = combined_entropy[0];
if (split_->num_types == 1) {
last_entropy_[1] = last_entropy_[0];
}
block_size_ = 0;
(*histograms_)[curr_histogram_ix_].Clear();
if (++merge_last_count_ > 1) {
target_block_size_ += min_block_size_;
}
}
}
if (is_final) {
(*histograms_).resize(split_->num_types);
split_->types.resize(num_blocks_);
split_->lengths.resize(num_blocks_);
}
}
private:
static const uint16_t kMaxBlockTypes = 256;
// Alphabet size of particular block category.
const size_t alphabet_size_;
// We collect at least this many symbols for each block.
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
size_t target_block_size_;
// The number of symbols in the current histogram.
size_t block_size_;
// Offset of the current histogram.
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
double last_entropy_[2];
// The number of times we merged the current block with the last one.
size_t merge_last_count_;
};
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
BlockSplitter<HistogramLiteral> lit_blocks(
256, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_);
}
}
lit_blocks.FinishBlock(/* is_final = */ true);
cmd_blocks.FinishBlock(/* is_final = */ true);
dist_blocks.FinishBlock(/* is_final = */ true);
}
// Greedy block splitter for one block category (literal, command or distance).
// Gathers histograms for all context buckets.
template<typename HistogramType>
class ContextBlockSplitter {
public:
ContextBlockSplitter(size_t alphabet_size,
size_t num_contexts,
size_t min_block_size,
double split_threshold,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
num_contexts_(num_contexts),
max_block_types_(kMaxBlockTypes / num_contexts),
min_block_size_(min_block_size),
split_threshold_(split_threshold),
num_blocks_(0),
split_(split),
histograms_(histograms),
target_block_size_(min_block_size),
block_size_(0),
curr_histogram_ix_(0),
last_entropy_(2 * num_contexts),
merge_last_count_(0) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types * num_contexts);
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
}
// Adds the next symbol to the current block type and context. When the
// current block reaches the target size, decides on merging the block.
void AddSymbol(size_t symbol, size_t context) {
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
FinishBlock(/* is_final = */ false);
}
}
// Does one of three things:
// (1) emits the current block with a new block type;
// (2) emits the current block with the type of the second last block;
// (3) merges the current block with the last block.
void FinishBlock(bool is_final) {
if (block_size_ < min_block_size_) {
block_size_ = min_block_size_;
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[i] =
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
last_entropy_[num_contexts_ + i] = last_entropy_[i];
}
++num_blocks_;
++split_->num_types;
curr_histogram_ix_ += num_contexts_;
block_size_ = 0;
} else if (block_size_ > 0) {
// Try merging the set of histograms for the current block type with the
// respective set of histograms for the last and second last block types.
// Decide over the split based on the total reduction of entropy across
// all contexts.
std::vector<double> entropy(num_contexts_);
std::vector<HistogramType> combined_histo(2 * num_contexts_);
std::vector<double> combined_entropy(2 * num_contexts_);
double diff[2] = { 0.0 };
for (size_t i = 0; i < num_contexts_; ++i) {
size_t curr_histo_ix = curr_histogram_ix_ + i;
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
alphabet_size_);
for (size_t j = 0; j < 2; ++j) {
size_t jx = j * num_contexts_ + i;
size_t last_histogram_ix = last_histogram_ix_[j] + i;
combined_histo[jx] = (*histograms_)[curr_histo_ix];
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[jx] = BitsEntropy(
&combined_histo[jx].data_[0], alphabet_size_);
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
}
}
if (split_->num_types < max_block_types_ &&
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = split_->num_types * num_contexts_;
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[num_contexts_ + i] = last_entropy_[i];
last_entropy_[i] = entropy[i];
}
++num_blocks_;
++split_->num_types;
curr_histogram_ix_ += num_contexts_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] =
combined_histo[num_contexts_ + i];
last_entropy_[num_contexts_ + i] = last_entropy_[i];
last_entropy_[i] = combined_entropy[num_contexts_ + i];
(*histograms_)[curr_histogram_ix_ + i].Clear();
}
++num_blocks_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
last_entropy_[i] = combined_entropy[i];
if (split_->num_types == 1) {
last_entropy_[num_contexts_ + i] = last_entropy_[i];
}
(*histograms_)[curr_histogram_ix_ + i].Clear();
}
block_size_ = 0;
if (++merge_last_count_ > 1) {
target_block_size_ += min_block_size_;
}
}
}
if (is_final) {
(*histograms_).resize(split_->num_types * num_contexts_);
split_->types.resize(num_blocks_);
split_->lengths.resize(num_blocks_);
}
}
private:
static const int kMaxBlockTypes = 256;
// Alphabet size of particular block category.
const size_t alphabet_size_;
const size_t num_contexts_;
const size_t max_block_types_;
// We collect at least this many symbols for each block.
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
size_t target_block_size_;
// The number of symbols in the current histogram.
size_t block_size_;
// Offset of the current histogram.
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
std::vector<double> last_entropy_;
// The number of times we merged the current block with the last one.
size_t merge_last_count_;
};
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
ContextBlockSplitter<HistogramLiteral> lit_blocks(
256, num_contexts, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = ringbuffer[pos & mask];
lit_blocks.AddSymbol(literal, static_context_map[context]);
prev_byte2 = prev_byte;
prev_byte = literal;
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len()) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_);
}
}
}
lit_blocks.FinishBlock(/* is_final = */ true);
cmd_blocks.FinishBlock(/* is_final = */ true);
dist_blocks.FinishBlock(/* is_final = */ true);
mb->literal_context_map.resize(
mb->literal_split.num_types << kLiteralContextBits);
for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
mb->literal_context_map[(i << kLiteralContextBits) + j] =
static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
}
}
}
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb) {
uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
good_for_rle);
}
for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
&mb->command_histograms[i].data_[0],
good_for_rle);
}
size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48u << distance_postfix_bits);
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(num_distance_codes,
&mb->distance_histograms[i].data_[0],
good_for_rle);
}
delete[] good_for_rle;
}
} // namespace brotli
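
FinishBlock's three-way choice reduces to comparing the entropy deltas diff[0] and diff[1] against split_threshold_. A compact sketch of just that decision rule, omitting the cap on split_->num_types and using hypothetical deltas:

#include <cstdio>

// diff[j] = entropy(current + last_j) - entropy(current) - entropy(last_j):
// the bits lost by merging the current block into block type j
// (j == 0: last block's type, j == 1: second last block's type).
static const char* Decide(double diff0, double diff1, double threshold) {
  if (diff0 > threshold && diff1 > threshold) return "new block type";
  if (diff1 < diff0 - 20.0) return "reuse second last type";
  return "merge into last block";
}

int main() {
  // threshold 400.0, as used for literal blocks above
  printf("%s\n", Decide(450.0, 500.0, 400.0));  // new block type
  printf("%s\n", Decide(300.0, 120.0, 400.0));  // reuse second last type
  printf("%s\n", Decide(100.0, 350.0, 400.0));  // merge into last block
}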


@ -0,0 +1,80 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
#include <vector>
#include "./command.h"
#include "./histogram.h"
namespace brotli {
struct BlockSplit {
BlockSplit(void) : num_types(0) {}
size_t num_types;
std::vector<uint8_t> types;
std::vector<uint32_t> lengths;
};
struct MetaBlockSplit {
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
std::vector<uint32_t> literal_context_map;
std::vector<uint32_t> distance_context_map;
std::vector<HistogramLiteral> literal_histograms;
std::vector<HistogramCommand> command_histograms;
std::vector<HistogramDistance> distance_histograms;
};
// Uses the slow shortest-path block splitter and does context clustering.
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb);
// Uses a fast greedy block splitter that tries to merge the current block
// with the last or the second last block, and does not do any context
// modeling.
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
// Uses a fast greedy block splitter that tries to merge the current block
// with the last or the second last block, and uses a static context
// clustering that is the same for all block types.
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb);
} // namespace brotli
#endif // BROTLI_ENC_METABLOCK_H_

modules/brotli/enc/port.h

@ -0,0 +1,142 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Macros for endianness, branch prediction and unaligned loads and stores.
#ifndef BROTLI_ENC_PORT_H_
#define BROTLI_ENC_PORT_H_
#include <assert.h>
#include <string.h>
#include "./types.h"
#if defined OS_LINUX || defined OS_CYGWIN
#include <endian.h>
#elif defined OS_FREEBSD
#include <machine/endian.h>
#elif defined OS_MACOSX
#include <machine/endian.h>
/* Let's try and follow the Linux convention */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#endif
// define the macro IS_LITTLE_ENDIAN
// using the above endian definitions from endian.h if
// endian.h was included
#ifdef __BYTE_ORDER
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#endif
#else
#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#endif
#endif // __BYTE_ORDER
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#endif
// Enable little-endian optimization for x64 architecture on Windows.
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
#endif
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
(defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
// Portable handling of unaligned loads, stores, and copies.
// On some platforms, like ARM, the copy functions can be more efficient
// than a load and a store.
#if defined(ARCH_PIII) || \
defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
// but note: the FPU still sends unaligned loads and stores to a trap handler!
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
(*reinterpret_cast<uint64_t *>(_p) = (_val))
#elif defined(__arm__) && \
!defined(__ARM_ARCH_5__) && \
!defined(__ARM_ARCH_5T__) && \
!defined(__ARM_ARCH_5TE__) && \
!defined(__ARM_ARCH_5TEJ__) && \
!defined(__ARM_ARCH_6__) && \
!defined(__ARM_ARCH_6J__) && \
!defined(__ARM_ARCH_6K__) && \
!defined(__ARM_ARCH_6Z__) && \
!defined(__ARM_ARCH_6ZK__) && \
!defined(__ARM_ARCH_6T2__)
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode).
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#else
// These functions are provided for architectures that don't support
// unaligned loads and stores.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
uint32_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
memcpy(p, &v, sizeof v);
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#endif
#endif // BROTLI_ENC_PORT_H_
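
The memcpy fallback at the end is the standard portable idiom: a fixed-size memcpy compiles down to a single load or store wherever the hardware allows it, and stays correct (if slower) everywhere else. A minimal sketch:

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t Load32(const void* p) {
  uint32_t t;
  memcpy(&t, p, sizeof t);  // typically one MOV on x86/x86-64
  return t;
}

int main() {
  unsigned char buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  // Reading at offset 1 is misaligned for uint32_t, but safe here.
  printf("0x%08x\n", Load32(buf + 1));  // 0x05040302 on little-endian
}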


@ -0,0 +1,79 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions for encoding integers into prefix codes, the number of extra
// bits, and the actual values of the extra bits.
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
#include "./fast_log.h"
#include "./types.h"
namespace brotli {
static const uint32_t kNumInsertLenPrefixes = 24;
static const uint32_t kNumCopyLenPrefixes = 24;
static const uint32_t kNumCommandPrefixes = 704;
static const uint32_t kNumBlockLenPrefixes = 26;
static const uint32_t kNumDistanceShortCodes = 16;
static const uint32_t kNumDistancePrefixes = 520;
// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
uint32_t offset;
uint32_t nbits;
};
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
{ 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
{ 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
{ 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
{ 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
{8433, 13}, {16625, 24}
};
inline void GetBlockLengthPrefixCode(uint32_t len, uint32_t* code,
uint32_t* n_extra, uint32_t* extra) {
*code = 0;
while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
++(*code);
}
*n_extra = kBlockLengthPrefixCode[*code].nbits;
*extra = len - kBlockLengthPrefixCode[*code].offset;
}
inline void PrefixEncodeCopyDistance(size_t distance_code,
size_t num_direct_codes,
size_t postfix_bits,
uint16_t* code,
uint32_t* extra_bits) {
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
*code = static_cast<uint16_t>(distance_code);
*extra_bits = 0;
return;
}
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
size_t bucket = Log2FloorNonZero(distance_code) - 1;
size_t postfix_mask = (1 << postfix_bits) - 1;
size_t postfix = distance_code & postfix_mask;
size_t prefix = (distance_code >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
*code = static_cast<uint16_t>(
(kNumDistanceShortCodes + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
*extra_bits = static_cast<uint32_t>(
(nbits << 24) | ((distance_code - offset) >> postfix_bits));
}
} // namespace brotli
#endif // BROTLI_ENC_PREFIX_H_
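
A worked example for GetBlockLengthPrefixCode: a block length of 30 falls in the bucket [25, 33), so the code is 5 with 3 extra bits and extra value 30 - 25 = 5. A standalone sketch using the same table:

#include <cstdint>
#include <cstdio>

struct PrefixCodeRange { uint32_t offset, nbits; };

static const PrefixCodeRange kBlockLengthPrefixCode[26] = {
  {1, 2}, {5, 2}, {9, 2}, {13, 2}, {17, 3}, {25, 3}, {33, 3}, {41, 3},
  {49, 4}, {65, 4}, {81, 4}, {97, 4}, {113, 5}, {145, 5}, {177, 5},
  {209, 5}, {241, 6}, {305, 6}, {369, 7}, {497, 8}, {753, 9}, {1265, 10},
  {2289, 11}, {4337, 12}, {8433, 13}, {16625, 24}
};

int main() {
  uint32_t len = 30, code = 0;
  // Walk forward while the next bucket still starts at or below len.
  while (code < 25 && len >= kBlockLengthPrefixCode[code + 1].offset) ++code;
  uint32_t n_extra = kBlockLengthPrefixCode[code].nbits;
  uint32_t extra = len - kBlockLengthPrefixCode[code].offset;
  printf("code=%u n_extra=%u extra=%u\n", code, n_extra, extra);  // 5 3 5
}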


@ -0,0 +1,145 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Sliding window over the input data.
#ifndef BROTLI_ENC_RINGBUFFER_H_
#define BROTLI_ENC_RINGBUFFER_H_
#include <cstdlib> /* free, realloc */
#include "./port.h"
#include "./types.h"
namespace brotli {
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
// data in a circular manner: writing a byte writes it to:
// `position() % (1 << window_bits)'.
// For convenience, the RingBuffer array contains another copy of the
// first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
// and another copy of the last two bytes:
// buffer_[-1] == buffer_[(1 << window_bits) - 1] and
// buffer_[-2] == buffer_[(1 << window_bits) - 2].
class RingBuffer {
public:
RingBuffer(int window_bits, int tail_bits)
: size_(1u << window_bits),
mask_((1u << window_bits) - 1),
tail_size_(1u << tail_bits),
total_size_(size_ + tail_size_),
cur_size_(0),
pos_(0),
data_(0),
buffer_(0) {}
~RingBuffer(void) {
free(data_);
}
// Allocates or re-allocates data_ to the given length plus some slack
// region before and after. Fills the slack regions with zeros.
inline void InitBuffer(const uint32_t buflen) {
static const size_t kSlackForEightByteHashingEverywhere = 7;
cur_size_ = buflen;
data_ = static_cast<uint8_t*>(realloc(
data_, 2 + buflen + kSlackForEightByteHashingEverywhere));
buffer_ = data_ + 2;
buffer_[-2] = buffer_[-1] = 0;
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
buffer_[cur_size_ + i] = 0;
}
}
// Push bytes into the ring buffer.
void Write(const uint8_t *bytes, size_t n) {
if (pos_ == 0 && n < tail_size_) {
// Special case for the first write: to process the first block, we don't
// need to allocate the whole ringbuffer and we don't need the tail
// either. However, we do this memory usage optimization only if the
// first write is less than the tail size, which is also the input block
// size, otherwise it is likely that other blocks will follow and we
// will need to reallocate to the full size anyway.
pos_ = static_cast<uint32_t>(n);
InitBuffer(pos_);
memcpy(buffer_, bytes, n);
return;
}
if (cur_size_ < total_size_) {
// Lazily allocate the full buffer.
InitBuffer(total_size_);
// Initialize the last two bytes to zero, so that we don't have to worry
// later when we copy the last two bytes to the first two positions.
buffer_[size_ - 2] = 0;
buffer_[size_ - 1] = 0;
}
const size_t masked_pos = pos_ & mask_;
// The length of the writes is limited so that we do not need to worry
// about a write wrapping around the ring buffer more than once.
WriteTail(bytes, n);
if (PREDICT_TRUE(masked_pos + n <= size_)) {
// A single write fits.
memcpy(&buffer_[masked_pos], bytes, n);
} else {
// Split into two writes.
// Copy into the end of the buffer, including the tail buffer.
memcpy(&buffer_[masked_pos], bytes,
std::min(n, total_size_ - masked_pos));
// Copy into the beginning of the buffer
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
n - (size_ - masked_pos));
}
buffer_[-2] = buffer_[size_ - 2];
buffer_[-1] = buffer_[size_ - 1];
pos_ += static_cast<uint32_t>(n);
if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
}
}
void Reset(void) {
pos_ = 0;
}
// Logical cursor position in the ring buffer.
uint32_t position(void) const { return pos_; }
// Bit mask for getting the physical position for a logical position.
uint32_t mask(void) const { return mask_; }
uint8_t *start(void) { return &buffer_[0]; }
const uint8_t *start(void) const { return &buffer_[0]; }
private:
void WriteTail(const uint8_t *bytes, size_t n) {
const size_t masked_pos = pos_ & mask_;
if (PREDICT_FALSE(masked_pos < tail_size_)) {
// Just fill the tail buffer with the beginning data.
const size_t p = size_ + masked_pos;
memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
}
}
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
const uint32_t size_;
const uint32_t mask_;
const uint32_t tail_size_;
const uint32_t total_size_;
uint32_t cur_size_;
// Position to write in the ring buffer.
uint32_t pos_;
// The actual ring buffer containing the copy of the last two bytes, the data,
// and the copy of the beginning as a tail.
uint8_t *data_;
// The start of the ringbuffer.
uint8_t *buffer_;
};
} // namespace brotli
#endif // BROTLI_ENC_RINGBUFFER_H_
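
The tail copy exists so that match scans can run past the physical end of the window without a bounds check: positions i and i + (1 << window_bits) alias the same byte for i < (1 << tail_bits). A toy sketch of that invariant with shrunken sizes:

#include <cstddef>
#include <cstdio>
#include <cstring>

int main() {
  // Toy ring: window_bits = 4 (16 bytes), tail_bits = 2 (4-byte tail copy).
  const size_t size = 16, tail = 4;
  unsigned char buffer[16 + 4];
  const char* input = "abcdefghijklmnop";  // exactly one window of data
  memcpy(buffer, input, size);
  memcpy(buffer + size, input, tail);      // mirror the first tail bytes
  // A scan starting near the end may run into the tail region safely:
  for (size_t i = 0; i < tail; ++i) {
    printf("%c == %c\n", buffer[i], buffer[i + size]);
  }
}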


@ -0,0 +1,455 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "./static_dict.h"
#include <algorithm>
#include "./dictionary.h"
#include "./find_match_length.h"
#include "./static_dict_lut.h"
#include "./transform.h"
namespace brotli {
inline uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - kDictNumBits);
}
inline void AddMatch(size_t distance, size_t len, size_t len_code,
uint32_t* matches) {
uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);
matches[len] = std::min(matches[len], match);
}
inline size_t DictMatchLength(const uint8_t* data,
size_t id,
size_t len,
size_t maxlen) {
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
std::min(len, maxlen));
}
inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) return false;
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
const uint8_t* dict = &kBrotliDictionary[offset];
if (w.transform == 0) {
// Match against base dictionary word.
return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
} else if (w.transform == 10) {
// Match against uppercase first transform.
// Note that there are only ASCII uppercase words in the lookup table.
return (dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
// Match against uppercase all transform.
// Note that there are only ASCII uppercase words in the lookup table.
for (size_t i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return false;
} else {
if (dict[i] != data[i]) return false;
}
}
return true;
}
}
bool FindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
uint32_t* matches) {
bool found_match = false;
size_t key = Hash(data);
size_t bucket = kStaticDictionaryBuckets[key];
if (bucket != 0) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
const size_t matchlen = DictMatchLength(data, id, l, max_length);
// Transform "" + kIdentity + ""
if (matchlen == l) {
AddMatch(id, l, l, matches);
found_match = true;
}
// Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "
if (matchlen >= l - 1) {
AddMatch(id + 12 * n, l - 1, l, matches);
if (l + 2 < max_length &&
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
data[l + 2] == ' ') {
AddMatch(id + 49 * n, l + 3, l, matches);
}
found_match = true;
}
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
size_t minlen = min_length;
if (l > 9) minlen = std::max(minlen, l - 9);
size_t maxlen = std::min(matchlen, l - 2);
for (size_t len = minlen; len <= maxlen; ++len) {
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
found_match = true;
}
if (matchlen < l || l + 6 >= max_length) {
continue;
}
const uint8_t* s = &data[l];
// Transforms "" + kIdentity + <suffix>
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
if (s[1] == 'a') {
if (s[2] == ' ') {
AddMatch(id + 28 * n, l + 3, l, matches);
} else if (s[2] == 's') {
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
} else if (s[2] == 't') {
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
} else if (s[2] == 'n') {
if (s[3] == 'd' && s[4] == ' ') {
AddMatch(id + 10 * n, l + 5, l, matches);
}
}
} else if (s[1] == 'b') {
if (s[2] == 'y' && s[3] == ' ') {
AddMatch(id + 38 * n, l + 4, l, matches);
}
} else if (s[1] == 'i') {
if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
} else if (s[2] == 's') {
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
}
} else if (s[1] == 'f') {
if (s[2] == 'o') {
if (s[3] == 'r' && s[4] == ' ') {
AddMatch(id + 25 * n, l + 5, l, matches);
}
} else if (s[2] == 'r') {
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
AddMatch(id + 37 * n, l + 6, l, matches);
}
}
} else if (s[1] == 'o') {
if (s[2] == 'f') {
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
} else if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
}
} else if (s[1] == 'n') {
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
AddMatch(id + 80 * n, l + 5, l, matches);
}
} else if (s[1] == 't') {
if (s[2] == 'h') {
if (s[3] == 'e') {
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
} else if (s[3] == 'a') {
if (s[4] == 't' && s[5] == ' ') {
AddMatch(id + 29 * n, l + 6, l, matches);
}
}
} else if (s[2] == 'o') {
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
}
} else if (s[1] == 'w') {
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
AddMatch(id + 35 * n, l + 6, l, matches);
}
}
} else if (s[0] == '"') {
AddMatch(id + 19 * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + 21 * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + 20 * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + 31 * n, l + 2, l, matches);
if (s[2] == 'T' && s[3] == 'h') {
if (s[4] == 'e') {
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
} else if (s[4] == 'i') {
if (s[5] == 's' && s[6] == ' ') {
AddMatch(id + 75 * n, l + 7, l, matches);
}
}
}
}
} else if (s[0] == ',') {
AddMatch(id + 76 * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + 14 * n, l + 2, l, matches);
}
} else if (s[0] == '\n') {
AddMatch(id + 22 * n, l + 1, l, matches);
if (s[1] == '\t') {
AddMatch(id + 50 * n, l + 2, l, matches);
}
} else if (s[0] == ']') {
AddMatch(id + 24 * n, l + 1, l, matches);
} else if (s[0] == '\'') {
AddMatch(id + 36 * n, l + 1, l, matches);
} else if (s[0] == ':') {
AddMatch(id + 51 * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + 57 * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + 70 * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + 86 * n, l + 2, l, matches);
}
} else if (s[0] == 'a') {
if (s[1] == 'l' && s[2] == ' ') {
AddMatch(id + 84 * n, l + 3, l, matches);
}
} else if (s[0] == 'e') {
if (s[1] == 'd') {
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
} else if (s[1] == 'r') {
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
} else if (s[1] == 's') {
if (s[2] == 't' && s[3] == ' ') {
AddMatch(id + 95 * n, l + 4, l, matches);
}
}
} else if (s[0] == 'f') {
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
AddMatch(id + 90 * n, l + 4, l, matches);
}
} else if (s[0] == 'i') {
if (s[1] == 'v') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 92 * n, l + 4, l, matches);
}
} else if (s[1] == 'z') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 100 * n, l + 4, l, matches);
}
}
} else if (s[0] == 'l') {
if (s[1] == 'e') {
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
AddMatch(id + 93 * n, l + 5, l, matches);
}
} else if (s[1] == 'y') {
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
}
} else if (s[0] == 'o') {
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
AddMatch(id + 106 * n, l + 4, l, matches);
}
}
} else {
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, data, max_length)) {
continue;
}
// Transform "" + kUppercase{First,All} + ""
AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
found_match = true;
if (l + 1 >= max_length) {
continue;
}
// Transforms "" + kUppercase{First,All} + <suffix>
const uint8_t* s = &data[l];
if (s[0] == ' ') {
AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
} else if (s[0] == '"') {
AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
}
} else if (s[0] == ',') {
AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
}
} else if (s[0] == '\'') {
AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
}
}
}
}
}
// Transforms with prefixes " " and "."
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
bool is_space = (data[0] == ' ');
key = Hash(&data[1]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
// Transforms " " + kIdentity + "" and "." + kIdentity + ""
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
found_match = true;
if (l + 2 >= max_length) {
continue;
}
// Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
const uint8_t* s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
} else if (is_space) {
if (s[0] == ',') {
AddMatch(id + 103 * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + 33 * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + 71 * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + 52 * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + 81 * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + 98 * n, l + 3, l, matches);
}
}
}
} else if (is_space) {
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
// Transforms " " + kUppercase{First,All} + ""
AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
found_match = true;
if (l + 2 >= max_length) {
continue;
}
// Transforms " " + kUppercase{First,All} + <suffix>
const uint8_t* s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
} else if (s[0] == ',') {
if (!t) {
AddMatch(id + 109 * n, l + 2, l, matches);
}
if (s[1] == ' ') {
AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
}
}
}
}
}
if (max_length >= 6) {
// Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
(data[0] == 0xc2 && data[1] == 0xa0)) {
key = Hash(&data[2]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
found_match = true;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
AddMatch(id + t * n, l + 3, l, matches);
found_match = true;
}
}
}
}
}
if (max_length >= 9) {
// Transforms with prefixes " the " and ".com/"
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
data[3] == 'm' && data[4] == '/')) {
key = Hash(&data[5]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
found_match = true;
if (l + 5 < max_length) {
const uint8_t* s = &data[l + 5];
if (data[0] == ' ') {
if (l + 8 < max_length &&
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
AddMatch(id + 62 * n, l + 9, l, matches);
if (l + 12 < max_length &&
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
AddMatch(id + 73 * n, l + 13, l, matches);
}
}
}
}
}
}
}
}
return found_match;
}
} // namespace brotli
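
Each entry written by AddMatch packs the dictionary reference (word id plus transform index times the bucket size) into the high bits and the base word length code into the low five bits; matches[] is indexed by the produced match length, and std::min keeps the smallest candidate per length. A sketch of unpacking one entry:

#include <cstdint>
#include <cstdio>

static const uint32_t kInvalidMatch = 0xfffffff;

int main() {
  // Pack as AddMatch does: (distance << 5) + len_code.
  uint32_t match = (1234u << 5) + 21u;
  if (match != kInvalidMatch) {
    uint32_t dict_ref = match >> 5;  // word id + transform * bucket size
    uint32_t len_code = match & 31;  // base dictionary word length
    printf("dict_ref=%u len_code=%u\n", dict_ref, len_code);  // 1234 21
  }
}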


@ -0,0 +1,32 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Class to model the static dictionary.
#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_
#include "./types.h"
namespace brotli {
static const size_t kMaxDictionaryMatchLen = 37;
static const uint32_t kInvalidMatch = 0xfffffff;
// Matches data against static dictionary words, and for each length l,
// for which a match is found, updates matches[l] to be the minimum possible
// (distance << 5) + len_code.
// Prerequisites:
// matches array is at least kMaxDictionaryMatchLen + 1 long
// all elements are initialized to kInvalidMatch
bool FindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
uint32_t* matches);
} // namespace brotli
#endif // BROTLI_ENC_STATIC_DICT_H_

Diff not shown because of its large size.


@ -0,0 +1,114 @@
/* Copyright 2009 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Convenience routines to make Brotli I/O classes from memory containers and
// files.
#include "./streams.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
namespace brotli {
BrotliMemOut::BrotliMemOut(void* buf, size_t len)
: buf_(buf),
len_(len),
pos_(0) {}
void BrotliMemOut::Reset(void* buf, size_t len) {
buf_ = buf;
len_ = len;
pos_ = 0;
}
// Brotli output routine: copy n bytes to the output buffer.
bool BrotliMemOut::Write(const void *buf, size_t n) {
if (n + pos_ > len_)
return false;
char* p = reinterpret_cast<char*>(buf_) + pos_;
memcpy(p, buf, n);
pos_ += n;
return true;
}
BrotliStringOut::BrotliStringOut(std::string* buf, size_t max_size)
: buf_(buf),
max_size_(max_size) {
assert(buf->empty());
}
void BrotliStringOut::Reset(std::string* buf, size_t max_size) {
buf_ = buf;
max_size_ = max_size;
}
// Brotli output routine: add n bytes to a string.
bool BrotliStringOut::Write(const void *buf, size_t n) {
if (buf_->size() + n > max_size_)
return false;
buf_->append(static_cast<const char*>(buf), n);
return true;
}
BrotliMemIn::BrotliMemIn(const void* buf, size_t len)
: buf_(buf),
len_(len),
pos_(0) {}
void BrotliMemIn::Reset(const void* buf, size_t len) {
buf_ = buf;
len_ = len;
pos_ = 0;
}
// Brotli input routine: read the next chunk of memory.
const void* BrotliMemIn::Read(size_t n, size_t* output) {
if (pos_ == len_) {
return NULL;
}
if (n > len_ - pos_)
n = len_ - pos_;
const char* p = reinterpret_cast<const char*>(buf_) + pos_;
pos_ += n;
*output = n;
return p;
}
BrotliFileIn::BrotliFileIn(FILE* f, size_t max_read_size)
: f_(f),
buf_(new char[max_read_size]),
buf_size_(max_read_size) { }
BrotliFileIn::~BrotliFileIn(void) {
delete[] buf_;
}
const void* BrotliFileIn::Read(size_t n, size_t* bytes_read) {
if (n > buf_size_) {
n = buf_size_;
} else if (n == 0) {
return feof(f_) ? NULL : buf_;
}
*bytes_read = fread(buf_, 1, n, f_);
if (*bytes_read == 0) {
return NULL;
} else {
return buf_;
}
}
BrotliFileOut::BrotliFileOut(FILE* f) : f_(f) {}
bool BrotliFileOut::Write(const void* buf, size_t n) {
if (fwrite(buf, n, 1, f_) != 1) {
return false;
}
return true;
}
} // namespace brotli

@ -0,0 +1,121 @@
/* Copyright 2009 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Input and output classes for streaming brotli compression.
#ifndef BROTLI_ENC_STREAMS_H_
#define BROTLI_ENC_STREAMS_H_
#include <stdio.h>
#include <string>
#include "./port.h"
#include "./types.h"
namespace brotli {
// Input interface for the compression routines.
class BrotliIn {
public:
virtual ~BrotliIn(void) {}
// Return a pointer to the next block of input of at most n bytes.
// Return the actual length in *nread.
// At end of data, return NULL. Don't return NULL if there is more data
// to read, even if called with n == 0.
// Read will only be called if some of its bytes are needed.
virtual const void* Read(size_t n, size_t* nread) = 0;
};
// Output interface for the compression routines.
class BrotliOut {
public:
virtual ~BrotliOut(void) {}
// Write n bytes of data from buf.
// Return true if all written, false otherwise.
virtual bool Write(const void *buf, size_t n) = 0;
};
// Adapter class to make BrotliIn objects from raw memory.
class BrotliMemIn : public BrotliIn {
public:
BrotliMemIn(const void* buf, size_t len);
void Reset(const void* buf, size_t len);
// returns the amount of data consumed
size_t position(void) const { return pos_; }
const void* Read(size_t n, size_t* OUTPUT);
private:
const void* buf_; // start of input buffer
size_t len_; // length of input
size_t pos_; // current read position within input
};
// Adapter class to make BrotliOut objects from raw memory.
class BrotliMemOut : public BrotliOut {
public:
BrotliMemOut(void* buf, size_t len);
void Reset(void* buf, size_t len);
// returns the amount of data written
size_t position(void) const { return pos_; }
bool Write(const void* buf, size_t n);
private:
void* buf_; // start of output buffer
size_t len_; // length of output
size_t pos_; // current write position within output
};
// Adapter class to make BrotliOut objects from a string.
class BrotliStringOut : public BrotliOut {
public:
// Create a writer that appends its data to buf.
// buf->size() will grow to at most max_size
// buf is expected to be empty when constructing BrotliStringOut.
BrotliStringOut(std::string* buf, size_t max_size);
void Reset(std::string* buf, size_t max_len);
bool Write(const void* buf, size_t n);
private:
std::string* buf_; // start of output buffer
size_t max_size_; // max length of output
};
// Adapter class to make BrotliIn object from a file.
class BrotliFileIn : public BrotliIn {
public:
BrotliFileIn(FILE* f, size_t max_read_size);
~BrotliFileIn(void);
const void* Read(size_t n, size_t* bytes_read);
private:
FILE* f_;
char* buf_;
size_t buf_size_;
};
// Adapter class to make BrotliOut object from a file.
class BrotliFileOut : public BrotliOut {
public:
explicit BrotliFileOut(FILE* f);
bool Write(const void* buf, size_t n);
private:
FILE* f_;
};
} // namespace brotli
#endif // BROTLI_ENC_STREAMS_H_
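
[Editor's note] A usage sketch, not part of the imported sources: it wires the BrotliMemIn/BrotliMemOut adapters above to the BrotliCompress(params, in, out) entry point that bro.cc below calls; BrotliParams and BrotliCompress are assumed to come from ./compressor.h, as in that tool.

#include <stddef.h>
#include <stdint.h>
#include "./compressor.h"  // assumed home of BrotliParams / BrotliCompress
#include "./streams.h"

// Compress one flat buffer into another; returns the compressed size,
// or 0 if the output buffer was too small or compression failed.
size_t CompressBuffer(const uint8_t* input, size_t input_size,
                      uint8_t* output, size_t output_capacity) {
  brotli::BrotliParams params;  // default quality/window settings
  brotli::BrotliMemIn in(input, input_size);
  brotli::BrotliMemOut out(output, output_capacity);
  if (!brotli::BrotliCompress(params, &in, &out)) {
    return 0;
  }
  return out.position();  // bytes written so far, per BrotliMemOut
}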

@ -0,0 +1,248 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Transformations on dictionary words.
#ifndef BROTLI_ENC_TRANSFORM_H_
#define BROTLI_ENC_TRANSFORM_H_
#include <string>
#include "./dictionary.h"
namespace brotli {
enum WordTransformType {
kIdentity = 0,
kOmitLast1 = 1,
kOmitLast2 = 2,
kOmitLast3 = 3,
kOmitLast4 = 4,
kOmitLast5 = 5,
kOmitLast6 = 6,
kOmitLast7 = 7,
kOmitLast8 = 8,
kOmitLast9 = 9,
kUppercaseFirst = 10,
kUppercaseAll = 11,
kOmitFirst1 = 12,
kOmitFirst2 = 13,
kOmitFirst3 = 14,
kOmitFirst4 = 15,
kOmitFirst5 = 16,
kOmitFirst6 = 17,
kOmitFirst7 = 18,
kOmitFirst8 = 19,
kOmitFirst9 = 20
};
struct Transform {
const char* prefix;
WordTransformType word_transform;
const char* suffix;
};
static const Transform kTransforms[] = {
{ "", kIdentity, "" },
{ "", kIdentity, " " },
{ " ", kIdentity, " " },
{ "", kOmitFirst1, "" },
{ "", kUppercaseFirst, " " },
{ "", kIdentity, " the " },
{ " ", kIdentity, "" },
{ "s ", kIdentity, " " },
{ "", kIdentity, " of " },
{ "", kUppercaseFirst, "" },
{ "", kIdentity, " and " },
{ "", kOmitFirst2, "" },
{ "", kOmitLast1, "" },
{ ", ", kIdentity, " " },
{ "", kIdentity, ", " },
{ " ", kUppercaseFirst, " " },
{ "", kIdentity, " in " },
{ "", kIdentity, " to " },
{ "e ", kIdentity, " " },
{ "", kIdentity, "\"" },
{ "", kIdentity, "." },
{ "", kIdentity, "\">" },
{ "", kIdentity, "\n" },
{ "", kOmitLast3, "" },
{ "", kIdentity, "]" },
{ "", kIdentity, " for " },
{ "", kOmitFirst3, "" },
{ "", kOmitLast2, "" },
{ "", kIdentity, " a " },
{ "", kIdentity, " that " },
{ " ", kUppercaseFirst, "" },
{ "", kIdentity, ". " },
{ ".", kIdentity, "" },
{ " ", kIdentity, ", " },
{ "", kOmitFirst4, "" },
{ "", kIdentity, " with " },
{ "", kIdentity, "'" },
{ "", kIdentity, " from " },
{ "", kIdentity, " by " },
{ "", kOmitFirst5, "" },
{ "", kOmitFirst6, "" },
{ " the ", kIdentity, "" },
{ "", kOmitLast4, "" },
{ "", kIdentity, ". The " },
{ "", kUppercaseAll, "" },
{ "", kIdentity, " on " },
{ "", kIdentity, " as " },
{ "", kIdentity, " is " },
{ "", kOmitLast7, "" },
{ "", kOmitLast1, "ing " },
{ "", kIdentity, "\n\t" },
{ "", kIdentity, ":" },
{ " ", kIdentity, ". " },
{ "", kIdentity, "ed " },
{ "", kOmitFirst9, "" },
{ "", kOmitFirst7, "" },
{ "", kOmitLast6, "" },
{ "", kIdentity, "(" },
{ "", kUppercaseFirst, ", " },
{ "", kOmitLast8, "" },
{ "", kIdentity, " at " },
{ "", kIdentity, "ly " },
{ " the ", kIdentity, " of " },
{ "", kOmitLast5, "" },
{ "", kOmitLast9, "" },
{ " ", kUppercaseFirst, ", " },
{ "", kUppercaseFirst, "\"" },
{ ".", kIdentity, "(" },
{ "", kUppercaseAll, " " },
{ "", kUppercaseFirst, "\">" },
{ "", kIdentity, "=\"" },
{ " ", kIdentity, "." },
{ ".com/", kIdentity, "" },
{ " the ", kIdentity, " of the " },
{ "", kUppercaseFirst, "'" },
{ "", kIdentity, ". This " },
{ "", kIdentity, "," },
{ ".", kIdentity, " " },
{ "", kUppercaseFirst, "(" },
{ "", kUppercaseFirst, "." },
{ "", kIdentity, " not " },
{ " ", kIdentity, "=\"" },
{ "", kIdentity, "er " },
{ " ", kUppercaseAll, " " },
{ "", kIdentity, "al " },
{ " ", kUppercaseAll, "" },
{ "", kIdentity, "='" },
{ "", kUppercaseAll, "\"" },
{ "", kUppercaseFirst, ". " },
{ " ", kIdentity, "(" },
{ "", kIdentity, "ful " },
{ " ", kUppercaseFirst, ". " },
{ "", kIdentity, "ive " },
{ "", kIdentity, "less " },
{ "", kUppercaseAll, "'" },
{ "", kIdentity, "est " },
{ " ", kUppercaseFirst, "." },
{ "", kUppercaseAll, "\">" },
{ " ", kIdentity, "='" },
{ "", kUppercaseFirst, "," },
{ "", kIdentity, "ize " },
{ "", kUppercaseAll, "." },
{ "\xc2\xa0", kIdentity, "" },
{ " ", kIdentity, "," },
{ "", kUppercaseFirst, "=\"" },
{ "", kUppercaseAll, "=\"" },
{ "", kIdentity, "ous " },
{ "", kUppercaseAll, ", " },
{ "", kUppercaseFirst, "='" },
{ " ", kUppercaseFirst, "," },
{ " ", kUppercaseAll, "=\"" },
{ " ", kUppercaseAll, ", " },
{ "", kUppercaseAll, "," },
{ "", kUppercaseAll, "(" },
{ "", kUppercaseAll, ". " },
{ " ", kUppercaseAll, "." },
{ "", kUppercaseAll, "='" },
{ " ", kUppercaseAll, ". " },
{ " ", kUppercaseFirst, "=\"" },
{ " ", kUppercaseAll, "='" },
{ " ", kUppercaseFirst, "='" },
};
static const size_t kNumTransforms =
sizeof(kTransforms) / sizeof(kTransforms[0]);
static const size_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
static size_t ToUpperCase(uint8_t *p, size_t len) {
if (len == 1 || p[0] < 0xc0) {
if (p[0] >= 'a' && p[0] <= 'z') {
p[0] ^= 32;
}
return 1;
}
if (p[0] < 0xe0) {
p[1] ^= 32;
return 2;
}
if (len == 2) {
return 2;
}
p[2] ^= 5;
return 3;
}
inline std::string TransformWord(
WordTransformType transform_type, const uint8_t* word, size_t len) {
if (transform_type <= kOmitLast9) {
if (len <= static_cast<size_t>(transform_type)) {
return std::string();
}
return std::string(word, word + len - transform_type);
}
if (transform_type >= kOmitFirst1) {
const size_t skip = transform_type - (kOmitFirst1 - 1);
if (len <= skip) {
return std::string();
}
return std::string(word + skip, word + len);
}
std::string ret = std::string(word, word + len);
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]);
if (transform_type == kUppercaseFirst) {
ToUpperCase(uppercase, len);
} else if (transform_type == kUppercaseAll) {
size_t position = 0;
while (position < len) {
size_t step = ToUpperCase(uppercase, len - position);
uppercase += step;
position += step;
}
}
return ret;
}
inline std::string ApplyTransform(
const Transform& t, const uint8_t* word, size_t len) {
return std::string(t.prefix) +
TransformWord(t.word_transform, word, len) + std::string(t.suffix);
}
inline std::string GetTransformedDictionaryWord(size_t len_code,
size_t word_id) {
size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
size_t t = word_id / num_words;
size_t word_idx = word_id % num_words;
offset += len_code * word_idx;
const uint8_t* word = &kBrotliDictionary[offset];
return ApplyTransform(kTransforms[t], word, len_code);
}
} // namespace brotli
#endif // BROTLI_ENC_TRANSFORM_H_
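
[Editor's note] A small example, not part of the imported sources, applying one kTransforms entry directly; index 4 is { "", kUppercaseFirst, " " } in the table above.

#include <stdint.h>
#include <stdio.h>
#include <string>
#include "./transform.h"

int main() {
  const uint8_t word[] = { 'h', 'e', 'l', 'l', 'o' };
  // ApplyTransform = prefix + TransformWord(word) + suffix.
  std::string out = brotli::ApplyTransform(brotli::kTransforms[4], word, 5);
  printf("\"%s\"\n", out.c_str());  // prints "Hello " (note the suffix space)
  return 0;
}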

@ -0,0 +1,29 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Common types */
#ifndef BROTLI_ENC_TYPES_H_
#define BROTLI_ENC_TYPES_H_
#include <stddef.h> /* for size_t */
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
typedef __int64 int64_t;
#else
#include <stdint.h>
#endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
#define MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | ((uint64_t)(low)))
#endif /* BROTLI_ENC_TYPES_H_ */
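
[Editor's note] A one-line example, not part of the imported sources: MAKE_UINT64_T assembles a 64-bit constant from two 32-bit halves, so pre-C99 MSVC builds need no ULL literals.

#include "./types.h"

static const uint64_t kTopBit = MAKE_UINT64_T(0x80000000, 0x00000000);
// kTopBit == 0x8000000000000000, i.e. bit 63 set.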

@ -0,0 +1,83 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Heuristics for deciding about the UTF8-ness of strings.
#include "./utf8_util.h"
#include "./types.h"
namespace brotli {
namespace {
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
// ASCII
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
if (*symbol > 0) {
return 1;
}
}
// 2-byte UTF8
if (size > 1u &&
(input[0] & 0xe0) == 0xc0 &&
(input[1] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x1f) << 6) |
(input[1] & 0x3f));
if (*symbol > 0x7f) {
return 2;
}
}
// 3-byte UTF8
if (size > 2u &&
(input[0] & 0xf0) == 0xe0 &&
(input[1] & 0xc0) == 0x80 &&
(input[2] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x0f) << 12) |
((input[1] & 0x3f) << 6) |
(input[2] & 0x3f));
if (*symbol > 0x7ff) {
return 3;
}
}
// 4-byte UTF8
if (size > 3u &&
(input[0] & 0xf8) == 0xf0 &&
(input[1] & 0xc0) == 0x80 &&
(input[2] & 0xc0) == 0x80 &&
(input[3] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x07) << 18) |
((input[1] & 0x3f) << 12) |
((input[2] & 0x3f) << 6) |
(input[3] & 0x3f));
if (*symbol > 0xffff && *symbol <= 0x10ffff) {
return 4;
}
}
// Not UTF8, emit a special symbol above the UTF8-code space
*symbol = 0x110000 | input[0];
return 1;
}
} // namespace
// Returns true if at least min_fraction of the data is UTF8-encoded.
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction) {
size_t size_utf8 = 0;
size_t i = 0;
while (i < length) {
int symbol;
size_t bytes_read = ParseAsUTF8(
&symbol, &data[(pos + i) & mask], length - i);
i += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
return size_utf8 > min_fraction * static_cast<double>(length);
}
} // namespace brotli

@ -0,0 +1,25 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Heuristics for deciding about the UTF8-ness of strings.
#ifndef BROTLI_ENC_UTF8_UTIL_H_
#define BROTLI_ENC_UTF8_UTIL_H_
#include "./types.h"
namespace brotli {
static const double kMinUTF8Ratio = 0.75;
// Returns true if at least min_fraction of the bytes between pos and
// pos + length in the (data, mask) ringbuffer is UTF8-encoded.
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction);
} // namespace brotli
#endif // BROTLI_ENC_UTF8_UTIL_H_
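
[Editor's note] A usage sketch, not part of the imported sources: for a flat (non-wrapping) buffer, an all-ones mask makes the ring-buffer index (pos + i) & mask degenerate to a plain linear index.

#include <stddef.h>
#include <stdint.h>
#include "./utf8_util.h"

bool BufferIsMostlyUTF8(const uint8_t* buf, size_t len) {
  const size_t kNoWrapMask = ~static_cast<size_t>(0);
  return brotli::IsMostlyUTF8(buf, /*pos=*/0, kNoWrapMask, len,
                              brotli::kMinUTF8Ratio);
}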

@ -0,0 +1,84 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Write bits into a byte array.
#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_
#include <assert.h>
#include <stdio.h>
#include "./port.h"
#include "./types.h"
namespace brotli {
//#define BIT_WRITER_DEBUG
// This function writes bits into bytes in increasing addresses, and within
// a byte least-significant-bit first.
//
// The function can write up to 56 bits in one go with WriteBits
// Example: let's assume that 3 bits (Rs below) have been written already:
//
// BYTE-0 BYTE+1 BYTE+2
//
// 0000 0RRR 0000 0000 0000 0000
//
// Now, we could write 5 or fewer bits in MSB by just shifting by 3
// and OR'ing to BYTE-0.
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
inline void WriteBits(size_t n_bits,
uint64_t bits,
size_t * __restrict pos,
uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", static_cast<int>(n_bits),
       static_cast<unsigned long long>(bits), static_cast<int>(*pos));
#endif
assert((bits >> n_bits) == 0);
assert(n_bits <= 56);
#ifdef IS_LITTLE_ENDIAN
// This branch of the code can write up to 56 bits at a time,
// 7 bits are lost by being perhaps already in *p and at least
// 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
// bits are in *p and we write 57 bits, then the next write will
// access a byte that was never initialized).
uint8_t *p = &array[*pos >> 3];
uint64_t v = *p;
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64(p, v); // Set some bits.
*pos += n_bits;
#else
// implicit & 0xff is assumed for uint8_t arithmetic
uint8_t *array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= static_cast<uint8_t>(bits);
for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = static_cast<uint8_t>(bits);
}
*array_pos = 0;
*pos += n_bits;
#endif
}
inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBitsPrepareStorage %10d\n", pos);
#endif
assert((pos & 7) == 0);
array[pos >> 3] = 0;
}
} // namespace brotli
#endif // BROTLI_ENC_WRITE_BITS_H_
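
[Editor's note] A self-contained sketch, not part of the imported sources, demonstrating the LSB-first packing described above: three small fields are appended and land in the low bits of the first byte.

#include <stdio.h>
#include "./write_bits.h"

int main() {
  uint8_t storage[16];  // room for the 64-bit store WriteBits may perform
  size_t pos = 0;
  brotli::WriteBitsPrepareStorage(pos, storage);  // zero the first byte
  brotli::WriteBits(2, 2, &pos, storage);  // bits 0..1 <- binary 10
  brotli::WriteBits(3, 5, &pos, storage);  // bits 2..4 <- binary 101
  brotli::WriteBits(4, 6, &pos, storage);  // bits 5..8 <- binary 0110
  // pos is now 9; storage[0] == 0xd6 (0b11010110, fields packed LSB-first).
  printf("%d bits, first byte 0x%02x\n", static_cast<int>(pos), storage[0]);
  return 0;
}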

@ -0,0 +1,25 @@
#brotli/tools
include ../shared.mk
BROTLI = ..
ENCOBJ = $(BROTLI)/enc/*.o
DECOBJ = $(BROTLI)/dec/*.o
EXECUTABLES=bro
EXE_OBJS=$(patsubst %, %.o, $(EXECUTABLES))
all : $(EXECUTABLES)
$(EXECUTABLES) : $(EXE_OBJS) deps
$(CXX) $(LDFLAGS) $(ENCOBJ) $(DECOBJ) $@.o -o $@
deps :
$(MAKE) -C $(BROTLI)/dec
$(MAKE) -C $(BROTLI)/enc nodict
clean :
rm -f $(OBJS) $(EXE_OBJS) $(EXECUTABLES)
$(MAKE) -C $(BROTLI)/dec clean
$(MAKE) -C $(BROTLI)/enc clean

modules/brotli/tools/bro.cc (new file, 335 lines)
@ -0,0 +1,335 @@
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Example main() function for Brotli library. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <string>
#include "../dec/decode.h"
#include "../enc/compressor.h"
#if !defined(_WIN32)
#include <unistd.h>
#else
#include <io.h>
#define STDIN_FILENO _fileno(stdin)
#define STDOUT_FILENO _fileno(stdout)
#define S_IRUSR S_IREAD
#define S_IWUSR S_IWRITE
#define fdopen _fdopen
#define unlink _unlink
#define fopen ms_fopen
#define open ms_open
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#define fseek _fseeki64
#define ftell _ftelli64
#endif
static inline FILE* ms_fopen(const char *filename, const char *mode) {
FILE* result = 0;
fopen_s(&result, filename, mode);
return result;
}
static inline int ms_open(const char *filename, int oflag, int pmode) {
int result = -1;
_sopen_s(&result, filename, oflag | O_BINARY, _SH_DENYNO, pmode);
return result;
}
#endif /* WIN32 */
static bool ParseQuality(const char* s, int* quality) {
if (s[0] >= '0' && s[0] <= '9') {
*quality = s[0] - '0';
if (s[1] >= '0' && s[1] <= '9') {
*quality = *quality * 10 + s[1] - '0';
return s[2] == 0;
}
return s[1] == 0;
}
return false;
}
static void ParseArgv(int argc, char **argv,
char **input_path,
char **output_path,
int *force,
int *quality,
int *decompress,
int *repeat,
int *verbose,
int *lgwin) {
*force = 0;
*input_path = 0;
*output_path = 0;
*repeat = 1;
*verbose = 0;
*lgwin = 22;
{
size_t argv0_len = strlen(argv[0]);
*decompress =
argv0_len >= 5 && strcmp(&argv[0][argv0_len - 5], "unbro") == 0;
}
for (int k = 1; k < argc; ++k) {
if (!strcmp("--force", argv[k]) ||
!strcmp("-f", argv[k])) {
if (*force != 0) {
goto error;
}
*force = 1;
continue;
} else if (!strcmp("--decompress", argv[k]) ||
!strcmp("--uncompress", argv[k]) ||
!strcmp("-d", argv[k])) {
*decompress = 1;
continue;
} else if (!strcmp("--verbose", argv[k]) ||
!strcmp("-v", argv[k])) {
if (*verbose != 0) {
goto error;
}
*verbose = 1;
continue;
}
if (k < argc - 1) {
if (!strcmp("--input", argv[k]) ||
!strcmp("--in", argv[k]) ||
!strcmp("-i", argv[k])) {
if (*input_path != 0) {
goto error;
}
*input_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--output", argv[k]) ||
!strcmp("--out", argv[k]) ||
!strcmp("-o", argv[k])) {
if (*output_path != 0) {
goto error;
}
*output_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--quality", argv[k]) ||
!strcmp("-q", argv[k])) {
if (!ParseQuality(argv[k + 1], quality)) {
goto error;
}
++k;
continue;
} else if (!strcmp("--repeat", argv[k]) ||
!strcmp("-r", argv[k])) {
if (!ParseQuality(argv[k + 1], repeat)) {
goto error;
}
++k;
continue;
} else if (!strcmp("--window", argv[k]) ||
!strcmp("-w", argv[k])) {
if (!ParseQuality(argv[k + 1], lgwin)) {
goto error;
}
if (*lgwin < 10 || *lgwin >= 25) {
goto error;
}
++k;
continue;
}
}
goto error;
}
return;
error:
fprintf(stderr,
"Usage: %s [--force] [--quality n] [--decompress]"
" [--input filename] [--output filename] [--repeat iters]"
" [--verbose] [--window n]\n",
argv[0]);
exit(1);
}
static FILE* OpenInputFile(const char* input_path) {
if (input_path == 0) {
return fdopen(STDIN_FILENO, "rb");
}
FILE* f = fopen(input_path, "rb");
if (f == 0) {
perror("fopen");
exit(1);
}
return f;
}
static FILE *OpenOutputFile(const char *output_path, const int force) {
if (output_path == 0) {
return fdopen(STDOUT_FILENO, "wb");
}
int excl = force ? 0 : O_EXCL;
int fd = open(output_path, O_CREAT | excl | O_WRONLY | O_TRUNC,
S_IRUSR | S_IWUSR);
if (fd < 0) {
if (!force) {
struct stat statbuf;
if (stat(output_path, &statbuf) == 0) {
fprintf(stderr, "output file exists\n");
exit(1);
}
}
perror("open");
exit(1);
}
return fdopen(fd, "wb");
}
static int64_t FileSize(char *path) {
FILE *f = fopen(path, "rb");
if (f == NULL) {
return -1;
}
if (fseek(f, 0L, SEEK_END) != 0) {
fclose(f);
return -1;
}
int64_t retval = ftell(f);
if (fclose(f) != 0) {
return -1;
}
return retval;
}
static const size_t kFileBufferSize = 65536;
/* Streaming decompression loop: refill the input buffer whenever the
   decoder reports NEEDS_MORE_INPUT and flush whenever it reports
   NEEDS_MORE_OUTPUT. */
static void Decompress(FILE* fin, FILE* fout) {
BrotliState* s = BrotliCreateState(NULL, NULL, NULL);
if (!s) {
fprintf(stderr, "out of memory\n");
exit(1);
}
uint8_t* input = new uint8_t[kFileBufferSize];
uint8_t* output = new uint8_t[kFileBufferSize];
size_t total_out;
size_t available_in;
const uint8_t* next_in;
size_t available_out = kFileBufferSize;
uint8_t* next_out = output;
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_INPUT;
while (1) {
if (result == BROTLI_RESULT_NEEDS_MORE_INPUT) {
if (feof(fin)) {
break;
}
available_in = fread(input, 1, kFileBufferSize, fin);
next_in = input;
if (ferror(fin)) {
break;
}
} else if (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
fwrite(output, 1, kFileBufferSize, fout);
if (ferror(fout)) {
break;
}
available_out = kFileBufferSize;
next_out = output;
} else {
break; /* Error or success. */
}
result = BrotliDecompressStream(&available_in, &next_in,
&available_out, &next_out, &total_out, s);
}
if (next_out != output) {
fwrite(output, 1, static_cast<size_t>(next_out - output), fout);
}
delete[] input;
delete[] output;
BrotliDestroyState(s);
if ((result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) || ferror(fout)) {
fprintf(stderr, "failed to write output\n");
exit(1);
} else if (result != BROTLI_RESULT_SUCCESS) { /* Error or needs more input. */
fprintf(stderr, "corrupt input\n");
exit(1);
}
}
int main(int argc, char** argv) {
char *input_path = 0;
char *output_path = 0;
int force = 0;
int quality = 11;
int decompress = 0;
int repeat = 1;
int verbose = 0;
int lgwin = 0;
ParseArgv(argc, argv, &input_path, &output_path, &force,
&quality, &decompress, &repeat, &verbose, &lgwin);
const clock_t clock_start = clock();
for (int i = 0; i < repeat; ++i) {
FILE* fin = OpenInputFile(input_path);
FILE* fout = OpenOutputFile(output_path, force);
if (decompress) {
Decompress(fin, fout);
} else {
brotli::BrotliParams params;
params.lgwin = lgwin;
params.quality = quality;
try {
brotli::BrotliFileIn in(fin, 1 << 16);
brotli::BrotliFileOut out(fout);
if (!BrotliCompress(params, &in, &out)) {
fprintf(stderr, "compression failed\n");
unlink(output_path);
exit(1);
}
} catch (std::bad_alloc&) {
fprintf(stderr, "not enough memory\n");
unlink(output_path);
exit(1);
}
}
if (fclose(fin) != 0) {
perror("fclose");
exit(1);
}
if (fclose(fout) != 0) {
perror("fclose");
exit(1);
}
}
if (verbose) {
const clock_t clock_end = clock();
double duration =
static_cast<double>(clock_end - clock_start) / CLOCKS_PER_SEC;
if (duration < 1e-9) {
duration = 1e-9;
}
int64_t uncompressed_size = FileSize(decompress ? output_path : input_path);
if (uncompressed_size == -1) {
fprintf(stderr, "failed to determine uncompressed file size\n");
exit(1);
}
double uncompressed_bytes_in_MB =
static_cast<double>(repeat * uncompressed_size) / (1024.0 * 1024.0);
if (decompress) {
printf("Brotli decompression speed: ");
} else {
printf("Brotli compression speed: ");
}
printf("%g MB/s\n", uncompressed_bytes_in_MB / duration);
}
return 0;
}

@ -0,0 +1,92 @@
#!/usr/bin/python
#
# Takes an .nroff source file and prints a text file in RFC format.
#
# Usage: rfc-format.py <source file>
import re
import sys
from subprocess import Popen, PIPE
def Readfile(fn):
f = open(fn, "r")
return f.read()
def FixNroffOutput(buf):
p = re.compile(r'(.*)FORMFEED(\[Page\s+\d+\])$')
strip_empty = False
out = ""
for line in buf.split("\n"):
line = line.replace("\xe2\x80\x99", "'")
line = line.replace("\xe2\x80\x90", "-")
for i in range(len(line)):
if ord(line[i]) > 128:
print >>sys.stderr, "Invalid character %d\n" % ord(line[i])
m = p.search(line)
if strip_empty and len(line) == 0:
continue
if m:
out += p.sub(r'\1 \2\n\f', line)
out += "\n"
strip_empty = True
else:
out += "%s\n" % line
strip_empty = False
return out.rstrip("\n")
def Nroff(buf):
p = Popen(["nroff", "-ms"], stdin=PIPE, stdout=PIPE)
out, err = p.communicate(input=buf)
return FixNroffOutput(out)
def FormatTocLine(section, title, page):
line = ""
level = 1
if section:
level = section.count(".")
for i in range(level):
line += " "
if section:
line += "%s " % section
line += "%s " % title
pagenum = "%d" % page
nspace = 72 - len(line) - len(pagenum)
if nspace % 2:
line += " "
for i in range(nspace / 2):
line += ". "
line += "%d\n" % page
return line
def CreateToc(buf):
p1 = re.compile(r'^((\d+\.)+)\s+(.*)$')
p2 = re.compile(r'^(Appendix [A-Z].)\s+(.*)$')
p3 = re.compile(r'\[Page (\d+)\]$')
found = 0
page = 1
out = ""
for line in buf.split("\n"):
m1 = p1.search(line)
m2 = p2.search(line)
m3 = p3.search(line)
if m1:
out += FormatTocLine(m1.group(1), m1.group(3), page)
elif m2:
out += FormatTocLine(m2.group(1), m2.group(2), page)
elif line.startswith("Authors"):
out += FormatTocLine(None, line, page)
elif m3:
page = int(m3.group(1)) + 1
return out
src = Readfile(sys.argv[1])
out = Nroff(src)
toc = CreateToc(out)
src = src.replace("INSERT_TOC_HERE", toc)
print Nroff(src)

@ -0,0 +1,14 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Defines a common version string used by all of the brotli tools. */
#ifndef BROTLI_TOOLS_VERSION_H_
#define BROTLI_TOOLS_VERSION_H_
#define BROTLI_VERSION "0.4.0"
#endif /* BROTLI_TOOLS_VERSION_H_ */

@ -6,14 +6,19 @@
MY_TEMP_DIR=`mktemp -d -t brotli_update.XXXXXX` || exit 1
git clone https://github.com/google/brotli ${MY_TEMP_DIR}/brotli
git -C ${MY_TEMP_DIR}/brotli checkout v0.4.0
-COMMIT=`(cd ${MY_TEMP_DIR}/brotli && git log | head -n 1)`
-perl -p -i -e "s/\[commit [0-9a-f]{40}\]/[${COMMIT}]/" README.mozilla;
+COMMIT=$(git -C ${MY_TEMP_DIR}/brotli rev-parse HEAD)
+perl -p -i -e "s/\[commit [0-9a-f]{40}\]/[commit ${COMMIT}]/" README.mozilla;
-rm -rf dec
-mv ${MY_TEMP_DIR}/brotli/dec dec
+DIRS="dec enc tools"
+for d in $DIRS; do
+  rm -rf $d
+  mv ${MY_TEMP_DIR}/brotli/$d $d
+done
 rm -rf ${MY_TEMP_DIR}
-hg add dec
+hg addremove $DIRS
echo "###"
echo "### Updated brotli/dec to $COMMIT."