snappy/encode.go

259 строки
7.1 KiB
Go
Исходник Обычный вид История

// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"io"
)
// maxOffset bounds how far back a copy back-reference may point, the same
// limit as the C++ code. Encode treats a candidate match whose distance is
// maxOffset or more as no match at all.
const maxOffset = 1 << 15
// emitLiteral writes a literal chunk for lit into dst and returns the number
// of bytes written (header plus literal bytes).
//
// The header stores len(lit)-1. Values below 60 are packed directly into the
// tag byte; otherwise the tag byte carries 60, 61, 62 or 63 and is followed by
// 1, 2, 3 or 4 little-endian bytes holding the value.
//
// An empty lit writes nothing and returns 0. Without that guard len(lit)-1
// would wrap around and a header announcing a 2^32-byte literal would be
// emitted.
//
// It panics if lit is too long to be described by a 4-byte length, or if dst
// is too short to hold the whole chunk.
func emitLiteral(dst, lit []byte) int {
	if len(lit) == 0 {
		return 0
	}
	// n is the length-minus-one value stored in the header. It is held in a
	// uint64 so that the range checks below behave the same on every
	// platform.
	i, n := 0, uint64(len(lit)-1)
	switch {
	case n < 60:
		dst[0] = uint8(n)<<2 | tagLiteral
		i = 1
	case n < 1<<8:
		dst[0] = 60<<2 | tagLiteral
		dst[1] = uint8(n)
		i = 2
	case n < 1<<16:
		dst[0] = 61<<2 | tagLiteral
		dst[1] = uint8(n)
		dst[2] = uint8(n >> 8)
		i = 3
	case n < 1<<24:
		dst[0] = 62<<2 | tagLiteral
		dst[1] = uint8(n)
		dst[2] = uint8(n >> 8)
		dst[3] = uint8(n >> 16)
		i = 4
	case n < 1<<32:
		dst[0] = 63<<2 | tagLiteral
		dst[1] = uint8(n)
		dst[2] = uint8(n >> 8)
		dst[3] = uint8(n >> 16)
		dst[4] = uint8(n >> 24)
		i = 5
	default:
		panic("snappy: source buffer is too long")
	}
	// copy silently truncates, so a short count means dst ran out of room.
	if copy(dst[i:], lit) != len(lit) {
		panic("snappy: destination buffer is too short")
	}
	return i + len(lit)
}
// emitCopy writes one or more copy chunks describing a back-reference of the
// given length at the given offset, and returns the number of bytes written
// to dst.
//
// Two chunk shapes are produced:
//   - a 2-byte tagCopy1 chunk when the remaining length is in [4, 11] and the
//     offset is below 1<<11; this always finishes the copy.
//   - a 3-byte tagCopy2 chunk covering at most 64 bytes, with the offset
//     stored as two little-endian bytes; longer copies repeat this form.
func emitCopy(dst []byte, offset, length int) int {
	written := 0
	for remaining := length; remaining > 0; {
		// Compact form: 3 bits of (length-4) and the top 3 offset bits share
		// the tag byte; the low 8 offset bits follow.
		if remaining >= 4 && remaining <= 11 && offset < 1<<11 {
			dst[written+0] = uint8(offset>>8)&0x07<<5 | uint8(remaining-4)<<2 | tagCopy1
			dst[written+1] = uint8(offset)
			return written + 2
		}

		// General form: take up to 64 bytes per chunk.
		chunk := remaining
		if chunk > 1<<6 {
			chunk = 1 << 6
		}
		dst[written+0] = uint8(chunk-1)<<2 | tagCopy2
		dst[written+1] = uint8(offset)
		dst[written+2] = uint8(offset >> 8)
		written += 3
		remaining -= chunk
	}
	return written
}
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
// It is valid to pass a nil dst.
func Encode(dst, src []byte) ([]byte, error) {
if n := MaxEncodedLen(len(src)); len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
// Return early if src is short.
if len(src) <= 4 {
if len(src) != 0 {
d += emitLiteral(dst[d:], src)
}
return dst[:d], nil
}
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
const maxTableSize = 1 << 14
shift, tableSize := uint(32-8), 1<<8
for tableSize < maxTableSize && tableSize < len(src) {
shift--
tableSize *= 2
}
var table [maxTableSize]int
// Iterate over the source bytes.
var (
s int // The iterator position.
t int // The last position with the same hash as s.
lit int // The start position of any pending literal bytes.
)
for s+3 < len(src) {
// Update the hash table.
snappy-go: Improve encode performance a bit. Incorporates additional changes suggested by Dmitry Chestnykh, which proved to gain even some more speed up. $ benchcmp old.txt new.txt benchmark old ns/op new ns/op delta BenchmarkWordsDecode1e3 6795 6823 +0.41% BenchmarkWordsDecode1e4 67244 67337 +0.14% BenchmarkWordsDecode1e5 692152 693272 +0.16% BenchmarkWordsDecode1e6 6014484 6022518 +0.13% BenchmarkWordsEncode1e3 24069 22256 -7.53% BenchmarkWordsEncode1e4 155522 130625 -16.01% BenchmarkWordsEncode1e5 1451341 1342459 -7.50% BenchmarkWordsEncode1e6 11588325 10801803 -6.79% Benchmark_UFlat0 324272 324723 +0.14% Benchmark_UFlat1 3054906 3061657 +0.22% Benchmark_UFlat2 34889 34955 +0.19% Benchmark_UFlat3 106121 106247 +0.12% Benchmark_UFlat4 1301508 1303304 +0.14% Benchmark_UFlat5 117178 117506 +0.28% Benchmark_UFlat6 52977 53008 +0.06% Benchmark_UFlat7 17865 17895 +0.17% Benchmark_UFlat8 4049196 4066785 +0.43% Benchmark_UFlat9 935762 938304 +0.27% Benchmark_UFlat10 808857 811668 +0.35% Benchmark_UFlat11 2518497 2525181 +0.27% Benchmark_UFlat12 3269812 3279646 +0.30% Benchmark_UFlat13 1432980 1434405 +0.10% Benchmark_UFlat14 195012 195467 +0.23% Benchmark_UFlat15 22993 23028 +0.15% Benchmark_UFlat16 327945 328377 +0.13% Benchmark_UFlat17 827246 828168 +0.11% Benchmark_ZFlat0 586969 548872 -6.49% Benchmark_ZFlat1 8081042 7590963 -6.06% Benchmark_ZFlat2 2229381 1895227 -14.99% Benchmark_ZFlat3 1409796 1214367 -13.86% Benchmark_ZFlat4 2254589 2153293 -4.49% Benchmark_ZFlat5 283640 254657 -10.22% Benchmark_ZFlat6 128752 106041 -17.64% Benchmark_ZFlat7 50282 44514 -11.47% Benchmark_ZFlat8 7295276 6813961 -6.60% Benchmark_ZFlat9 1947643 1821234 -6.49% Benchmark_ZFlat10 1723946 1607431 -6.76% Benchmark_ZFlat11 5202240 4889793 -6.01% Benchmark_ZFlat12 6877479 6406872 -6.84% Benchmark_ZFlat13 2504360 2380325 -4.95% Benchmark_ZFlat14 449529 411485 -8.46% Benchmark_ZFlat15 67703 57238 -15.46% Benchmark_ZFlat16 599605 560547 -6.51% Benchmark_ZFlat17 1512113 1364405 -9.77% benchmark 
old MB/s new MB/s speedup BenchmarkWordsDecode1e3 147.16 146.55 1.00x BenchmarkWordsDecode1e4 148.71 148.51 1.00x BenchmarkWordsDecode1e5 144.48 144.24 1.00x BenchmarkWordsDecode1e6 166.27 166.04 1.00x BenchmarkWordsEncode1e3 41.55 44.93 1.08x BenchmarkWordsEncode1e4 64.30 76.55 1.19x BenchmarkWordsEncode1e5 68.90 74.49 1.08x BenchmarkWordsEncode1e6 86.29 92.58 1.07x Benchmark_UFlat0 315.78 315.34 1.00x Benchmark_UFlat1 229.82 229.32 1.00x Benchmark_UFlat2 3638.81 3632.04 1.00x Benchmark_UFlat3 888.89 887.83 1.00x Benchmark_UFlat4 314.71 314.28 1.00x Benchmark_UFlat5 209.96 209.38 1.00x Benchmark_UFlat6 210.47 210.34 1.00x Benchmark_UFlat7 208.28 207.93 1.00x Benchmark_UFlat8 254.31 253.21 1.00x Benchmark_UFlat9 162.53 162.09 1.00x Benchmark_UFlat10 154.76 154.22 1.00x Benchmark_UFlat11 169.45 169.00 1.00x Benchmark_UFlat12 147.37 146.92 1.00x Benchmark_UFlat13 358.15 357.79 1.00x Benchmark_UFlat14 196.09 195.63 1.00x Benchmark_UFlat15 183.84 183.56 1.00x Benchmark_UFlat16 361.61 361.13 1.00x Benchmark_UFlat17 222.81 222.56 1.00x Benchmark_ZFlat0 174.46 186.56 1.07x Benchmark_ZFlat1 86.88 92.49 1.06x Benchmark_ZFlat2 56.95 66.99 1.18x Benchmark_ZFlat3 66.91 77.68 1.16x Benchmark_ZFlat4 181.67 190.22 1.05x Benchmark_ZFlat5 86.74 96.61 1.11x Benchmark_ZFlat6 86.60 105.15 1.21x Benchmark_ZFlat7 74.00 83.59 1.13x Benchmark_ZFlat8 141.15 151.12 1.07x Benchmark_ZFlat9 78.09 83.51 1.07x Benchmark_ZFlat10 72.61 77.88 1.07x Benchmark_ZFlat11 82.03 87.27 1.06x Benchmark_ZFlat12 70.06 75.21 1.07x Benchmark_ZFlat13 204.93 215.61 1.05x Benchmark_ZFlat14 85.07 92.93 1.09x Benchmark_ZFlat15 62.43 73.85 1.18x Benchmark_ZFlat16 197.78 211.56 1.07x Benchmark_ZFlat17 121.90 135.09 1.11x $ R=nigeltao, dchest CC=golang-dev https://codereview.appspot.com/7346051 Committer: Nigel Tao <nigeltao@golang.org>
2013-02-20 04:31:25 +04:00
b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
p := &table[(h*0x1e35a7bd)>>shift]
// We need to to store values in [-1, inf) in table. To save
// some initialization time, (re)use the table's zero value
// and shift the values against this zero: add 1 on writes,
// subtract 1 on reads.
t, *p = *p-1, s+1
// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
snappy-go: Improve encode performance a bit. Incorporates additional changes suggested by Dmitry Chestnykh, which proved to gain even some more speed up. $ benchcmp old.txt new.txt benchmark old ns/op new ns/op delta BenchmarkWordsDecode1e3 6795 6823 +0.41% BenchmarkWordsDecode1e4 67244 67337 +0.14% BenchmarkWordsDecode1e5 692152 693272 +0.16% BenchmarkWordsDecode1e6 6014484 6022518 +0.13% BenchmarkWordsEncode1e3 24069 22256 -7.53% BenchmarkWordsEncode1e4 155522 130625 -16.01% BenchmarkWordsEncode1e5 1451341 1342459 -7.50% BenchmarkWordsEncode1e6 11588325 10801803 -6.79% Benchmark_UFlat0 324272 324723 +0.14% Benchmark_UFlat1 3054906 3061657 +0.22% Benchmark_UFlat2 34889 34955 +0.19% Benchmark_UFlat3 106121 106247 +0.12% Benchmark_UFlat4 1301508 1303304 +0.14% Benchmark_UFlat5 117178 117506 +0.28% Benchmark_UFlat6 52977 53008 +0.06% Benchmark_UFlat7 17865 17895 +0.17% Benchmark_UFlat8 4049196 4066785 +0.43% Benchmark_UFlat9 935762 938304 +0.27% Benchmark_UFlat10 808857 811668 +0.35% Benchmark_UFlat11 2518497 2525181 +0.27% Benchmark_UFlat12 3269812 3279646 +0.30% Benchmark_UFlat13 1432980 1434405 +0.10% Benchmark_UFlat14 195012 195467 +0.23% Benchmark_UFlat15 22993 23028 +0.15% Benchmark_UFlat16 327945 328377 +0.13% Benchmark_UFlat17 827246 828168 +0.11% Benchmark_ZFlat0 586969 548872 -6.49% Benchmark_ZFlat1 8081042 7590963 -6.06% Benchmark_ZFlat2 2229381 1895227 -14.99% Benchmark_ZFlat3 1409796 1214367 -13.86% Benchmark_ZFlat4 2254589 2153293 -4.49% Benchmark_ZFlat5 283640 254657 -10.22% Benchmark_ZFlat6 128752 106041 -17.64% Benchmark_ZFlat7 50282 44514 -11.47% Benchmark_ZFlat8 7295276 6813961 -6.60% Benchmark_ZFlat9 1947643 1821234 -6.49% Benchmark_ZFlat10 1723946 1607431 -6.76% Benchmark_ZFlat11 5202240 4889793 -6.01% Benchmark_ZFlat12 6877479 6406872 -6.84% Benchmark_ZFlat13 2504360 2380325 -4.95% Benchmark_ZFlat14 449529 411485 -8.46% Benchmark_ZFlat15 67703 57238 -15.46% Benchmark_ZFlat16 599605 560547 -6.51% Benchmark_ZFlat17 1512113 1364405 -9.77% benchmark 
old MB/s new MB/s speedup BenchmarkWordsDecode1e3 147.16 146.55 1.00x BenchmarkWordsDecode1e4 148.71 148.51 1.00x BenchmarkWordsDecode1e5 144.48 144.24 1.00x BenchmarkWordsDecode1e6 166.27 166.04 1.00x BenchmarkWordsEncode1e3 41.55 44.93 1.08x BenchmarkWordsEncode1e4 64.30 76.55 1.19x BenchmarkWordsEncode1e5 68.90 74.49 1.08x BenchmarkWordsEncode1e6 86.29 92.58 1.07x Benchmark_UFlat0 315.78 315.34 1.00x Benchmark_UFlat1 229.82 229.32 1.00x Benchmark_UFlat2 3638.81 3632.04 1.00x Benchmark_UFlat3 888.89 887.83 1.00x Benchmark_UFlat4 314.71 314.28 1.00x Benchmark_UFlat5 209.96 209.38 1.00x Benchmark_UFlat6 210.47 210.34 1.00x Benchmark_UFlat7 208.28 207.93 1.00x Benchmark_UFlat8 254.31 253.21 1.00x Benchmark_UFlat9 162.53 162.09 1.00x Benchmark_UFlat10 154.76 154.22 1.00x Benchmark_UFlat11 169.45 169.00 1.00x Benchmark_UFlat12 147.37 146.92 1.00x Benchmark_UFlat13 358.15 357.79 1.00x Benchmark_UFlat14 196.09 195.63 1.00x Benchmark_UFlat15 183.84 183.56 1.00x Benchmark_UFlat16 361.61 361.13 1.00x Benchmark_UFlat17 222.81 222.56 1.00x Benchmark_ZFlat0 174.46 186.56 1.07x Benchmark_ZFlat1 86.88 92.49 1.06x Benchmark_ZFlat2 56.95 66.99 1.18x Benchmark_ZFlat3 66.91 77.68 1.16x Benchmark_ZFlat4 181.67 190.22 1.05x Benchmark_ZFlat5 86.74 96.61 1.11x Benchmark_ZFlat6 86.60 105.15 1.21x Benchmark_ZFlat7 74.00 83.59 1.13x Benchmark_ZFlat8 141.15 151.12 1.07x Benchmark_ZFlat9 78.09 83.51 1.07x Benchmark_ZFlat10 72.61 77.88 1.07x Benchmark_ZFlat11 82.03 87.27 1.06x Benchmark_ZFlat12 70.06 75.21 1.07x Benchmark_ZFlat13 204.93 215.61 1.05x Benchmark_ZFlat14 85.07 92.93 1.09x Benchmark_ZFlat15 62.43 73.85 1.18x Benchmark_ZFlat16 197.78 211.56 1.07x Benchmark_ZFlat17 121.90 135.09 1.11x $ R=nigeltao, dchest CC=golang-dev https://codereview.appspot.com/7346051 Committer: Nigel Tao <nigeltao@golang.org>
2013-02-20 04:31:25 +04:00
if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
s++
continue
}
// Otherwise, we have a match. First, emit any pending literal bytes.
if lit != s {
d += emitLiteral(dst[d:], src[lit:s])
}
// Extend the match to be as long as possible.
s0 := s
s, t = s+4, t+4
for s < len(src) && src[s] == src[t] {
s++
t++
}
// Emit the copied bytes.
d += emitCopy(dst[d:], s-t, s-s0)
lit = s
}
// Emit any final pending literal bytes and return.
if lit != len(src) {
d += emitLiteral(dst[d:], src[lit:])
}
return dst[:d], nil
}
// MaxEncodedLen returns an upper bound on the size of a snappy block that
// encodes srcLen uncompressed bytes.
func MaxEncodedLen(srcLen int) int {
	// An encoded block is a sequence of items followed by trailing literals:
	//	compressed := item* literal*
	//	item       := literal* copy
	//
	// Trailing literals expand by at most 62/60: a 60-byte literal costs one
	// tag byte plus one extra length byte.
	//
	// For items, consider first a copy of 4 bytes. A special check in the
	// encoding code only produces a 4-byte copy when the offset is < 65536,
	// so that copy op costs 3 bytes and the item expands by no more than the
	// 62/60 literal factor.
	//
	// Now consider a copy of 5 bytes. With a large enough offset the copy op
	// costs 5 bytes, so the worst case is a one-byte literal followed by a
	// five-byte copy: 6 input bytes become 7 encoded bytes.
	//
	// That 7/6 factor dominates, giving the estimate below.
	const fixedOverhead = 32
	worstCaseGrowth := srcLen / 6
	return fixedOverhead + srcLen + worstCaseGrowth
}
// NewWriter returns a new Writer that compresses to w, using the framing
// format described at
// https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
//
// The Writer's scratch buffer is allocated here, sized to hold the encoded
// form of one maximum-length uncompressed chunk.
func NewWriter(w io.Writer) *Writer {
	sw := new(Writer)
	sw.w = w
	sw.enc = make([]byte, MaxEncodedLen(maxUncompressedChunkLen))
	return sw
}
// Writer is an io.Writer that can write Snappy-compressed bytes.
type Writer struct {
// w is the underlying writer that receives the framed output.
w io.Writer
// err is the first error encountered; once set, Write returns it without
// doing any further work until Reset clears it.
err error
// enc is the scratch buffer handed to Encode as its destination. It is
// also used to stage the stream header before it is written.
enc []byte
// buf holds the per-chunk header bytes and checksum written ahead of
// each chunk body.
buf [checksumSize + chunkHeaderSize]byte
// wroteHeader reports whether the stream header has already been written
// to w.
wroteHeader bool
}
// Reset discards the writer's state and switches the Snappy writer to write to
// writer. This permits reusing a Writer rather than allocating a new one.
//
// The sticky error is cleared and the stream header will be written again on
// the next Write. The existing scratch buffer is kept.
func (w *Writer) Reset(writer io.Writer) {
	w.w, w.err, w.wroteHeader = writer, nil, false
}
// Write satisfies the io.Writer interface.
//
// The stream header is written before the first chunk. p is then split into
// pieces of at most maxUncompressedChunkLen bytes, and each piece is written
// as one chunk: an 8-byte header (chunk type, 3-byte little-endian length,
// 4-byte little-endian checksum of the uncompressed piece) followed by the
// body. n counts the input bytes whose chunks were written in full. Any
// error is remembered and returned by every later call.
func (w *Writer) Write(p []byte) (n int, errRet error) {
	if w.err != nil {
		return 0, w.err
	}

	// fail makes err sticky and reports the bytes consumed so far.
	fail := func(err error) (int, error) {
		w.err = err
		return n, err
	}

	if !w.wroteHeader {
		copy(w.enc, magicChunk)
		if _, err := w.w.Write(w.enc[:len(magicChunk)]); err != nil {
			return fail(err)
		}
		w.wroteHeader = true
	}

	for len(p) > 0 {
		// Peel the next piece off the front of p.
		size := len(p)
		if size > maxUncompressedChunkLen {
			size = maxUncompressedChunkLen
		}
		raw := p[:size]
		p = p[size:]

		sum := crc(raw)

		// Compress the piece, but fall back to storing it verbatim unless
		// compression saves at least 12.5%.
		kind := uint8(chunkTypeCompressedData)
		body, err := Encode(w.enc, raw)
		if err != nil {
			return fail(err)
		}
		if threshold := len(raw) - len(raw)/8; len(body) >= threshold {
			kind, body = chunkTypeUncompressedData, raw
		}

		// The recorded chunk length covers the 4 checksum bytes plus the body.
		total := 4 + len(body)
		hdr := w.buf[:]
		hdr[0] = kind
		hdr[1] = uint8(total)
		hdr[2] = uint8(total >> 8)
		hdr[3] = uint8(total >> 16)
		hdr[4] = uint8(sum)
		hdr[5] = uint8(sum >> 8)
		hdr[6] = uint8(sum >> 16)
		hdr[7] = uint8(sum >> 24)

		if _, err = w.w.Write(hdr); err != nil {
			return fail(err)
		}
		if _, err = w.w.Write(body); err != nil {
			return fail(err)
		}
		n += len(raw)
	}
	return n, nil
}