// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package pargzip contains a parallel gzip writer implementation. By
// compressing each chunk of data in parallel, all the CPUs on the
// machine can be used, at a slight loss of compression efficiency.
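//
// Each chunk is written as an independent gzip stream. Concatenated
// gzip streams are themselves a valid gzip stream (RFC 1952 calls the
// pieces "members"), so standard readers such as compress/gzip decode
// the combined output as a single file.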
package pargzip

import (
	"bufio"
	"bytes"
	"compress/gzip"
	"io"
	"runtime"
	"strings"
	"sync"
)

// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
//
// Any exported fields may only be mutated before the first call to
// Write.
type Writer struct {
	// ChunkSize is the number of bytes to gzip at once.
	// The default from NewWriter is 1MB.
	ChunkSize int

	// Parallel is the number of chunks to compress in parallel.
	// The default from NewWriter is runtime.NumCPU().
	Parallel int

	w  io.Writer
	bw *bufio.Writer

	allWritten  chan struct{} // closed when the writing goroutine ends
	wasWriteErr chan struct{} // closed after 'err' is set

	sem    chan bool        // semaphore bounding compressions in flight
	chunkc chan *writeChunk // closed on Close

	mu     sync.Mutex // guards following
	closed bool
	err    error // sticky write error
}

type writeChunk struct {
	zw *Writer
	p  string // uncompressed

	donec chan struct{} // closed on completion

	// one of the following is set:
	z   []byte // compressed
	err error  // compression error
}

// compress gzips the chunk into c.z in memory.
// It runs in its own goroutine.
func (c *writeChunk) compress() (err error) {
	defer func() {
		if err != nil {
			c.err = err
		}
		close(c.donec)
		<-c.zw.sem // release the slot acquired in startChunk
	}()
	var zbuf bytes.Buffer
	zw := gzip.NewWriter(&zbuf)
	if _, err := io.Copy(zw, strings.NewReader(c.p)); err != nil {
		return err
	}
	if err := zw.Close(); err != nil {
		return err
	}
	c.z = zbuf.Bytes()
	return nil
}

// NewWriter returns a new Writer.
// Writes to the returned writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the WriteCloser
// when done. Writes may be buffered and not flushed until Close.
//
// Any exported fields on Writer may only be modified before the first
// call to Write.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		w:           w,
		allWritten:  make(chan struct{}),
		wasWriteErr: make(chan struct{}),
		ChunkSize:   1 << 20,
		Parallel:    runtime.NumCPU(),
	}
}
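
// A minimal usage sketch; "out.gz" and the input string are placeholder
// values, and the os, log, io, and strings imports are assumed on the
// caller's side:
//
//	f, err := os.Create("out.gz")
//	if err != nil {
//		log.Fatal(err)
//	}
//	zw := pargzip.NewWriter(f)
//	if _, err := io.Copy(zw, strings.NewReader("hello, gzip")); err != nil {
//		log.Fatal(err)
//	}
//	if err := zw.Close(); err != nil { // Close flushes buffered chunks
//		log.Fatal(err)
//	}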

func (w *Writer) didInit() bool { return w.bw != nil }
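
// init wires up the pipeline: a bufio.Writer slices incoming writes
// into ChunkSize chunks, each chunk is compressed concurrently (bounded
// by sem), and a single goroutine drains chunkc so chunks are written
// out in the order they were started.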
func (w *Writer) init() {
	w.bw = bufio.NewWriterSize(newChunkWriter{w}, w.ChunkSize)
	w.chunkc = make(chan *writeChunk, w.Parallel+1)
	w.sem = make(chan bool, w.Parallel)
	go func() {
		defer close(w.allWritten)
		for c := range w.chunkc {
			if err := w.writeCompressedChunk(c); err != nil {
				close(w.wasWriteErr)
				return
			}
		}
	}()
}

func (w *Writer) startChunk(p []byte) {
	w.sem <- true // block until we can begin
	c := &writeChunk{
		zw:    w,
		p:     string(p), // string, since the bufio.Writer owns the slice
		donec: make(chan struct{}),
	}
	go c.compress() // receives from w.sem
	select {
	case w.chunkc <- c:
	case <-w.wasWriteErr:
		// Discard chunks that come after any chunk that failed
		// to write.
	}
}
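
// writeCompressedChunk writes one compressed chunk to w.w, preserving
// submission order: chunkc is a FIFO channel, and this blocks on the
// chunk's donec before writing, even if later chunks finish first.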
func (w *Writer) writeCompressedChunk(c *writeChunk) (err error) {
	defer func() {
		if err != nil {
			w.mu.Lock()
			defer w.mu.Unlock()
			if w.err == nil {
				w.err = err
			}
		}
	}()
	<-c.donec
	if c.err != nil {
		return c.err
	}
	_, err = w.w.Write(c.z)
	return
}

func (w *Writer) Write(p []byte) (n int, err error) {
	if !w.didInit() {
		w.init()
	}
	return w.bw.Write(p)
}

func (w *Writer) Close() error {
	w.mu.Lock()
	err, wasClosed := w.err, w.closed
	w.closed = true
	w.mu.Unlock()
	if wasClosed {
		return nil
	}
	if !w.didInit() {
		return nil
	}
	if err != nil {
		return err
	}

	w.bw.Flush()
	close(w.chunkc)
	<-w.allWritten // wait for the writing goroutine to end

	w.mu.Lock()
	err = w.err
	w.mu.Unlock()
	return err
}

// newChunkWriter receives large writes of uncompressed data from the
// bufio.Writer and cuts them into ChunkSize chunks for compression.
type newChunkWriter struct {
	zw *Writer
}

func (cw newChunkWriter) Write(p []byte) (n int, err error) {
	n = len(p)
	max := cw.zw.ChunkSize
	for len(p) > 0 {
		chunk := p
		if len(chunk) > max {
			chunk = chunk[:max]
		}
		p = p[len(chunk):]
		cw.zw.startChunk(chunk)
	}
	return
}