From 35a22c9e12c05e2a0a205964702ced78ea39d7a1 Mon Sep 17 00:00:00 2001 From: Josh Hawn Date: Wed, 17 Dec 2014 18:26:03 -0800 Subject: [PATCH] Refactor to optimize storage driver ApplyDiff() To avoid an expensive call to archive.ChangesDirs() which walks two directory trees and compares every entry, archive.ApplyLayer() has been extended to also return the size of the layer changes. Docker-DCO-1.1-Signed-off-by: Josh Hawn (github: jlhawn) --- daemon/graphdriver/aufs/aufs.go | 4 +-- daemon/graphdriver/driver.go | 4 +-- daemon/graphdriver/fsdiff.go | 30 ++++--------------- daemon/graphdriver/overlay/overlay.go | 13 +++----- pkg/archive/changes_test.go | 2 +- pkg/archive/diff.go | 40 ++++++++++++++----------- pkg/archive/utils_test.go | 3 +- pkg/chrootarchive/archive_test.go | 2 +- pkg/chrootarchive/diff.go | 43 ++++++++++++++++++++++----- 9 files changed, 75 insertions(+), 66 deletions(-) diff --git a/daemon/graphdriver/aufs/aufs.go b/daemon/graphdriver/aufs/aufs.go index 210f623e34..82a5c89051 100644 --- a/daemon/graphdriver/aufs/aufs.go +++ b/daemon/graphdriver/aufs/aufs.go @@ -312,7 +312,7 @@ func (a *Driver) applyDiff(id string, diff archive.ArchiveReader) error { // DiffSize calculates the changes between the specified id // and its parent and returns the size in bytes of the changes // relative to its base filesystem directory. -func (a *Driver) DiffSize(id, parent string) (bytes int64, err error) { +func (a *Driver) DiffSize(id, parent string) (size int64, err error) { // AUFS doesn't need the parent layer to calculate the diff size. return utils.TreeSize(path.Join(a.rootPath(), "diff", id)) } @@ -320,7 +320,7 @@ func (a *Driver) DiffSize(id, parent string) (bytes int64, err error) { // ApplyDiff extracts the changeset from the given diff into the // layer with the specified id and parent, returning the size of the // new layer in bytes. -func (a *Driver) ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error) { +func (a *Driver) ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error) { // AUFS doesn't need the parent id to apply the diff. if err = a.applyDiff(id, diff); err != nil { return diff --git a/daemon/graphdriver/driver.go b/daemon/graphdriver/driver.go index 95479bf64f..d969614728 100644 --- a/daemon/graphdriver/driver.go +++ b/daemon/graphdriver/driver.go @@ -63,11 +63,11 @@ type Driver interface { // ApplyDiff extracts the changeset from the given diff into the // layer with the specified id and parent, returning the size of the // new layer in bytes. - ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error) + ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error) // DiffSize calculates the changes between the specified id // and its parent and returns the size in bytes of the changes // relative to its base filesystem directory. - DiffSize(id, parent string) (bytes int64, err error) + DiffSize(id, parent string) (size int64, err error) } var ( diff --git a/daemon/graphdriver/fsdiff.go b/daemon/graphdriver/fsdiff.go index 48852a5631..ab1b08f620 100644 --- a/daemon/graphdriver/fsdiff.go +++ b/daemon/graphdriver/fsdiff.go @@ -3,14 +3,12 @@ package graphdriver import ( - "fmt" "time" log "github.com/Sirupsen/logrus" "github.com/docker/docker/pkg/archive" "github.com/docker/docker/pkg/chrootarchive" "github.com/docker/docker/pkg/ioutils" - "github.com/docker/docker/utils" ) // naiveDiffDriver takes a ProtoDriver and adds the @@ -27,8 +25,8 @@ type naiveDiffDriver struct { // it may or may not support on its own: // Diff(id, parent string) (archive.Archive, error) // Changes(id, parent string) ([]archive.Change, error) -// ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error) -// DiffSize(id, parent string) (bytes int64, err error) +// ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error) +// DiffSize(id, parent string) (size int64, err error) func NaiveDiffDriver(driver ProtoDriver) Driver { return &naiveDiffDriver{ProtoDriver: driver} } @@ -111,7 +109,7 @@ func (gdw *naiveDiffDriver) Changes(id, parent string) ([]archive.Change, error) // ApplyDiff extracts the changeset from the given diff into the // layer with the specified id and parent, returning the size of the // new layer in bytes. -func (gdw *naiveDiffDriver) ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error) { +func (gdw *naiveDiffDriver) ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error) { driver := gdw.ProtoDriver // Mount the root filesystem so we can apply the diff/layer. @@ -123,34 +121,18 @@ func (gdw *naiveDiffDriver) ApplyDiff(id, parent string, diff archive.ArchiveRea start := time.Now().UTC() log.Debugf("Start untar layer") - if err = chrootarchive.ApplyLayer(layerFs, diff); err != nil { + if size, err = chrootarchive.ApplyLayer(layerFs, diff); err != nil { return } log.Debugf("Untar time: %vs", time.Now().UTC().Sub(start).Seconds()) - if parent == "" { - return utils.TreeSize(layerFs) - } - - parentFs, err := driver.Get(parent, "") - if err != nil { - err = fmt.Errorf("Driver %s failed to get image parent %s: %s", driver, parent, err) - return - } - defer driver.Put(parent) - - changes, err := archive.ChangesDirs(layerFs, parentFs) - if err != nil { - return - } - - return archive.ChangesSize(layerFs, changes), nil + return } // DiffSize calculates the changes between the specified layer // and its parent and returns the size in bytes of the changes // relative to its base filesystem directory. -func (gdw *naiveDiffDriver) DiffSize(id, parent string) (bytes int64, err error) { +func (gdw *naiveDiffDriver) DiffSize(id, parent string) (size int64, err error) { driver := gdw.ProtoDriver changes, err := gdw.Changes(id, parent) diff --git a/daemon/graphdriver/overlay/overlay.go b/daemon/graphdriver/overlay/overlay.go index 2569ccb6d1..68b6b0ed3e 100644 --- a/daemon/graphdriver/overlay/overlay.go +++ b/daemon/graphdriver/overlay/overlay.go @@ -28,7 +28,7 @@ var ( type ApplyDiffProtoDriver interface { graphdriver.ProtoDriver - ApplyDiff(id, parent string, diff archive.ArchiveReader) (bytes int64, err error) + ApplyDiff(id, parent string, diff archive.ArchiveReader) (size int64, err error) } type naiveDiffDriverWithApply struct { @@ -309,7 +309,7 @@ func (d *Driver) Put(id string) { delete(d.active, id) } -func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) (bytes int64, err error) { +func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) (size int64, err error) { dir := d.dir(id) if parent == "" { @@ -347,7 +347,7 @@ func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) return 0, err } - if err := chrootarchive.ApplyLayer(tmpRootDir, diff); err != nil { + if size, err = chrootarchive.ApplyLayer(tmpRootDir, diff); err != nil { return 0, err } @@ -356,12 +356,7 @@ func (d *Driver) ApplyDiff(id string, parent string, diff archive.ArchiveReader) return 0, err } - changes, err := archive.ChangesDirs(rootDir, parentRootDir) - if err != nil { - return 0, err - } - - return archive.ChangesSize(rootDir, changes), nil + return } func (d *Driver) Exists(id string) bool { diff --git a/pkg/archive/changes_test.go b/pkg/archive/changes_test.go index 34c0f0da64..6b8f2354b8 100644 --- a/pkg/archive/changes_test.go +++ b/pkg/archive/changes_test.go @@ -286,7 +286,7 @@ func TestApplyLayer(t *testing.T) { t.Fatal(err) } - if err := ApplyLayer(src, layerCopy); err != nil { + if _, err := ApplyLayer(src, layerCopy); err != nil { t.Fatal(err) } diff --git a/pkg/archive/diff.go b/pkg/archive/diff.go index ba22c41f3c..ca282071f5 100644 --- a/pkg/archive/diff.go +++ b/pkg/archive/diff.go @@ -15,7 +15,7 @@ import ( "github.com/docker/docker/pkg/system" ) -func UnpackLayer(dest string, layer ArchiveReader) error { +func UnpackLayer(dest string, layer ArchiveReader) (size int64, err error) { tr := tar.NewReader(layer) trBuf := pools.BufioReader32KPool.Get(tr) defer pools.BufioReader32KPool.Put(trBuf) @@ -33,9 +33,11 @@ func UnpackLayer(dest string, layer ArchiveReader) error { break } if err != nil { - return err + return 0, err } + size += hdr.Size + // Normalize name, for safety and for a simple is-root check hdr.Name = filepath.Clean(hdr.Name) @@ -48,7 +50,7 @@ func UnpackLayer(dest string, layer ArchiveReader) error { if _, err := os.Lstat(parentPath); err != nil && os.IsNotExist(err) { err = os.MkdirAll(parentPath, 0600) if err != nil { - return err + return 0, err } } } @@ -63,12 +65,12 @@ func UnpackLayer(dest string, layer ArchiveReader) error { aufsHardlinks[basename] = hdr if aufsTempdir == "" { if aufsTempdir, err = ioutil.TempDir("", "dockerplnk"); err != nil { - return err + return 0, err } defer os.RemoveAll(aufsTempdir) } if err := createTarFile(filepath.Join(aufsTempdir, basename), dest, hdr, tr, true); err != nil { - return err + return 0, err } } continue @@ -77,10 +79,10 @@ func UnpackLayer(dest string, layer ArchiveReader) error { path := filepath.Join(dest, hdr.Name) rel, err := filepath.Rel(dest, path) if err != nil { - return err + return 0, err } if strings.HasPrefix(rel, "..") { - return breakoutError(fmt.Errorf("%q is outside of %q", hdr.Name, dest)) + return 0, breakoutError(fmt.Errorf("%q is outside of %q", hdr.Name, dest)) } base := filepath.Base(path) @@ -88,7 +90,7 @@ func UnpackLayer(dest string, layer ArchiveReader) error { originalBase := base[len(".wh."):] originalPath := filepath.Join(filepath.Dir(path), originalBase) if err := os.RemoveAll(originalPath); err != nil { - return err + return 0, err } } else { // If path exits we almost always just want to remove and replace it. @@ -98,7 +100,7 @@ func UnpackLayer(dest string, layer ArchiveReader) error { if fi, err := os.Lstat(path); err == nil { if !(fi.IsDir() && hdr.Typeflag == tar.TypeDir) { if err := os.RemoveAll(path); err != nil { - return err + return 0, err } } } @@ -113,18 +115,18 @@ func UnpackLayer(dest string, layer ArchiveReader) error { linkBasename := filepath.Base(hdr.Linkname) srcHdr = aufsHardlinks[linkBasename] if srcHdr == nil { - return fmt.Errorf("Invalid aufs hardlink") + return 0, fmt.Errorf("Invalid aufs hardlink") } tmpFile, err := os.Open(filepath.Join(aufsTempdir, linkBasename)) if err != nil { - return err + return 0, err } defer tmpFile.Close() srcData = tmpFile } if err := createTarFile(path, dest, srcHdr, srcData, true); err != nil { - return err + return 0, err } // Directory mtimes must be handled at the end to avoid further @@ -139,27 +141,29 @@ func UnpackLayer(dest string, layer ArchiveReader) error { path := filepath.Join(dest, hdr.Name) ts := []syscall.Timespec{timeToTimespec(hdr.AccessTime), timeToTimespec(hdr.ModTime)} if err := syscall.UtimesNano(path, ts); err != nil { - return err + return 0, err } } - return nil + + return size, nil } // ApplyLayer parses a diff in the standard layer format from `layer`, and -// applies it to the directory `dest`. -func ApplyLayer(dest string, layer ArchiveReader) error { +// applies it to the directory `dest`. Returns the size in bytes of the +// contents of the layer. +func ApplyLayer(dest string, layer ArchiveReader) (int64, error) { dest = filepath.Clean(dest) // We need to be able to set any perms oldmask, err := system.Umask(0) if err != nil { - return err + return 0, err } defer system.Umask(oldmask) // ignore err, ErrNotSupportedPlatform layer, err = DecompressStream(layer) if err != nil { - return err + return 0, err } return UnpackLayer(dest, layer) } diff --git a/pkg/archive/utils_test.go b/pkg/archive/utils_test.go index 3624fe5afa..9048027203 100644 --- a/pkg/archive/utils_test.go +++ b/pkg/archive/utils_test.go @@ -17,7 +17,8 @@ var testUntarFns = map[string]func(string, io.Reader) error{ return Untar(r, dest, nil) }, "applylayer": func(dest string, r io.Reader) error { - return ApplyLayer(dest, ArchiveReader(r)) + _, err := ApplyLayer(dest, ArchiveReader(r)) + return err }, } diff --git a/pkg/chrootarchive/archive_test.go b/pkg/chrootarchive/archive_test.go index 0fe3d64f95..bb8a22dc70 100644 --- a/pkg/chrootarchive/archive_test.go +++ b/pkg/chrootarchive/archive_test.go @@ -95,7 +95,7 @@ func TestChrootApplyEmptyArchiveFromSlowReader(t *testing.T) { t.Fatal(err) } stream := &slowEmptyTarReader{size: 10240, chunkSize: 1024} - if err := ApplyLayer(dest, stream); err != nil { + if _, err := ApplyLayer(dest, stream); err != nil { t.Fatal(err) } } diff --git a/pkg/chrootarchive/diff.go b/pkg/chrootarchive/diff.go index d4e9529b6d..ac1cbf9bea 100644 --- a/pkg/chrootarchive/diff.go +++ b/pkg/chrootarchive/diff.go @@ -1,6 +1,8 @@ package chrootarchive import ( + "bytes" + "encoding/json" "flag" "fmt" "io" @@ -14,6 +16,10 @@ import ( "github.com/docker/docker/pkg/reexec" ) +type applyLayerResponse struct { + LayerSize int64 `json:"layerSize"` +} + func applyLayer() { runtime.LockOSThread() flag.Parse() @@ -21,6 +27,7 @@ func applyLayer() { if err := chroot(flag.Arg(0)); err != nil { fatal(err) } + // We need to be able to set any perms oldmask := syscall.Umask(0) defer syscall.Umask(oldmask) @@ -28,33 +35,53 @@ func applyLayer() { if err != nil { fatal(err) } + os.Setenv("TMPDIR", tmpDir) - err = archive.UnpackLayer("/", os.Stdin) + size, err := archive.UnpackLayer("/", os.Stdin) os.RemoveAll(tmpDir) if err != nil { fatal(err) } - os.RemoveAll(tmpDir) + + encoder := json.NewEncoder(os.Stdout) + if err := encoder.Encode(applyLayerResponse{size}); err != nil { + fatal(fmt.Errorf("unable to encode layerSize JSON: %s", err)) + } + + flush(os.Stdout) flush(os.Stdin) os.Exit(0) } -func ApplyLayer(dest string, layer archive.ArchiveReader) error { +func ApplyLayer(dest string, layer archive.ArchiveReader) (size int64, err error) { dest = filepath.Clean(dest) decompressed, err := archive.DecompressStream(layer) if err != nil { - return err + return 0, err } + defer func() { if c, ok := decompressed.(io.Closer); ok { c.Close() } }() + cmd := reexec.Command("docker-applyLayer", dest) cmd.Stdin = decompressed - out, err := cmd.CombinedOutput() - if err != nil { - return fmt.Errorf("ApplyLayer %s %s", err, out) + + outBuf, errBuf := new(bytes.Buffer), new(bytes.Buffer) + cmd.Stdout, cmd.Stderr = outBuf, errBuf + + if err = cmd.Run(); err != nil { + return 0, fmt.Errorf("ApplyLayer %s stdout: %s stderr: %s", err, outBuf, errBuf) } - return nil + + // Stdout should be a valid JSON struct representing an applyLayerResponse. + response := applyLayerResponse{} + decoder := json.NewDecoder(outBuf) + if err = decoder.Decode(&response); err != nil { + return 0, fmt.Errorf("unable to decode ApplyLayer JSON response: %s", err) + } + + return response.LayerSize, nil }