diff --git a/daemon/start.go b/daemon/start.go
index 57a7267b7c..f1f713b271 100644
--- a/daemon/start.go
+++ b/daemon/start.go
@@ -195,11 +195,23 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
 		}
 	}
 
+	// Platform-specific processing between Create and Start
+	handle, err := postCreate(spec)
+	if err != nil {
+		if err2 := daemon.containerd.Delete(context.Background(), container.ID); err2 != nil {
+			logrus.WithError(err2).WithField("container", container.ID).Error("failed to delete container after failed post-create processing")
+		}
+		return err
+	}
+
 	// TODO(mlaventure): we need to specify checkpoint options here
 	pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir,
 		container.StreamConfig.Stdin() != nil || container.Config.Tty,
 		container.InitializeStdio)
 	if err != nil {
+		// The start failed, so postStart will never run: release the
+		// handle from postCreate here instead.
+		cleanupPostCreate(handle)
 		if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
 			logrus.WithError(err).WithField("container", container.ID).
 				Error("failed to delete failed start container")
@@ -207,6 +219,9 @@ func (daemon *Daemon) containerStart(container *container.Container, checkpoint
 		return translateContainerdStartErr(container.Path, container.SetExitCode, err)
 	}
 
+	// Platform-specific processing after start
+	postStart(spec, handle)
+
 	container.SetRunning(pid, true)
 	container.HasBeenStartedBefore = true
 	daemon.setStateCounter(container)
diff --git a/daemon/start_unix.go b/daemon/start_unix.go
index e680b95f42..f4aae6bc5a 100644
--- a/daemon/start_unix.go
+++ b/daemon/start_unix.go
@@ -6,10 +6,11 @@ import (
 	"fmt"
 	"os/exec"
 	"path/filepath"
 
 	"github.com/containerd/containerd/runtime/linux/runctypes"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/errdefs"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/pkg/errors"
 )
 
@@ -55,3 +56,20 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
 
 	return opts, nil
 }
+
+// postCreate does platform-specific processing after a container has been
+// created, but before it has been started. There is nothing to do on this
+// platform, so it returns a zero handle and a nil error.
+func postCreate(spec *specs.Spec) (uintptr, error) {
+	return 0, nil
+}
+
+// cleanupPostCreate releases the handle returned by postCreate when the
+// container fails to start. There is nothing to release on this platform.
+func cleanupPostCreate(handle uintptr) {
+}
+
+// postStart does platform-specific processing after a container has been
+// started. It is a no-op on this platform.
+func postStart(spec *specs.Spec, handle uintptr) {
+}
diff --git a/daemon/start_windows.go b/daemon/start_windows.go
index abe84c565a..9dc38ca144 100644
--- a/daemon/start_windows.go
+++ b/daemon/start_windows.go
@@ -1,10 +1,17 @@
 package daemon // import "github.com/docker/docker/daemon"
 
 import (
+	"path/filepath"
+	"syscall"
+	"unsafe"
+
+	"github.com/Microsoft/go-winio/vhd"
 	"github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
 	"github.com/Microsoft/opengcs/client"
 	"github.com/docker/docker/container"
 	"github.com/docker/docker/pkg/system"
+	specs "github.com/opencontainers/runtime-spec/specs-go"
+	"github.com/pkg/errors"
 )
 
 func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Container) (interface{}, error) {
@@ -46,3 +53,134 @@ func (daemon *Daemon) getLibcontainerdCreateOptions(container *container.Contain
 
 	return nil, nil
 }
+
+// postCreate does platform-specific processing after a container has been
+// created, but before it has been started.
+//
+// When postCreateStartActionNeeded reports that the optimisation applies and
+// the spec has no HyperV section, it opens sandbox.vhdx in the last layer
+// folder and disables flushing on it. The open handle is returned so that the
+// caller can pass it to postStart once the container has started, or to
+// cleanupPostCreate if the start fails. A zero handle with a nil error means
+// there was nothing to do.
+func postCreate(spec *specs.Spec) (syscall.Handle, error) {
+	// Check if any action is needed first.
+	if !postCreateStartActionNeeded(spec) {
+		return 0, nil
+	}
+
+	// TODO Xenon (WCOW)
+	if spec.Windows.HyperV != nil {
+		return 0, nil
+	}
+
+	// Argon (WCOW): operate on the scratch disk in the last layer folder.
+	// Guard the index so that a spec without layers is an error, not a panic.
+	layers := spec.Windows.LayerFolders
+	if len(layers) == 0 {
+		return 0, errors.New("failed to locate sandbox.vhdx: no layer folders in spec")
+	}
+	path := filepath.Join(layers[len(layers)-1], "sandbox.vhdx")
+
+	handle, err := vhd.OpenVirtualDisk(path, vhd.VirtualDiskAccessNone, vhd.OpenVirtualDiskFlagParentCachedIO|vhd.OpenVirtualDiskFlagIgnoreRelativeParentLocator)
+	if err != nil {
+		// The open failed, so there is no valid handle to close here.
+		return 0, errors.Wrapf(err, "failed to open %s", path)
+	}
+	if err := setVhdWriteCacheMode(handle, WriteCacheModeDisableFlushing); err != nil {
+		syscall.CloseHandle(handle)
+		return 0, errors.Wrapf(err, "failed to disable flushing on %s", path)
+	}
+	return handle, nil
+}
+
+// cleanupPostCreate closes the handle returned by postCreate when the
+// container fails to start, in which case postStart never runs to close it.
+// A zero handle is a no-op.
+func cleanupPostCreate(handle syscall.Handle) {
+	if handle != 0 {
+		syscall.CloseHandle(handle)
+	}
+}
+
+// postStart does platform-specific processing after a container has been
+// started. Given a non-zero handle from postCreate, it switches the VHD's write
+// cache mode to CacheMetadata and closes the handle. A zero handle is a no-op.
+func postStart(spec *specs.Spec, handle syscall.Handle) {
+	if handle == 0 {
+		return
+	}
+	// Release the handle on every path so that it can never leak.
+	defer syscall.CloseHandle(handle)
+
+	if !postCreateStartActionNeeded(spec) {
+		return
+	}
+
+	// Best effort: the container is already running and this function has no
+	// error return, so a failure to change the cache mode is deliberately ignored.
+	_ = setVhdWriteCacheMode(handle, WriteCacheModeCacheMetadata)
+}
+
+// postCreateStartActionNeeded determines if there is something that needs
+// to be done in the postCreate or postStart functions.
+func postCreateStartActionNeeded(spec *specs.Spec) bool {
+	// No-op if not using containerd runtime
+	if !system.ContainerdRuntimeSupported() {
+		return false
+	}
+
+	// No-op pre-RS5 or post-18855. Pre-RS5 doesn't use v2. Post 18855 has
+	// these optimisations in the platform for v2 callers.
+	osv := system.GetOSVersion()
+	if osv.Build < 17763 || osv.Build >= 18855 {
+		return false
+	}
+
+	// No-op if we're not optimising, or LCOW.
+	if spec == nil || spec.Windows == nil || !spec.Windows.IgnoreFlushesDuringBoot || spec.Linux != nil {
+		return false
+	}
+	return true
+}
+
+// WriteCacheMode is the write cache mode of a VHD, as set by
+// setVhdWriteCacheMode.
+type WriteCacheMode uint16
+
+// Write cache modes for a VHD.
+const (
+	WriteCacheModeCacheMetadata         WriteCacheMode = 0
+	WriteCacheModeWriteInternalMetadata WriteCacheMode = 1
+	WriteCacheModeWriteMetadata         WriteCacheMode = 2
+	WriteCacheModeCommitAll             WriteCacheMode = 3
+	WriteCacheModeDisableFlushing       WriteCacheMode = 4
+)
+
+// setVhdWriteCacheMode sets the WriteCacheMode for a VHD. The handle
+// to the VHD should be opened with Access: None, Flags: ParentCachedIO |
+// IgnoreRelativeParentLocator. Use DisableFlushing for optimisation during
+// first boot, and CacheMetadata following container start.
+func setVhdWriteCacheMode(handle syscall.Handle, wcm WriteCacheMode) error {
+	type storageSetSurfaceCachePolicyRequest struct {
+		RequestLevel uint32
+		CacheMode    uint16
+		pad          uint16 // For 4-byte alignment
+	}
+	const ioctlSetSurfaceCachePolicy uint32 = 0x2d1a10
+	request := storageSetSurfaceCachePolicyRequest{
+		RequestLevel: 1,
+		CacheMode:    uint16(wcm),
+		pad:          0,
+	}
+	var bytesReturned uint32
+	return syscall.DeviceIoControl(
+		handle,
+		ioctlSetSurfaceCachePolicy,
+		(*byte)(unsafe.Pointer(&request)),
+		uint32(unsafe.Sizeof(request)),
+		nil,
+		0,
+		&bytesReturned,
+		nil)
+}
diff --git a/libcontainerd/remote/client.go b/libcontainerd/remote/client.go
index 07999b8ab0..2587948154 100644
--- a/libcontainerd/remote/client.go
+++ b/libcontainerd/remote/client.go
@@ -283,6 +283,7 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
 		return -1, errors.Wrap(err, "failed to retrieve spec")
 	}
 	uid, gid := getSpecUser(spec)
+
 	t, err = ctr.ctr.NewTask(ctx,
 		func(id string) (cio.IO, error) {
 			fifos := newFIFOSet(ctr.bundleDir, libcontainerdtypes.InitProcessName, withStdin, spec.Process.Terminal)