From d8fef66b03c1ea8715470690efbd950033f7f628 Mon Sep 17 00:00:00 2001 From: boucher Date: Thu, 12 May 2016 10:52:00 -0400 Subject: [PATCH] Initial implementation of containerd Checkpoint API. Signed-off-by: boucher --- Dockerfile | 13 +++ api/server/router/checkpoint/backend.go | 12 +++ api/server/router/checkpoint/checkpoint.go | 28 +++++++ .../checkpoint/checkpoint_experimental.go | 15 ++++ .../router/checkpoint/checkpoint_regular.go | 8 ++ .../router/checkpoint/checkpoint_routes.go | 60 ++++++++++++++ api/server/router/container/backend.go | 2 +- .../router/container/container_routes.go | 10 ++- builder/builder.go | 9 +- builder/dockerfile/internals.go | 2 +- cli/command/checkpoint/cmd.go | 12 +++ cli/command/checkpoint/cmd_experimental.go | 31 +++++++ cli/command/checkpoint/create.go | 54 ++++++++++++ cli/command/checkpoint/list.go | 47 +++++++++++ cli/command/checkpoint/remove.go | 28 +++++++ cli/command/commands/commands.go | 2 + cli/command/container/start.go | 19 ++++- cli/command/container/start_utils.go | 8 ++ .../container/start_utils_experimental.go | 9 ++ cmd/dockerd/daemon.go | 2 +- cmd/dockerd/routes.go | 8 +- cmd/dockerd/routes_experimental.go | 7 +- container/container.go | 5 ++ daemon/checkpoint.go | 82 +++++++++++++++++++ daemon/cluster/executor/backend.go | 2 +- daemon/cluster/executor/container/adapter.go | 2 +- daemon/create.go | 3 + daemon/daemon.go | 2 +- daemon/monitor_windows.go | 2 +- daemon/restart.go | 2 +- daemon/start.go | 10 +-- experimental/README.md | 7 +- experimental/checkpoint-restore.md | 75 +++++++++++++++++ integration-cli/docker_cli_help_test.go | 7 ++ libcontainerd/client_linux.go | 58 ++++++++++++- libcontainerd/client_solaris.go | 2 +- libcontainerd/client_windows.go | 14 +++- libcontainerd/container_linux.go | 16 ++-- libcontainerd/container_windows.go | 2 +- libcontainerd/types.go | 5 +- libcontainerd/types_linux.go | 3 + libcontainerd/types_windows.go | 10 +++ plugin/manager_linux.go | 2 +- 43 files changed, 659 
insertions(+), 38 deletions(-) create mode 100644 api/server/router/checkpoint/backend.go create mode 100644 api/server/router/checkpoint/checkpoint.go create mode 100644 api/server/router/checkpoint/checkpoint_experimental.go create mode 100644 api/server/router/checkpoint/checkpoint_regular.go create mode 100644 api/server/router/checkpoint/checkpoint_routes.go create mode 100644 cli/command/checkpoint/cmd.go create mode 100644 cli/command/checkpoint/cmd_experimental.go create mode 100644 cli/command/checkpoint/create.go create mode 100644 cli/command/checkpoint/list.go create mode 100644 cli/command/checkpoint/remove.go create mode 100644 cli/command/container/start_utils.go create mode 100644 cli/command/container/start_utils_experimental.go create mode 100644 daemon/checkpoint.go create mode 100644 experimental/checkpoint-restore.md diff --git a/Dockerfile b/Dockerfile index 4be797f977..cc0c460816 100644 --- a/Dockerfile +++ b/Dockerfile @@ -57,12 +57,17 @@ RUN apt-get update && apt-get install -y \ libapparmor-dev \ libcap-dev \ libltdl-dev \ + libnl-3-dev \ + libprotobuf-c0-dev \ + libprotobuf-dev \ libsqlite3-dev \ libsystemd-journal-dev \ libtool \ mercurial \ net-tools \ pkg-config \ + protobuf-compiler \ + protobuf-c-compiler \ python-dev \ python-mock \ python-pip \ @@ -145,6 +150,14 @@ RUN git clone https://github.com/golang/lint.git /go/src/github.com/golang/lint && (cd /go/src/github.com/golang/lint && git checkout -q $GO_LINT_COMMIT) \ && go install -v github.com/golang/lint/golint +# Install CRIU for checkpoint/restore support +ENV CRIU_VERSION 2.2 +RUN mkdir -p /usr/src/criu \ + && curl -sSL https://github.com/xemul/criu/archive/v${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \ + && cd /usr/src/criu \ + && make \ + && make install-criu + # Install two versions of the registry. The first is an older version that # only supports schema1 manifests. The second is a newer version that supports # both. 
This allows integration-cli tests to cover push/pull with both schema1 diff --git a/api/server/router/checkpoint/backend.go b/api/server/router/checkpoint/backend.go new file mode 100644 index 0000000000..a6a4dd0757 --- /dev/null +++ b/api/server/router/checkpoint/backend.go @@ -0,0 +1,12 @@ +// +build experimental + +package checkpoint + +import "github.com/docker/docker/api/types" + +// Backend for Checkpoint +type Backend interface { + CheckpointCreate(container string, config types.CheckpointCreateOptions) error + CheckpointDelete(container string, checkpointID string) error + CheckpointList(container string) ([]types.Checkpoint, error) +} diff --git a/api/server/router/checkpoint/checkpoint.go b/api/server/router/checkpoint/checkpoint.go new file mode 100644 index 0000000000..65f8961f5c --- /dev/null +++ b/api/server/router/checkpoint/checkpoint.go @@ -0,0 +1,28 @@ +package checkpoint + +import ( + "github.com/docker/docker/api/server/httputils" + "github.com/docker/docker/api/server/router" +) + +// checkpointRouter is a router to talk with the checkpoint controller +type checkpointRouter struct { + backend Backend + decoder httputils.ContainerDecoder + routes []router.Route +} + +// NewRouter initializes a new checkpoint router +func NewRouter(b Backend, decoder httputils.ContainerDecoder) router.Router { + r := &checkpointRouter{ + backend: b, + decoder: decoder, + } + r.initRoutes() + return r +} + +// Routes returns the available routers to the checkpoint controller +func (r *checkpointRouter) Routes() []router.Route { + return r.routes +} diff --git a/api/server/router/checkpoint/checkpoint_experimental.go b/api/server/router/checkpoint/checkpoint_experimental.go new file mode 100644 index 0000000000..8e495f1ea6 --- /dev/null +++ b/api/server/router/checkpoint/checkpoint_experimental.go @@ -0,0 +1,15 @@ +// +build experimental + +package checkpoint + +import ( + "github.com/docker/docker/api/server/router" +) + +func (r *checkpointRouter) initRoutes() { 
+ r.routes = []router.Route{ + router.NewGetRoute("/containers/{name:.*}/checkpoints", r.getContainerCheckpoints), + router.NewPostRoute("/containers/{name:.*}/checkpoints", r.postContainerCheckpoint), + router.NewDeleteRoute("/containers/{name:.*}/checkpoints/{checkpoint:.*}", r.deleteContainerCheckpoint), + } +} diff --git a/api/server/router/checkpoint/checkpoint_regular.go b/api/server/router/checkpoint/checkpoint_regular.go new file mode 100644 index 0000000000..df93bba977 --- /dev/null +++ b/api/server/router/checkpoint/checkpoint_regular.go @@ -0,0 +1,8 @@ +// +build !experimental + +package checkpoint + +func (r *checkpointRouter) initRoutes() {} + +// Backend is empty so that the package can compile in non-experimental +type Backend interface{} diff --git a/api/server/router/checkpoint/checkpoint_routes.go b/api/server/router/checkpoint/checkpoint_routes.go new file mode 100644 index 0000000000..3b8812bfcd --- /dev/null +++ b/api/server/router/checkpoint/checkpoint_routes.go @@ -0,0 +1,60 @@ +// +build experimental + +package checkpoint + +import ( + "encoding/json" + "net/http" + + "github.com/docker/docker/api/server/httputils" + "github.com/docker/docker/api/types" + "golang.org/x/net/context" +) + +func (s *checkpointRouter) postContainerCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if err := httputils.ParseForm(r); err != nil { + return err + } + + var options types.CheckpointCreateOptions + + decoder := json.NewDecoder(r.Body) + if err := decoder.Decode(&options); err != nil { + return err + } + + err := s.backend.CheckpointCreate(vars["name"], options) + if err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} + +func (s *checkpointRouter) getContainerCheckpoints(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if err := httputils.ParseForm(r); err != nil { + return err + } + + checkpoints, err := 
s.backend.CheckpointList(vars["name"]) + if err != nil { + return err + } + + return httputils.WriteJSON(w, http.StatusOK, checkpoints) +} + +func (s *checkpointRouter) deleteContainerCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error { + if err := httputils.ParseForm(r); err != nil { + return err + } + + err := s.backend.CheckpointDelete(vars["name"], vars["checkpoint"]) + if err != nil { + return err + } + + w.WriteHeader(http.StatusNoContent) + return nil +} diff --git a/api/server/router/container/backend.go b/api/server/router/container/backend.go index d6e8268ce7..4e98f72d9c 100644 --- a/api/server/router/container/backend.go +++ b/api/server/router/container/backend.go @@ -39,7 +39,7 @@ type stateBackend interface { ContainerResize(name string, height, width int) error ContainerRestart(name string, seconds int) error ContainerRm(name string, config *types.ContainerRmConfig) error - ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool) error + ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error ContainerStop(name string, seconds int) error ContainerUnpause(name string) error ContainerUpdate(name string, hostConfig *container.HostConfig, validateHostname bool) (types.ContainerUpdateResponse, error) diff --git a/api/server/router/container/container_routes.go b/api/server/router/container/container_routes.go index bb53b13873..42f4c91bb3 100644 --- a/api/server/router/container/container_routes.go +++ b/api/server/router/container/container_routes.go @@ -151,10 +151,16 @@ func (s *containerRouter) postContainersStart(ctx context.Context, w http.Respon hostConfig = c } - validateHostname := versions.GreaterThanOrEqualTo(version, "1.24") - if err := s.backend.ContainerStart(vars["name"], hostConfig, validateHostname); err != nil { + if err := httputils.ParseForm(r); err != nil { return err } + + checkpoint := 
r.Form.Get("checkpoint") + validateHostname := versions.GreaterThanOrEqualTo(version, "1.24") + if err := s.backend.ContainerStart(vars["name"], hostConfig, validateHostname, checkpoint); err != nil { + return err + } + w.WriteHeader(http.StatusNoContent) return nil } diff --git a/builder/builder.go b/builder/builder.go index e592877e7d..1445c26c31 100644 --- a/builder/builder.go +++ b/builder/builder.go @@ -124,12 +124,19 @@ type Backend interface { // ContainerKill stops the container execution abruptly. ContainerKill(containerID string, sig uint64) error // ContainerStart starts a new container - ContainerStart(containerID string, hostConfig *container.HostConfig, validateHostname bool) error + ContainerStart(containerID string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error // ContainerWait stops processing until the given container is stopped. ContainerWait(containerID string, timeout time.Duration) (int, error) // ContainerUpdateCmdOnBuild updates container.Path and container.Args ContainerUpdateCmdOnBuild(containerID string, cmd []string) error + // CheckpointCreate checkpoints a running container + CheckpointCreate(container string, config types.CheckpointCreateOptions) error + // CheckpointDelete deletes a container's checkpoint + CheckpointDelete(container string, checkpoint string) error + // CheckpointList lists the available checkpoints for a container + CheckpointList(container string) ([]types.Checkpoint, error) + // ContainerCopy copies/extracts a source FileInfo to a destination path inside a container // specified by a container object. 
// TODO: make an Extract method instead of passing `decompress` diff --git a/builder/dockerfile/internals.go b/builder/dockerfile/internals.go index 267d8e4e72..54d3301f9a 100644 --- a/builder/dockerfile/internals.go +++ b/builder/dockerfile/internals.go @@ -555,7 +555,7 @@ func (b *Builder) run(cID string) (err error) { } }() - if err := b.docker.ContainerStart(cID, nil, true); err != nil { + if err := b.docker.ContainerStart(cID, nil, true, ""); err != nil { return err } diff --git a/cli/command/checkpoint/cmd.go b/cli/command/checkpoint/cmd.go new file mode 100644 index 0000000000..cbeb951793 --- /dev/null +++ b/cli/command/checkpoint/cmd.go @@ -0,0 +1,12 @@ +// +build !experimental + +package checkpoint + +import ( + "github.com/docker/docker/cli/command" + "github.com/spf13/cobra" +) + +// NewCheckpointCommand returns a cobra command for `checkpoint` subcommands +func NewCheckpointCommand(rootCmd *cobra.Command, dockerCli *command.DockerCli) { +} diff --git a/cli/command/checkpoint/cmd_experimental.go b/cli/command/checkpoint/cmd_experimental.go new file mode 100644 index 0000000000..b7e614ca6f --- /dev/null +++ b/cli/command/checkpoint/cmd_experimental.go @@ -0,0 +1,31 @@ +// +build experimental + +package checkpoint + +import ( + "fmt" + + "github.com/spf13/cobra" + + "github.com/docker/docker/cli" + "github.com/docker/docker/cli/command" +) + +// NewCheckpointCommand returns a cobra command for `checkpoint` subcommands +func NewCheckpointCommand(rootCmd *cobra.Command, dockerCli *command.DockerCli) { + cmd := &cobra.Command{ + Use: "checkpoint", + Short: "Manage Container Checkpoints", + Args: cli.NoArgs, + Run: func(cmd *cobra.Command, args []string) { + fmt.Fprintf(dockerCli.Err(), "\n"+cmd.UsageString()) + }, + } + cmd.AddCommand( + newCreateCommand(dockerCli), + newListCommand(dockerCli), + newRemoveCommand(dockerCli), + ) + + rootCmd.AddCommand(cmd) +} diff --git a/cli/command/checkpoint/create.go b/cli/command/checkpoint/create.go new file mode 100644 
index 0000000000..42b316fe2a --- /dev/null +++ b/cli/command/checkpoint/create.go @@ -0,0 +1,54 @@ +// +build experimental + +package checkpoint + +import ( + "golang.org/x/net/context" + + "github.com/docker/docker/api/types" + "github.com/docker/docker/cli" + "github.com/docker/docker/cli/command" + "github.com/spf13/cobra" +) + +type createOptions struct { + container string + checkpoint string + leaveRunning bool +} + +func newCreateCommand(dockerCli *command.DockerCli) *cobra.Command { + var opts createOptions + + cmd := &cobra.Command{ + Use: "create CONTAINER CHECKPOINT", + Short: "Create a checkpoint from a running container", + Args: cli.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + opts.container = args[0] + opts.checkpoint = args[1] + return runCreate(dockerCli, opts) + }, + } + + flags := cmd.Flags() + flags.BoolVar(&opts.leaveRunning, "leave-running", false, "leave the container running after checkpoing") + + return cmd +} + +func runCreate(dockerCli *command.DockerCli, opts createOptions) error { + client := dockerCli.Client() + + checkpointOpts := types.CheckpointCreateOptions{ + CheckpointID: opts.checkpoint, + Exit: !opts.leaveRunning, + } + + err := client.CheckpointCreate(context.Background(), opts.container, checkpointOpts) + if err != nil { + return err + } + + return nil +} diff --git a/cli/command/checkpoint/list.go b/cli/command/checkpoint/list.go new file mode 100644 index 0000000000..6d22531d45 --- /dev/null +++ b/cli/command/checkpoint/list.go @@ -0,0 +1,47 @@ +// +build experimental + +package checkpoint + +import ( + "fmt" + "text/tabwriter" + + "golang.org/x/net/context" + + "github.com/docker/docker/cli" + "github.com/docker/docker/cli/command" + "github.com/spf13/cobra" +) + +func newListCommand(dockerCli *command.DockerCli) *cobra.Command { + return &cobra.Command{ + Use: "ls CONTAINER", + Aliases: []string{"list"}, + Short: "List checkpoints for a container", + Args: cli.ExactArgs(1), + RunE: func(cmd 
*cobra.Command, args []string) error { + return runList(dockerCli, args[0]) + }, + } +} + +func runList(dockerCli *command.DockerCli, container string) error { + client := dockerCli.Client() + + checkpoints, err := client.CheckpointList(context.Background(), container) + if err != nil { + return err + } + + w := tabwriter.NewWriter(dockerCli.Out(), 20, 1, 3, ' ', 0) + fmt.Fprintf(w, "CHECKPOINT NAME") + fmt.Fprintf(w, "\n") + + for _, checkpoint := range checkpoints { + fmt.Fprintf(w, "%s\t", checkpoint.Name) + fmt.Fprint(w, "\n") + } + + w.Flush() + return nil +} diff --git a/cli/command/checkpoint/remove.go b/cli/command/checkpoint/remove.go new file mode 100644 index 0000000000..6605c5e472 --- /dev/null +++ b/cli/command/checkpoint/remove.go @@ -0,0 +1,28 @@ +// +build experimental + +package checkpoint + +import ( + "golang.org/x/net/context" + + "github.com/docker/docker/cli" + "github.com/docker/docker/cli/command" + "github.com/spf13/cobra" +) + +func newRemoveCommand(dockerCli *command.DockerCli) *cobra.Command { + return &cobra.Command{ + Use: "rm CONTAINER CHECKPOINT", + Aliases: []string{"remove"}, + Short: "Remove a checkpoint", + Args: cli.ExactArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + return runRemove(dockerCli, args[0], args[1]) + }, + } +} + +func runRemove(dockerCli *command.DockerCli, container string, checkpoint string) error { + client := dockerCli.Client() + return client.CheckpointDelete(context.Background(), container, checkpoint) +} diff --git a/cli/command/commands/commands.go b/cli/command/commands/commands.go index 35fd6860b0..0adf8e3f3e 100644 --- a/cli/command/commands/commands.go +++ b/cli/command/commands/commands.go @@ -2,6 +2,7 @@ package commands import ( "github.com/docker/docker/cli/command" + "github.com/docker/docker/cli/command/checkpoint" "github.com/docker/docker/cli/command/container" "github.com/docker/docker/cli/command/image" "github.com/docker/docker/cli/command/network" @@ -67,5 +68,6 @@ func 
AddCommands(cmd *cobra.Command, dockerCli *command.DockerCli) { volume.NewVolumeCommand(dockerCli), system.NewInfoCommand(dockerCli), ) + checkpoint.NewCheckpointCommand(cmd, dockerCli) plugin.NewPluginCommand(cmd, dockerCli) } diff --git a/cli/command/container/start.go b/cli/command/container/start.go index e72369177a..9f414a7c66 100644 --- a/cli/command/container/start.go +++ b/cli/command/container/start.go @@ -20,6 +20,7 @@ type startOptions struct { attach bool openStdin bool detachKeys string + checkpoint string containers []string } @@ -42,6 +43,9 @@ func NewStartCommand(dockerCli *command.DockerCli) *cobra.Command { flags.BoolVarP(&opts.attach, "attach", "a", false, "Attach STDOUT/STDERR and forward signals") flags.BoolVarP(&opts.openStdin, "interactive", "i", false, "Attach container's STDIN") flags.StringVar(&opts.detachKeys, "detach-keys", "", "Override the key sequence for detaching a container") + + addExperimentalStartFlags(flags, &opts) + return cmd } @@ -105,9 +109,12 @@ func runStart(dockerCli *command.DockerCli, opts *startOptions) error { // 3. We should open a channel for receiving status code of the container // no matter it's detached, removed on daemon side(--rm) or exit normally. statusChan, statusErr := waitExitOrRemoved(dockerCli, context.Background(), c.ID, c.HostConfig.AutoRemove) + startOptions := types.ContainerStartOptions{ + CheckpointID: opts.checkpoint, + } // 4. Start the container. 
- if err := dockerCli.Client().ContainerStart(ctx, c.ID, types.ContainerStartOptions{}); err != nil { + if err := dockerCli.Client().ContainerStart(ctx, c.ID, startOptions); err != nil { cancelFun() <-cErr if c.HostConfig.AutoRemove && statusErr == nil { @@ -134,6 +141,16 @@ func runStart(dockerCli *command.DockerCli, opts *startOptions) error { if status := <-statusChan; status != 0 { return cli.StatusError{StatusCode: status} } + } else if opts.checkpoint != "" { + if len(opts.containers) > 1 { + return fmt.Errorf("You cannot restore multiple containers at once.") + } + container := opts.containers[0] + startOptions := types.ContainerStartOptions{ + CheckpointID: opts.checkpoint, + } + return dockerCli.Client().ContainerStart(ctx, container, startOptions) + } else { // We're not going to attach to anything. // Start as many containers as we want. diff --git a/cli/command/container/start_utils.go b/cli/command/container/start_utils.go new file mode 100644 index 0000000000..689d742f06 --- /dev/null +++ b/cli/command/container/start_utils.go @@ -0,0 +1,8 @@ +// +build !experimental + +package container + +import "github.com/spf13/pflag" + +func addExperimentalStartFlags(flags *pflag.FlagSet, opts *startOptions) { +} diff --git a/cli/command/container/start_utils_experimental.go b/cli/command/container/start_utils_experimental.go new file mode 100644 index 0000000000..43c64f431c --- /dev/null +++ b/cli/command/container/start_utils_experimental.go @@ -0,0 +1,9 @@ +// +build experimental + +package container + +import "github.com/spf13/pflag" + +func addExperimentalStartFlags(flags *pflag.FlagSet, opts *startOptions) { + flags.StringVar(&opts.checkpoint, "checkpoint", "", "Restore from this checkpoint") +} diff --git a/cmd/dockerd/daemon.go b/cmd/dockerd/daemon.go index 7c7424b25f..60357332f4 100644 --- a/cmd/dockerd/daemon.go +++ b/cmd/dockerd/daemon.go @@ -409,7 +409,7 @@ func initRouter(s *apiserver.Server, d *daemon.Daemon, c *cluster.Cluster) { if 
d.NetworkControllerEnabled() { routers = append(routers, network.NewRouter(d, c)) } - routers = addExperimentalRouters(routers) + routers = addExperimentalRouters(routers, d, decoder) s.InitRouter(utils.IsDebugEnabled(), routers...) } diff --git a/cmd/dockerd/routes.go b/cmd/dockerd/routes.go index 65b97bd8c2..767ff27ce6 100644 --- a/cmd/dockerd/routes.go +++ b/cmd/dockerd/routes.go @@ -2,8 +2,12 @@ package main -import "github.com/docker/docker/api/server/router" +import ( + "github.com/docker/docker/api/server/httputils" + "github.com/docker/docker/api/server/router" + "github.com/docker/docker/daemon" +) -func addExperimentalRouters(routers []router.Router) []router.Router { +func addExperimentalRouters(routers []router.Router, d *daemon.Daemon, decoder httputils.ContainerDecoder) []router.Router { return routers } diff --git a/cmd/dockerd/routes_experimental.go b/cmd/dockerd/routes_experimental.go index 665df9499a..adeb145cb2 100644 --- a/cmd/dockerd/routes_experimental.go +++ b/cmd/dockerd/routes_experimental.go @@ -3,11 +3,14 @@ package main import ( + "github.com/docker/docker/api/server/httputils" "github.com/docker/docker/api/server/router" + checkpointrouter "github.com/docker/docker/api/server/router/checkpoint" pluginrouter "github.com/docker/docker/api/server/router/plugin" + "github.com/docker/docker/daemon" "github.com/docker/docker/plugin" ) -func addExperimentalRouters(routers []router.Router) []router.Router { - return append(routers, pluginrouter.NewRouter(plugin.GetManager())) +func addExperimentalRouters(routers []router.Router, d *daemon.Daemon, decoder httputils.ContainerDecoder) []router.Router { + return append(routers, checkpointrouter.NewRouter(d, decoder), pluginrouter.NewRouter(plugin.GetManager())) } diff --git a/container/container.go b/container/container.go index 243450db3f..175f1b8154 100644 --- a/container/container.go +++ b/container/container.go @@ -306,6 +306,11 @@ func (container *Container) ConfigPath() (string, error) { 
return container.GetRootResourcePath(configFileName) } +// CheckpointDir returns the directory checkpoints are stored in +func (container *Container) CheckpointDir() string { + return filepath.Join(container.Root, "checkpoints") +} + // StartLogger starts a new logger driver for the container. func (container *Container) StartLogger(cfg containertypes.LogConfig) (logger.Logger, error) { c, err := logger.GetLogDriver(cfg.Type) diff --git a/daemon/checkpoint.go b/daemon/checkpoint.go new file mode 100644 index 0000000000..0c87b373b4 --- /dev/null +++ b/daemon/checkpoint.go @@ -0,0 +1,82 @@ +package daemon + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + + "github.com/docker/docker/api/types" +) + +// CheckpointCreate checkpoints the process running in a container with CRIU +func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreateOptions) error { + container, err := daemon.GetContainer(name) + if err != nil { + return err + } + + if !container.IsRunning() { + return fmt.Errorf("Container %s not running", name) + } + + err = daemon.containerd.CreateCheckpoint(container.ID, config.CheckpointID, container.CheckpointDir(), config.Exit) + if err != nil { + return fmt.Errorf("Cannot checkpoint container %s: %s", name, err) + } + + daemon.LogContainerEvent(container, "checkpoint") + + return nil +} + +// CheckpointDelete deletes the specified checkpoint +func (daemon *Daemon) CheckpointDelete(name string, checkpoint string) error { + container, err := daemon.GetContainer(name) + if err != nil { + return err + } + + checkpointDir := container.CheckpointDir() + return os.RemoveAll(filepath.Join(checkpointDir, checkpoint)) +} + +// CheckpointList deletes the specified checkpoint +func (daemon *Daemon) CheckpointList(name string) ([]types.Checkpoint, error) { + response := []types.Checkpoint{} + + container, err := daemon.GetContainer(name) + if err != nil { + return response, err + } + + checkpointDir := 
container.CheckpointDir() + if err := os.MkdirAll(checkpointDir, 0755); err != nil { + return nil, err + } + + dirs, err := ioutil.ReadDir(checkpointDir) + if err != nil { + return nil, err + } + + var out []types.Checkpoint + for _, d := range dirs { + if !d.IsDir() { + continue + } + path := filepath.Join(checkpointDir, d.Name(), "config.json") + data, err := ioutil.ReadFile(path) + if err != nil { + return nil, err + } + var cpt types.Checkpoint + if err := json.Unmarshal(data, &cpt); err != nil { + return nil, err + } + out = append(out, cpt) + } + + return out, nil +} diff --git a/daemon/cluster/executor/backend.go b/daemon/cluster/executor/backend.go index 756e1bb30e..d8a7646e4d 100644 --- a/daemon/cluster/executor/backend.go +++ b/daemon/cluster/executor/backend.go @@ -24,7 +24,7 @@ type Backend interface { SetupIngress(req clustertypes.NetworkCreateRequest, nodeIP string) error PullImage(ctx context.Context, image, tag string, metaHeaders map[string][]string, authConfig *types.AuthConfig, outStream io.Writer) error CreateManagedContainer(config types.ContainerCreateConfig, validateHostname bool) (types.ContainerCreateResponse, error) - ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool) error + ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error ContainerStop(name string, seconds int) error ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error diff --git a/daemon/cluster/executor/container/adapter.go b/daemon/cluster/executor/container/adapter.go index bac9a1542c..b8531938a7 100644 --- a/daemon/cluster/executor/container/adapter.go +++ b/daemon/cluster/executor/container/adapter.go @@ -220,7 +220,7 @@ func (c *containerAdapter) create(ctx context.Context) error { func (c *containerAdapter) start(ctx context.Context) 
error { version := httputils.VersionFromContext(ctx) validateHostname := versions.GreaterThanOrEqualTo(version, "1.24") - return c.backend.ContainerStart(c.container.name(), nil, validateHostname) + return c.backend.ContainerStart(c.container.name(), nil, validateHostname, "") } func (c *containerAdapter) inspect(ctx context.Context) (types.ContainerJSON, error) { diff --git a/daemon/create.go b/daemon/create.go index 81cb7ac365..64f7527521 100644 --- a/daemon/create.go +++ b/daemon/create.go @@ -115,6 +115,9 @@ func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) ( if err := idtools.MkdirAs(container.Root, 0700, rootUID, rootGID); err != nil { return nil, err } + if err := idtools.MkdirAs(container.CheckpointDir(), 0700, rootUID, rootGID); err != nil { + return nil, err + } if err := daemon.setHostConfig(container, params.HostConfig); err != nil { return nil, err diff --git a/daemon/daemon.go b/daemon/daemon.go index 52e587eb43..e28faa6af7 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -287,7 +287,7 @@ func (daemon *Daemon) restore() error { // Make sure networks are available before starting daemon.waitForNetworks(c) - if err := daemon.containerStart(c); err != nil { + if err := daemon.containerStart(c, ""); err != nil { logrus.Errorf("Failed to start container %s: %s", c.ID, err) } close(chNotify) diff --git a/daemon/monitor_windows.go b/daemon/monitor_windows.go index b500ee60b9..0d4a818229 100644 --- a/daemon/monitor_windows.go +++ b/daemon/monitor_windows.go @@ -28,7 +28,7 @@ func (daemon *Daemon) postRunProcessing(container *container.Container, e libcon // Create a new servicing container, which will start, complete the update, and merge back the // results if it succeeded, all as part of the below function call. 
- if err := daemon.containerd.Create((container.ID + "_servicing"), *spec, servicingOption); err != nil { + if err := daemon.containerd.Create((container.ID + "_servicing"), "", "", *spec, servicingOption); err != nil { container.SetExitCode(-1) return fmt.Errorf("Post-run update servicing failed: %s", err) } diff --git a/daemon/restart.go b/daemon/restart.go index 23a5d9981e..a34e731c41 100644 --- a/daemon/restart.go +++ b/daemon/restart.go @@ -56,7 +56,7 @@ func (daemon *Daemon) containerRestart(container *container.Container, seconds i } } - if err := daemon.containerStart(container); err != nil { + if err := daemon.containerStart(container, ""); err != nil { return err } diff --git a/daemon/start.go b/daemon/start.go index 561f0a09c3..488cbb5d54 100644 --- a/daemon/start.go +++ b/daemon/start.go @@ -19,7 +19,7 @@ import ( ) // ContainerStart starts a container. -func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, validateHostname bool) error { +func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, validateHostname bool, checkpoint string) error { container, err := daemon.GetContainer(name) if err != nil { return err @@ -78,19 +78,19 @@ func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.Hos return err } - return daemon.containerStart(container) + return daemon.containerStart(container, checkpoint) } // Start starts a container func (daemon *Daemon) Start(container *container.Container) error { - return daemon.containerStart(container) + return daemon.containerStart(container, "") } // containerStart prepares the container to run by setting up everything the // container needs, such as storage and networking, as well as links // between containers. The container is left waiting for a signal to // begin running. 
-func (daemon *Daemon) containerStart(container *container.Container) (err error) { +func (daemon *Daemon) containerStart(container *container.Container, checkpoint string) (err error) { container.Lock() defer container.Unlock() @@ -150,7 +150,7 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error) createOptions = append(createOptions, *copts...) } - if err := daemon.containerd.Create(container.ID, *spec, createOptions...); err != nil { + if err := daemon.containerd.Create(container.ID, checkpoint, container.CheckpointDir(), *spec, createOptions...); err != nil { errDesc := grpc.ErrorDesc(err) logrus.Errorf("Create container failed with error: %s", errDesc) // if we receive an internal error from the initial start of a container then lets diff --git a/experimental/README.md b/experimental/README.md index 3253cb5c41..abf54f2bd5 100644 --- a/experimental/README.md +++ b/experimental/README.md @@ -2,7 +2,7 @@ This page contains a list of features in the Docker engine which are experimental. Experimental features are **not** ready for production. They are -provided for test and evaluation in your sandbox environments. +provided for test and evaluation in your sandbox environments. The information below describes each feature and the GitHub pull requests and issues associated with it. If necessary, links are provided to additional @@ -74,9 +74,10 @@ to build a Docker binary with the experimental features enabled: * [External graphdriver plugins](plugins_graphdriver.md) * [Macvlan and Ipvlan Network Drivers](vlan-networks.md) * [Docker Stacks and Distributed Application Bundles](docker-stacks-and-bundles.md) + * [Checkpoint & Restore](checkpoint-restore.md) ## How to comment on an experimental feature -Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR. 
+Each feature's documentation includes a list of proposal pull requests or PRs associated with the feature. If you want to comment on or suggest a change to a feature, please add it to the existing feature PR. -Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or in on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user). +Issues or problems with a feature? Inquire for help on the `#docker` IRC channel or on the [Docker Google group](https://groups.google.com/forum/#!forum/docker-user). diff --git a/experimental/checkpoint-restore.md b/experimental/checkpoint-restore.md new file mode 100644 index 0000000000..9c46e69cd6 --- /dev/null +++ b/experimental/checkpoint-restore.md @@ -0,0 +1,75 @@ +# Docker Checkpoint & Restore + +Checkpoint & Restore is a new feature that allows you to freeze a running +container by checkpointing it, which turns its state into a collection of files +on disk. Later, the container can be restored from the point at which it was frozen. + +This is accomplished using a tool called [CRIU](http://criu.org), which is an +external dependency of this feature. A good overview of the history of +checkpoint and restore in Docker is available in this +[Kubernetes blog post](http://blog.kubernetes.io/2015/07/how-did-quake-demo-from-dockercon-work.html). + +## Installing CRIU + +If you use a Debian system, you can add the CRIU PPA and install with apt-get +[from the criu launchpad](https://launchpad.net/~criu/+archive/ubuntu/ppa). + +Alternatively, you can [build CRIU from source](http://criu.org/Installation). + +You need at least version 2.0 of CRIU to run checkpoint/restore in Docker. + +## Use cases for checkpoint & restore + +This feature is currently focused on single-host use cases for checkpoint and +restore. 
Here are a few: + +- Restarting the host machine without stopping/starting containers +- Speeding up the start time of slow-starting applications +- "Rewinding" processes to an earlier point in time +- "Forensic debugging" of running processes + +Another primary use case of checkpoint & restore outside of Docker is the live +migration of a server from one machine to another. This is possible with the +current implementation, but not currently a priority (and so the workflow is +not optimized for the task). + +## Using Checkpoint & Restore + +A new top-level command `docker checkpoint` is introduced, with three subcommands: +- `create` (creates a new checkpoint) +- `ls` (lists existing checkpoints) +- `rm` (deletes an existing checkpoint) + +Additionally, a `--checkpoint` flag is added to the container start command. + +The options for checkpoint create: + + Usage: docker checkpoint create [OPTIONS] CONTAINER CHECKPOINT_ID + + Checkpoint the specified container + + --leave-running=false leave the container running after checkpoint + +And to restore a container: + + Usage: docker start --checkpoint CHECKPOINT_ID [OTHER OPTIONS] CONTAINER + + +A simple example of using checkpoint & restore on a container: + + $ docker run --security-opt=seccomp:unconfined --name cr -d busybox /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done' + > abc0123 + + $ docker checkpoint create cr checkpoint1 + + # + $ docker start --checkpoint checkpoint1 cr + > abc0123 + +This process just logs an incrementing counter to stdout. If you `docker logs` +in between running, checkpointing, and restoring you should see that the counter +increases while the process is running, stops while it's checkpointed, and +resumes from the point it left off once you restore. + +Note that seccomp is only supported by CRIU in very up-to-date kernels. 
+ diff --git a/integration-cli/docker_cli_help_test.go b/integration-cli/docker_cli_help_test.go index e26f3536b2..5f916fedba 100644 --- a/integration-cli/docker_cli_help_test.go +++ b/integration-cli/docker_cli_help_test.go @@ -10,6 +10,7 @@ import ( "github.com/docker/docker/pkg/homedir" "github.com/docker/docker/pkg/integration/checker" icmd "github.com/docker/docker/pkg/integration/cmd" + "github.com/docker/docker/utils" "github.com/go-check/check" ) @@ -122,6 +123,12 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) { cmdsToTest = append(cmdsToTest, "network ls") cmdsToTest = append(cmdsToTest, "network rm") + if utils.ExperimentalBuild() { + cmdsToTest = append(cmdsToTest, "checkpoint create") + cmdsToTest = append(cmdsToTest, "checkpoint ls") + cmdsToTest = append(cmdsToTest, "checkpoint rm") + } + // Divide the list of commands into go routines and run the func testcommand on the commands in parallel // to save runtime of test diff --git a/libcontainerd/client_linux.go b/libcontainerd/client_linux.go index 6b25f5bb43..cdc1f69219 100644 --- a/libcontainerd/client_linux.go +++ b/libcontainerd/client_linux.go @@ -133,7 +133,7 @@ func (clnt *client) prepareBundleDir(uid, gid int) (string, error) { return p, nil } -func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) { +func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) (err error) { clnt.lock(containerID) defer clnt.unlock(containerID) @@ -180,7 +180,7 @@ func (clnt *client) Create(containerID string, spec Spec, options ...CreateOptio return err } - return container.start() + return container.start(checkpoint, checkpointDir) } func (clnt *client) Signal(containerID string, sig int) error { @@ -625,3 +625,57 @@ func (en *exitNotifier) close() { func (en *exitNotifier) wait() <-chan struct{} { return en.c } + +func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, 
checkpointDir string, exit bool) error { + clnt.lock(containerID) + defer clnt.unlock(containerID) + if _, err := clnt.getContainer(containerID); err != nil { + return err + } + + _, err := clnt.remote.apiClient.CreateCheckpoint(context.Background(), &containerd.CreateCheckpointRequest{ + Id: containerID, + Checkpoint: &containerd.Checkpoint{ + Name: checkpointID, + Exit: exit, + Tcp: true, + UnixSockets: true, + Shell: false, + EmptyNS: []string{"network"}, + }, + CheckpointDir: checkpointDir, + }) + return err +} + +func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error { + clnt.lock(containerID) + defer clnt.unlock(containerID) + if _, err := clnt.getContainer(containerID); err != nil { + return err + } + + _, err := clnt.remote.apiClient.DeleteCheckpoint(context.Background(), &containerd.DeleteCheckpointRequest{ + Id: containerID, + Name: checkpointID, + CheckpointDir: checkpointDir, + }) + return err +} + +func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) { + clnt.lock(containerID) + defer clnt.unlock(containerID) + if _, err := clnt.getContainer(containerID); err != nil { + return nil, err + } + + resp, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), &containerd.ListCheckpointRequest{ + Id: containerID, + CheckpointDir: checkpointDir, + }) + if err != nil { + return nil, err + } + return (*Checkpoints)(resp), nil +} diff --git a/libcontainerd/client_solaris.go b/libcontainerd/client_solaris.go index 1c14d301b5..df9106fab6 100644 --- a/libcontainerd/client_solaris.go +++ b/libcontainerd/client_solaris.go @@ -12,7 +12,7 @@ func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendly return nil } -func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) { +func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) (err 
error) { return nil } diff --git a/libcontainerd/client_windows.go b/libcontainerd/client_windows.go index b5be8ee469..0e0d1a78c8 100644 --- a/libcontainerd/client_windows.go +++ b/libcontainerd/client_windows.go @@ -37,7 +37,7 @@ const defaultOwner = "docker" // Create is the entrypoint to create a container from a spec, and if successfully // created, start it too. -func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) error { +func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) error { logrus.Debugln("libcontainerd: client.Create() with spec", spec) configuration := &hcsshim.ContainerConfig{ @@ -435,3 +435,15 @@ func (clnt *client) UpdateResources(containerID string, resources Resources) err // but we should return nil for enabling updating container return nil } + +func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error { + return errors.New("Windows: Containers do not support checkpoints") +} + +func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error { + return errors.New("Windows: Containers do not support checkpoints") +} + +func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) { + return nil, errors.New("Windows: Containers do not support checkpoints") +} diff --git a/libcontainerd/container_linux.go b/libcontainerd/container_linux.go index 4ed9f659dc..68344c551b 100644 --- a/libcontainerd/container_linux.go +++ b/libcontainerd/container_linux.go @@ -86,7 +86,7 @@ func (ctr *container) spec() (*specs.Spec, error) { return &spec, nil } -func (ctr *container) start() error { +func (ctr *container) start(checkpoint string, checkpointDir string) error { spec, err := ctr.spec() if err != nil { return nil @@ -97,11 +97,13 @@ func (ctr *container) start() error { } r := &containerd.CreateContainerRequest{ - Id: 
ctr.containerID, - BundlePath: ctr.dir, - Stdin: ctr.fifo(syscall.Stdin), - Stdout: ctr.fifo(syscall.Stdout), - Stderr: ctr.fifo(syscall.Stderr), + Id: ctr.containerID, + BundlePath: ctr.dir, + Stdin: ctr.fifo(syscall.Stdin), + Stdout: ctr.fifo(syscall.Stdout), + Stderr: ctr.fifo(syscall.Stderr), + Checkpoint: checkpoint, + CheckpointDir: checkpointDir, // check to see if we are running in ramdisk to disable pivot root NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "", Runtime: ctr.runtime, @@ -191,7 +193,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error { defer ctr.client.unlock(ctr.containerID) ctr.restarting = false if err == nil { - if err = ctr.start(); err != nil { + if err = ctr.start("", ""); err != nil { logrus.Errorf("libcontainerd: error restarting %v", err) } } diff --git a/libcontainerd/container_windows.go b/libcontainerd/container_windows.go index 171b8afda3..ffbadb5ce9 100644 --- a/libcontainerd/container_windows.go +++ b/libcontainerd/container_windows.go @@ -261,7 +261,7 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err ctr.restarting = false ctr.client.deleteContainer(ctr.friendlyName) if err == nil { - if err = ctr.client.Create(ctr.containerID, ctr.ociSpec, ctr.options...); err != nil { + if err = ctr.client.Create(ctr.containerID, "", "", ctr.ociSpec, ctr.options...); err != nil { logrus.Errorf("libcontainerd: error restarting %v", err) } } diff --git a/libcontainerd/types.go b/libcontainerd/types.go index 6f452c1c3b..fcb971137d 100644 --- a/libcontainerd/types.go +++ b/libcontainerd/types.go @@ -36,7 +36,7 @@ type Backend interface { // Client provides access to containerd features. 
type Client interface { - Create(containerID string, spec Spec, options ...CreateOption) error + Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) error Signal(containerID string, sig int) error SignalProcess(containerID string, processFriendlyName string, sig int) error AddProcess(ctx context.Context, containerID, processFriendlyName string, process Process) error @@ -48,6 +48,9 @@ type Client interface { GetPidsForContainer(containerID string) ([]int, error) Summary(containerID string) ([]Summary, error) UpdateResources(containerID string, resources Resources) error + CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error + DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error + ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) } // CreateOption allows to configure parameters of container creation. diff --git a/libcontainerd/types_linux.go b/libcontainerd/types_linux.go index bc19ece812..10893f7f66 100644 --- a/libcontainerd/types_linux.go +++ b/libcontainerd/types_linux.go @@ -53,3 +53,6 @@ type User specs.User // Resources defines updatable container resource values. 
type Resources containerd.UpdateResource + +// Checkpoints contains the details of a checkpoint +type Checkpoints containerd.ListCheckpointResponse diff --git a/libcontainerd/types_windows.go b/libcontainerd/types_windows.go index abe10e4f5f..653e2c61d5 100644 --- a/libcontainerd/types_windows.go +++ b/libcontainerd/types_windows.go @@ -37,3 +37,13 @@ type Resources struct{} type ServicingOption struct { IsServicing bool } + +// Checkpoint holds the details of a checkpoint (not supported in windows) +type Checkpoint struct { + Name string +} + +// Checkpoints contains the details of a checkpoint +type Checkpoints struct { + Checkpoints []*Checkpoint +} diff --git a/plugin/manager_linux.go b/plugin/manager_linux.go index 301b522149..a2adfe8022 100644 --- a/plugin/manager_linux.go +++ b/plugin/manager_linux.go @@ -27,7 +27,7 @@ func (pm *Manager) enable(p *v2.Plugin, force bool) error { } p.RestartManager = restartmanager.New(container.RestartPolicy{Name: "always"}, 0) - if err := pm.containerdClient.Create(p.GetID(), libcontainerd.Spec(*spec), libcontainerd.WithRestartManager(p.RestartManager)); err != nil { + if err := pm.containerdClient.Create(p.GetID(), "", "", libcontainerd.Spec(*spec), libcontainerd.WithRestartManager(p.RestartManager)); err != nil { if err := p.RestartManager.Cancel(); err != nil { logrus.Errorf("enable: restartManager.Cancel failed due to %v", err) }