Initial implementation of containerd Checkpoint API.

Signed-off-by: boucher <rboucher@gmail.com>
This commit is contained in:
boucher 2016-05-12 10:52:00 -04:00
Родитель e345d67c4e
Коммит d8fef66b03
43 изменённых файлов: 659 добавлений и 38 удалений

Просмотреть файл

@ -57,12 +57,17 @@ RUN apt-get update && apt-get install -y \
libapparmor-dev \
libcap-dev \
libltdl-dev \
libnl-3-dev \
libprotobuf-c0-dev \
libprotobuf-dev \
libsqlite3-dev \
libsystemd-journal-dev \
libtool \
mercurial \
net-tools \
pkg-config \
protobuf-compiler \
protobuf-c-compiler \
python-dev \
python-mock \
python-pip \
@ -145,6 +150,14 @@ RUN git clone https://github.com/golang/lint.git /go/src/github.com/golang/lint
&& (cd /go/src/github.com/golang/lint && git checkout -q $GO_LINT_COMMIT) \
&& go install -v github.com/golang/lint/golint
# Install CRIU for checkpoint/restore support
ENV CRIU_VERSION 2.2
RUN mkdir -p /usr/src/criu \
&& curl -sSL https://github.com/xemul/criu/archive/v${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
&& cd /usr/src/criu \
&& make \
&& make install-criu
# Install two versions of the registry. The first is an older version that
# only supports schema1 manifests. The second is a newer version that supports
# both. This allows integration-cli tests to cover push/pull with both schema1

Просмотреть файл

@ -0,0 +1,12 @@
// +build experimental
package checkpoint
import "github.com/docker/docker/api/types"
// Backend is the set of checkpoint operations the checkpoint router's
// handlers call on the daemon.
type Backend interface {
// CheckpointCreate creates a checkpoint of the named container using the
// supplied options.
CheckpointCreate(container string, config types.CheckpointCreateOptions) error
// CheckpointDelete removes the checkpoint identified by checkpointID from
// the named container.
CheckpointDelete(container string, checkpointID string) error
// CheckpointList returns the checkpoints that exist for the named container.
CheckpointList(container string) ([]types.Checkpoint, error)
}

Просмотреть файл

@ -0,0 +1,28 @@
package checkpoint
import (
"github.com/docker/docker/api/server/httputils"
"github.com/docker/docker/api/server/router"
)
// checkpointRouter is a router to talk with the checkpoint controller.
type checkpointRouter struct {
// backend receives the checkpoint calls made by the route handlers.
backend Backend
// decoder is stored by NewRouter.
// NOTE(review): no code visible here reads it — confirm whether it is needed.
decoder httputils.ContainerDecoder
// routes is filled in by initRoutes and handed out by Routes.
routes []router.Route
}
// NewRouter builds a checkpoint router around the given backend and decoder
// and registers its routes before returning it.
func NewRouter(b Backend, decoder httputils.ContainerDecoder) router.Router {
	cr := new(checkpointRouter)
	cr.backend = b
	cr.decoder = decoder
	cr.initRoutes()
	return cr
}
// Routes returns the routes registered on the checkpoint router. The slice is
// whatever initRoutes stored in r.routes.
func (r *checkpointRouter) Routes() []router.Route {
return r.routes
}

Просмотреть файл

@ -0,0 +1,15 @@
// +build experimental
package checkpoint
import (
"github.com/docker/docker/api/server/router"
)
// initRoutes registers the checkpoint endpoints (list, create, delete) on the
// router. This variant is compiled only for experimental builds.
func (r *checkpointRouter) initRoutes() {
	const (
		// collectionPath addresses all checkpoints of one container.
		collectionPath = "/containers/{name:.*}/checkpoints"
		// itemPath addresses a single checkpoint of one container.
		itemPath = collectionPath + "/{checkpoint:.*}"
	)

	list := router.NewGetRoute(collectionPath, r.getContainerCheckpoints)
	create := router.NewPostRoute(collectionPath, r.postContainerCheckpoint)
	remove := router.NewDeleteRoute(itemPath, r.deleteContainerCheckpoint)

	r.routes = []router.Route{list, create, remove}
}

Просмотреть файл

@ -0,0 +1,8 @@
// +build !experimental
package checkpoint
// initRoutes is a no-op in non-experimental builds: no checkpoint routes are
// registered.
func (r *checkpointRouter) initRoutes() {}
// Backend is an empty interface so that the package compiles in
// non-experimental builds, where no checkpoint handlers exist to call it.
type Backend interface{}

Просмотреть файл

@ -0,0 +1,60 @@
// +build experimental
package checkpoint
import (
"encoding/json"
"net/http"
"github.com/docker/docker/api/server/httputils"
"github.com/docker/docker/api/types"
"golang.org/x/net/context"
)
// postContainerCheckpoint handles checkpoint creation. It parses the request
// form, decodes the JSON body into types.CheckpointCreateOptions, asks the
// backend to checkpoint the container named by vars["name"], and replies with
// 204 No Content on success. Any error is returned to the caller unchanged.
func (s *checkpointRouter) postContainerCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	if parseErr := httputils.ParseForm(r); parseErr != nil {
		return parseErr
	}

	var opts types.CheckpointCreateOptions
	if decodeErr := json.NewDecoder(r.Body).Decode(&opts); decodeErr != nil {
		return decodeErr
	}

	if createErr := s.backend.CheckpointCreate(vars["name"], opts); createErr != nil {
		return createErr
	}

	w.WriteHeader(http.StatusNoContent)
	return nil
}
// getContainerCheckpoints handles checkpoint listing. It parses the request
// form, fetches the checkpoints of the container named by vars["name"] from
// the backend, and writes them as JSON with status 200.
func (s *checkpointRouter) getContainerCheckpoints(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	if parseErr := httputils.ParseForm(r); parseErr != nil {
		return parseErr
	}

	list, listErr := s.backend.CheckpointList(vars["name"])
	if listErr != nil {
		return listErr
	}

	return httputils.WriteJSON(w, http.StatusOK, list)
}
// deleteContainerCheckpoint handles checkpoint removal. It parses the request
// form, asks the backend to delete checkpoint vars["checkpoint"] of the
// container vars["name"], and replies with 204 No Content on success.
func (s *checkpointRouter) deleteContainerCheckpoint(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
	if parseErr := httputils.ParseForm(r); parseErr != nil {
		return parseErr
	}

	if delErr := s.backend.CheckpointDelete(vars["name"], vars["checkpoint"]); delErr != nil {
		return delErr
	}

	w.WriteHeader(http.StatusNoContent)
	return nil
}

Просмотреть файл

@ -39,7 +39,7 @@ type stateBackend interface {
ContainerResize(name string, height, width int) error
ContainerRestart(name string, seconds int) error
ContainerRm(name string, config *types.ContainerRmConfig) error
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool) error
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
ContainerStop(name string, seconds int) error
ContainerUnpause(name string) error
ContainerUpdate(name string, hostConfig *container.HostConfig, validateHostname bool) (types.ContainerUpdateResponse, error)

Просмотреть файл

@ -151,10 +151,16 @@ func (s *containerRouter) postContainersStart(ctx context.Context, w http.Respon
hostConfig = c
}
validateHostname := versions.GreaterThanOrEqualTo(version, "1.24")
if err := s.backend.ContainerStart(vars["name"], hostConfig, validateHostname); err != nil {
if err := httputils.ParseForm(r); err != nil {
return err
}
checkpoint := r.Form.Get("checkpoint")
validateHostname := versions.GreaterThanOrEqualTo(version, "1.24")
if err := s.backend.ContainerStart(vars["name"], hostConfig, validateHostname, checkpoint); err != nil {
return err
}
w.WriteHeader(http.StatusNoContent)
return nil
}

Просмотреть файл

@ -124,12 +124,19 @@ type Backend interface {
// ContainerKill stops the container execution abruptly.
ContainerKill(containerID string, sig uint64) error
// ContainerStart starts a new container
ContainerStart(containerID string, hostConfig *container.HostConfig, validateHostname bool) error
ContainerStart(containerID string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
// ContainerWait stops processing until the given container is stopped.
ContainerWait(containerID string, timeout time.Duration) (int, error)
// ContainerUpdateCmdOnBuild updates container.Path and container.Args
ContainerUpdateCmdOnBuild(containerID string, cmd []string) error
// CheckpointCreate checkpoints a running container
CheckpointCreate(container string, config types.CheckpointCreateOptions) error
// CheckpointDelete deletes a container's checkpoint
CheckpointDelete(container string, checkpoint string) error
// CheckpointList lists the available checkpoints for a container
CheckpointList(container string) ([]types.Checkpoint, error)
// ContainerCopy copies/extracts a source FileInfo to a destination path inside a container
// specified by a container object.
// TODO: make an Extract method instead of passing `decompress`

Просмотреть файл

@ -555,7 +555,7 @@ func (b *Builder) run(cID string) (err error) {
}
}()
if err := b.docker.ContainerStart(cID, nil, true); err != nil {
if err := b.docker.ContainerStart(cID, nil, true, ""); err != nil {
return err
}

Просмотреть файл

@ -0,0 +1,12 @@
// +build !experimental
package checkpoint
import (
"github.com/docker/docker/cli/command"
"github.com/spf13/cobra"
)
// NewCheckpointCommand is the non-experimental stand-in for the `checkpoint`
// command registration: it adds nothing to rootCmd and returns nothing.
func NewCheckpointCommand(rootCmd *cobra.Command, dockerCli *command.DockerCli) {
}

Просмотреть файл

@ -0,0 +1,31 @@
// +build experimental
package checkpoint
import (
"fmt"
"github.com/spf13/cobra"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
)
// NewCheckpointCommand builds the `checkpoint` parent command with its
// create, ls and rm subcommands and attaches it to rootCmd. It does not
// return the command; registration on rootCmd is its only effect.
//
// Invoking `checkpoint` without a subcommand prints the usage text to the
// CLI's error stream.
func NewCheckpointCommand(rootCmd *cobra.Command, dockerCli *command.DockerCli) {
	cmd := &cobra.Command{
		Use:   "checkpoint",
		Short: "Manage Container Checkpoints",
		Args:  cli.NoArgs,
		Run: func(cmd *cobra.Command, args []string) {
			// Use Fprint rather than Fprintf: the usage text is data, not a
			// format string, so any '%' in it must be printed verbatim.
			fmt.Fprint(dockerCli.Err(), "\n"+cmd.UsageString())
		},
	}
	cmd.AddCommand(
		newCreateCommand(dockerCli),
		newListCommand(dockerCli),
		newRemoveCommand(dockerCli),
	)

	rootCmd.AddCommand(cmd)
}

Просмотреть файл

@ -0,0 +1,54 @@
// +build experimental
package checkpoint
import (
"golang.org/x/net/context"
"github.com/docker/docker/api/types"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
"github.com/spf13/cobra"
)
// createOptions collects the arguments and flags of `checkpoint create`.
type createOptions struct {
// container is the first positional argument (CONTAINER).
container string
// checkpoint is the second positional argument (CHECKPOINT).
checkpoint string
// leaveRunning is set by --leave-running; runCreate sends its negation as
// the Exit option.
leaveRunning bool
}
// newCreateCommand returns the `checkpoint create CONTAINER CHECKPOINT`
// subcommand. It takes exactly two positional arguments and one flag,
// --leave-running (default false), and delegates the work to runCreate.
func newCreateCommand(dockerCli *command.DockerCli) *cobra.Command {
	var opts createOptions

	cmd := &cobra.Command{
		Use:   "create CONTAINER CHECKPOINT",
		Short: "Create a checkpoint from a running container",
		Args:  cli.ExactArgs(2),
		RunE: func(cmd *cobra.Command, args []string) error {
			opts.container = args[0]
			opts.checkpoint = args[1]
			return runCreate(dockerCli, opts)
		},
	}

	flags := cmd.Flags()
	// Help text previously read "checkpoing"; corrected to "checkpoint".
	flags.BoolVar(&opts.leaveRunning, "leave-running", false, "leave the container running after checkpoint")

	return cmd
}
// runCreate asks the daemon to checkpoint opts.container under the ID
// opts.checkpoint. The Exit option is the negation of --leave-running.
// The client's error, if any, is returned unchanged.
func runCreate(dockerCli *command.DockerCli, opts createOptions) error {
	apiClient := dockerCli.Client()

	var request types.CheckpointCreateOptions
	request.CheckpointID = opts.checkpoint
	request.Exit = !opts.leaveRunning

	return apiClient.CheckpointCreate(context.Background(), opts.container, request)
}

Просмотреть файл

@ -0,0 +1,47 @@
// +build experimental
package checkpoint
import (
"fmt"
"text/tabwriter"
"golang.org/x/net/context"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
"github.com/spf13/cobra"
)
// newListCommand returns the `checkpoint ls CONTAINER` subcommand (alias
// `list`). It takes exactly one positional argument and delegates to runList.
func newListCommand(dockerCli *command.DockerCli) *cobra.Command {
	listCmd := &cobra.Command{
		Use:     "ls CONTAINER",
		Aliases: []string{"list"},
		Short:   "List checkpoints for a container",
		Args:    cli.ExactArgs(1),
	}
	listCmd.RunE = func(_ *cobra.Command, args []string) error {
		return runList(dockerCli, args[0])
	}
	return listCmd
}
// runList fetches the checkpoints of the given container from the daemon and
// prints them to the CLI's output stream as a one-column table headed
// "CHECKPOINT NAME".
func runList(dockerCli *command.DockerCli, container string) error {
	entries, err := dockerCli.Client().CheckpointList(context.Background(), container)
	if err != nil {
		return err
	}

	tw := tabwriter.NewWriter(dockerCli.Out(), 20, 1, 3, ' ', 0)
	fmt.Fprint(tw, "CHECKPOINT NAME\n")
	for i := range entries {
		// Each row is the name followed by a tab-terminated cell and newline.
		fmt.Fprintf(tw, "%s\t\n", entries[i].Name)
	}
	tw.Flush()

	return nil
}

Просмотреть файл

@ -0,0 +1,28 @@
// +build experimental
package checkpoint
import (
"golang.org/x/net/context"
"github.com/docker/docker/cli"
"github.com/docker/docker/cli/command"
"github.com/spf13/cobra"
)
// newRemoveCommand returns the `checkpoint rm CONTAINER CHECKPOINT`
// subcommand (alias `remove`). It takes exactly two positional arguments and
// delegates to runRemove.
func newRemoveCommand(dockerCli *command.DockerCli) *cobra.Command {
	rmCmd := &cobra.Command{
		Use:     "rm CONTAINER CHECKPOINT",
		Aliases: []string{"remove"},
		Short:   "Remove a checkpoint",
		Args:    cli.ExactArgs(2),
	}
	rmCmd.RunE = func(_ *cobra.Command, args []string) error {
		containerName, checkpointName := args[0], args[1]
		return runRemove(dockerCli, containerName, checkpointName)
	}
	return rmCmd
}
// runRemove asks the daemon to delete the named checkpoint of the given
// container and returns the client's error, if any.
func runRemove(dockerCli *command.DockerCli, container string, checkpoint string) error {
	ctx := context.Background()
	return dockerCli.Client().CheckpointDelete(ctx, container, checkpoint)
}

Просмотреть файл

@ -2,6 +2,7 @@ package commands
import (
"github.com/docker/docker/cli/command"
"github.com/docker/docker/cli/command/checkpoint"
"github.com/docker/docker/cli/command/container"
"github.com/docker/docker/cli/command/image"
"github.com/docker/docker/cli/command/network"
@ -67,5 +68,6 @@ func AddCommands(cmd *cobra.Command, dockerCli *command.DockerCli) {
volume.NewVolumeCommand(dockerCli),
system.NewInfoCommand(dockerCli),
)
checkpoint.NewCheckpointCommand(cmd, dockerCli)
plugin.NewPluginCommand(cmd, dockerCli)
}

Просмотреть файл

@ -20,6 +20,7 @@ type startOptions struct {
attach bool
openStdin bool
detachKeys string
checkpoint string
containers []string
}
@ -42,6 +43,9 @@ func NewStartCommand(dockerCli *command.DockerCli) *cobra.Command {
flags.BoolVarP(&opts.attach, "attach", "a", false, "Attach STDOUT/STDERR and forward signals")
flags.BoolVarP(&opts.openStdin, "interactive", "i", false, "Attach container's STDIN")
flags.StringVar(&opts.detachKeys, "detach-keys", "", "Override the key sequence for detaching a container")
addExperimentalStartFlags(flags, &opts)
return cmd
}
@ -105,9 +109,12 @@ func runStart(dockerCli *command.DockerCli, opts *startOptions) error {
// 3. We should open a channel for receiving status code of the container
// no matter it's detached, removed on daemon side(--rm) or exit normally.
statusChan, statusErr := waitExitOrRemoved(dockerCli, context.Background(), c.ID, c.HostConfig.AutoRemove)
startOptions := types.ContainerStartOptions{
CheckpointID: opts.checkpoint,
}
// 4. Start the container.
if err := dockerCli.Client().ContainerStart(ctx, c.ID, types.ContainerStartOptions{}); err != nil {
if err := dockerCli.Client().ContainerStart(ctx, c.ID, startOptions); err != nil {
cancelFun()
<-cErr
if c.HostConfig.AutoRemove && statusErr == nil {
@ -134,6 +141,16 @@ func runStart(dockerCli *command.DockerCli, opts *startOptions) error {
if status := <-statusChan; status != 0 {
return cli.StatusError{StatusCode: status}
}
} else if opts.checkpoint != "" {
if len(opts.containers) > 1 {
return fmt.Errorf("You cannot restore multiple containers at once.")
}
container := opts.containers[0]
startOptions := types.ContainerStartOptions{
CheckpointID: opts.checkpoint,
}
return dockerCli.Client().ContainerStart(ctx, container, startOptions)
} else {
// We're not going to attach to anything.
// Start as many containers as we want.

Просмотреть файл

@ -0,0 +1,8 @@
// +build !experimental
package container
import "github.com/spf13/pflag"
// addExperimentalStartFlags is the non-experimental variant: it registers no
// additional flags on the start command.
func addExperimentalStartFlags(flags *pflag.FlagSet, opts *startOptions) {
}

Просмотреть файл

@ -0,0 +1,9 @@
// +build experimental
package container
import "github.com/spf13/pflag"
// addExperimentalStartFlags registers the experimental --checkpoint string
// flag (default empty) and binds it to opts.checkpoint.
func addExperimentalStartFlags(flags *pflag.FlagSet, opts *startOptions) {
flags.StringVar(&opts.checkpoint, "checkpoint", "", "Restore from this checkpoint")
}

Просмотреть файл

@ -409,7 +409,7 @@ func initRouter(s *apiserver.Server, d *daemon.Daemon, c *cluster.Cluster) {
if d.NetworkControllerEnabled() {
routers = append(routers, network.NewRouter(d, c))
}
routers = addExperimentalRouters(routers)
routers = addExperimentalRouters(routers, d, decoder)
s.InitRouter(utils.IsDebugEnabled(), routers...)
}

Просмотреть файл

@ -2,8 +2,12 @@
package main
import "github.com/docker/docker/api/server/router"
import (
"github.com/docker/docker/api/server/httputils"
"github.com/docker/docker/api/server/router"
"github.com/docker/docker/daemon"
)
func addExperimentalRouters(routers []router.Router) []router.Router {
func addExperimentalRouters(routers []router.Router, d *daemon.Daemon, decoder httputils.ContainerDecoder) []router.Router {
return routers
}

Просмотреть файл

@ -3,11 +3,14 @@
package main
import (
"github.com/docker/docker/api/server/httputils"
"github.com/docker/docker/api/server/router"
checkpointrouter "github.com/docker/docker/api/server/router/checkpoint"
pluginrouter "github.com/docker/docker/api/server/router/plugin"
"github.com/docker/docker/daemon"
"github.com/docker/docker/plugin"
)
func addExperimentalRouters(routers []router.Router) []router.Router {
return append(routers, pluginrouter.NewRouter(plugin.GetManager()))
func addExperimentalRouters(routers []router.Router, d *daemon.Daemon, decoder httputils.ContainerDecoder) []router.Router {
return append(routers, checkpointrouter.NewRouter(d, decoder), pluginrouter.NewRouter(plugin.GetManager()))
}

Просмотреть файл

@ -306,6 +306,11 @@ func (container *Container) ConfigPath() (string, error) {
return container.GetRootResourcePath(configFileName)
}
// CheckpointDir returns the directory checkpoints are stored in: the
// "checkpoints" entry directly under the container's Root.
func (container *Container) CheckpointDir() string {
return filepath.Join(container.Root, "checkpoints")
}
// StartLogger starts a new logger driver for the container.
func (container *Container) StartLogger(cfg containertypes.LogConfig) (logger.Logger, error) {
c, err := logger.GetLogDriver(cfg.Type)

82
daemon/checkpoint.go Normal file
Просмотреть файл

@ -0,0 +1,82 @@
package daemon
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/docker/docker/api/types"
)
// CheckpointCreate checkpoints the named container through containerd.
// The container must exist and be running. The checkpoint is written under
// the container's checkpoint directory with the ID and Exit setting taken
// from config. A "checkpoint" container event is logged on success.
func (daemon *Daemon) CheckpointCreate(name string, config types.CheckpointCreateOptions) error {
	ctr, err := daemon.GetContainer(name)
	if err != nil {
		return err
	}

	if running := ctr.IsRunning(); !running {
		return fmt.Errorf("Container %s not running", name)
	}

	if cpErr := daemon.containerd.CreateCheckpoint(ctr.ID, config.CheckpointID, ctr.CheckpointDir(), config.Exit); cpErr != nil {
		return fmt.Errorf("Cannot checkpoint container %s: %s", name, cpErr)
	}

	daemon.LogContainerEvent(ctr, "checkpoint")
	return nil
}
// CheckpointDelete deletes the specified checkpoint of the named container by
// removing its directory under the container's checkpoint directory.
//
// The checkpoint name must be a single path element. Empty names, "." and
// "..", and names containing a path separator are rejected, because the name
// is joined into a filesystem path that is then removed recursively; without
// this check such a name could remove the whole checkpoint directory or a
// path outside it.
//
// It returns the lookup error if the container does not exist, an error for
// an invalid checkpoint name, or the error from os.RemoveAll.
func (daemon *Daemon) CheckpointDelete(name string, checkpoint string) error {
	container, err := daemon.GetContainer(name)
	if err != nil {
		return err
	}

	// filepath.Base strips any directory part, so a mismatch means the name
	// contained a separator; "." and ".." survive Base and are checked
	// explicitly.
	if checkpoint == "" || checkpoint == "." || checkpoint == ".." || filepath.Base(checkpoint) != checkpoint {
		return fmt.Errorf("Invalid checkpoint name %q for container %s", checkpoint, name)
	}

	checkpointDir := container.CheckpointDir()
	return os.RemoveAll(filepath.Join(checkpointDir, checkpoint))
}
// CheckpointList lists the checkpoints stored for the named container.
//
// Each sub-directory of the container's checkpoint directory is treated as
// one checkpoint, described by the config.json file inside it. Non-directory
// entries are skipped. The result is always a non-nil slice on success, so a
// container without checkpoints is encoded as an empty JSON array rather
// than null.
//
// It returns an error if the container cannot be found, if the checkpoint
// directory cannot be created or read, or if any checkpoint's config.json is
// unreadable or not valid JSON.
func (daemon *Daemon) CheckpointList(name string) ([]types.Checkpoint, error) {
	container, err := daemon.GetContainer(name)
	if err != nil {
		return nil, err
	}

	checkpointDir := container.CheckpointDir()
	// Ensure the directory exists so that listing a container that has no
	// checkpoint directory yields an empty list instead of a read error.
	if err := os.MkdirAll(checkpointDir, 0755); err != nil {
		return nil, err
	}

	dirs, err := ioutil.ReadDir(checkpointDir)
	if err != nil {
		return nil, err
	}

	out := make([]types.Checkpoint, 0, len(dirs))
	for _, d := range dirs {
		if !d.IsDir() {
			continue
		}
		path := filepath.Join(checkpointDir, d.Name(), "config.json")
		data, err := ioutil.ReadFile(path)
		if err != nil {
			return nil, err
		}
		var cpt types.Checkpoint
		if err := json.Unmarshal(data, &cpt); err != nil {
			return nil, err
		}
		out = append(out, cpt)
	}

	return out, nil
}

Просмотреть файл

@ -24,7 +24,7 @@ type Backend interface {
SetupIngress(req clustertypes.NetworkCreateRequest, nodeIP string) error
PullImage(ctx context.Context, image, tag string, metaHeaders map[string][]string, authConfig *types.AuthConfig, outStream io.Writer) error
CreateManagedContainer(config types.ContainerCreateConfig, validateHostname bool) (types.ContainerCreateResponse, error)
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool) error
ContainerStart(name string, hostConfig *container.HostConfig, validateHostname bool, checkpoint string) error
ContainerStop(name string, seconds int) error
ConnectContainerToNetwork(containerName, networkName string, endpointConfig *network.EndpointSettings) error
UpdateContainerServiceConfig(containerName string, serviceConfig *clustertypes.ServiceConfig) error

Просмотреть файл

@ -220,7 +220,7 @@ func (c *containerAdapter) create(ctx context.Context) error {
func (c *containerAdapter) start(ctx context.Context) error {
version := httputils.VersionFromContext(ctx)
validateHostname := versions.GreaterThanOrEqualTo(version, "1.24")
return c.backend.ContainerStart(c.container.name(), nil, validateHostname)
return c.backend.ContainerStart(c.container.name(), nil, validateHostname, "")
}
func (c *containerAdapter) inspect(ctx context.Context) (types.ContainerJSON, error) {

Просмотреть файл

@ -115,6 +115,9 @@ func (daemon *Daemon) create(params types.ContainerCreateConfig, managed bool) (
if err := idtools.MkdirAs(container.Root, 0700, rootUID, rootGID); err != nil {
return nil, err
}
if err := idtools.MkdirAs(container.CheckpointDir(), 0700, rootUID, rootGID); err != nil {
return nil, err
}
if err := daemon.setHostConfig(container, params.HostConfig); err != nil {
return nil, err

Просмотреть файл

@ -287,7 +287,7 @@ func (daemon *Daemon) restore() error {
// Make sure networks are available before starting
daemon.waitForNetworks(c)
if err := daemon.containerStart(c); err != nil {
if err := daemon.containerStart(c, ""); err != nil {
logrus.Errorf("Failed to start container %s: %s", c.ID, err)
}
close(chNotify)

Просмотреть файл

@ -28,7 +28,7 @@ func (daemon *Daemon) postRunProcessing(container *container.Container, e libcon
// Create a new servicing container, which will start, complete the update, and merge back the
// results if it succeeded, all as part of the below function call.
if err := daemon.containerd.Create((container.ID + "_servicing"), *spec, servicingOption); err != nil {
if err := daemon.containerd.Create((container.ID + "_servicing"), "", "", *spec, servicingOption); err != nil {
container.SetExitCode(-1)
return fmt.Errorf("Post-run update servicing failed: %s", err)
}

Просмотреть файл

@ -56,7 +56,7 @@ func (daemon *Daemon) containerRestart(container *container.Container, seconds i
}
}
if err := daemon.containerStart(container); err != nil {
if err := daemon.containerStart(container, ""); err != nil {
return err
}

Просмотреть файл

@ -19,7 +19,7 @@ import (
)
// ContainerStart starts a container.
func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, validateHostname bool) error {
func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, validateHostname bool, checkpoint string) error {
container, err := daemon.GetContainer(name)
if err != nil {
return err
@ -78,19 +78,19 @@ func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.Hos
return err
}
return daemon.containerStart(container)
return daemon.containerStart(container, checkpoint)
}
// Start starts a container
func (daemon *Daemon) Start(container *container.Container) error {
return daemon.containerStart(container)
return daemon.containerStart(container, "")
}
// containerStart prepares the container to run by setting up everything the
// container needs, such as storage and networking, as well as links
// between containers. The container is left waiting for a signal to
// begin running.
func (daemon *Daemon) containerStart(container *container.Container) (err error) {
func (daemon *Daemon) containerStart(container *container.Container, checkpoint string) (err error) {
container.Lock()
defer container.Unlock()
@ -150,7 +150,7 @@ func (daemon *Daemon) containerStart(container *container.Container) (err error)
createOptions = append(createOptions, *copts...)
}
if err := daemon.containerd.Create(container.ID, *spec, createOptions...); err != nil {
if err := daemon.containerd.Create(container.ID, checkpoint, container.CheckpointDir(), *spec, createOptions...); err != nil {
errDesc := grpc.ErrorDesc(err)
logrus.Errorf("Create container failed with error: %s", errDesc)
// if we receive an internal error from the initial start of a container then lets

Просмотреть файл

@ -74,6 +74,7 @@ to build a Docker binary with the experimental features enabled:
* [External graphdriver plugins](plugins_graphdriver.md)
* [Macvlan and Ipvlan Network Drivers](vlan-networks.md)
* [Docker Stacks and Distributed Application Bundles](docker-stacks-and-bundles.md)
* [Checkpoint & Restore](checkpoint-restore.md)
## How to comment on an experimental feature

Просмотреть файл

@ -0,0 +1,75 @@
# Docker Checkpoint & Restore
Checkpoint & Restore is a new feature that allows you to freeze a running
container by checkpointing it, which turns its state into a collection of files
on disk. Later, the container can be restored from the point it was frozen.
This is accomplished using a tool called [CRIU](http://criu.org), which is an
external dependency of this feature. A good overview of the history of
checkpoint and restore in Docker is available in this
[Kubernetes blog post](http://blog.kubernetes.io/2015/07/how-did-quake-demo-from-dockercon-work.html).
## Installing CRIU
If you use a Debian system, you can add the CRIU PPA and install with apt-get
[from the criu launchpad](https://launchpad.net/~criu/+archive/ubuntu/ppa).
Alternatively, you can [build CRIU from source](http://criu.org/Installation).
You need at least version 2.0 of CRIU to run checkpoint/restore in Docker.
## Use cases for checkpoint & restore
This feature is currently focused on single-host use cases for checkpoint and
restore. Here are a few:
- Restarting the host machine without stopping/starting containers
- Speeding up the start time of slow start applications
- "Rewinding" processes to an earlier point in time
- "Forensic debugging" of running processes
Another primary use case of checkpoint & restore outside of Docker is the live
migration of a server from one machine to another. This is possible with the
current implementation, but not currently a priority (and so the workflow is
not optimized for the task).
## Using Checkpoint & Restore
A new top-level command, `docker checkpoint`, is introduced, with three subcommands:
- `create` (creates a new checkpoint)
- `ls` (lists existing checkpoints)
- `rm` (deletes an existing checkpoint)
Additionally, a `--checkpoint` flag is added to the container start command.
The options for checkpoint create:
Usage: docker checkpoint create [OPTIONS] CONTAINER CHECKPOINT_ID
Checkpoint the specified container
--leave-running=false leave the container running after checkpoint
And to restore a container:
Usage: docker start --checkpoint CHECKPOINT_ID [OTHER OPTIONS] CONTAINER
A simple example of using checkpoint & restore on a container:
$ docker run --security-opt=seccomp:unconfined --name cr -d busybox /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done'
> abc0123
$ docker checkpoint create cr checkpoint1
# <later>
$ docker start --checkpoint checkpoint1 cr
> abc0123
This process just logs an incrementing counter to stdout. If you `docker logs`
in between running/checkpointing/restoring you should see that the counter
increases while the process is running, stops while it's checkpointed, and
resumes from the point it left off once you restore.
Note that seccomp is only supported by CRIU in very up-to-date kernels.

Просмотреть файл

@ -10,6 +10,7 @@ import (
"github.com/docker/docker/pkg/homedir"
"github.com/docker/docker/pkg/integration/checker"
icmd "github.com/docker/docker/pkg/integration/cmd"
"github.com/docker/docker/utils"
"github.com/go-check/check"
)
@ -122,6 +123,12 @@ func (s *DockerSuite) TestHelpTextVerify(c *check.C) {
cmdsToTest = append(cmdsToTest, "network ls")
cmdsToTest = append(cmdsToTest, "network rm")
if utils.ExperimentalBuild() {
cmdsToTest = append(cmdsToTest, "checkpoint create")
cmdsToTest = append(cmdsToTest, "checkpoint ls")
cmdsToTest = append(cmdsToTest, "checkpoint rm")
}
// Divide the list of commands into go routines and run the func testcommand on the commands in parallel
// to save runtime of test

Просмотреть файл

@ -133,7 +133,7 @@ func (clnt *client) prepareBundleDir(uid, gid int) (string, error) {
return p, nil
}
func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) {
func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) (err error) {
clnt.lock(containerID)
defer clnt.unlock(containerID)
@ -180,7 +180,7 @@ func (clnt *client) Create(containerID string, spec Spec, options ...CreateOptio
return err
}
return container.start()
return container.start(checkpoint, checkpointDir)
}
func (clnt *client) Signal(containerID string, sig int) error {
@ -625,3 +625,57 @@ func (en *exitNotifier) close() {
func (en *exitNotifier) wait() <-chan struct{} {
return en.c
}
// CreateCheckpoint asks containerd to checkpoint the given container into
// checkpointDir under the name checkpointID. The per-container lock is held
// for the duration of the call, and the container must be known to this
// client. exit is forwarded as the checkpoint's Exit option; the remaining
// options are fixed: Tcp and UnixSockets enabled, Shell disabled, and
// EmptyNS set to "network".
func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)

	_, err := clnt.getContainer(containerID)
	if err != nil {
		return err
	}

	settings := &containerd.Checkpoint{
		Name:        checkpointID,
		Exit:        exit,
		Tcp:         true,
		UnixSockets: true,
		Shell:       false,
		EmptyNS:     []string{"network"},
	}
	request := &containerd.CreateCheckpointRequest{
		Id:            containerID,
		Checkpoint:    settings,
		CheckpointDir: checkpointDir,
	}

	_, err = clnt.remote.apiClient.CreateCheckpoint(context.Background(), request)
	return err
}
// DeleteCheckpoint asks containerd to delete the checkpoint checkpointID of
// the given container from checkpointDir. The per-container lock is held for
// the duration of the call, and the container must be known to this client.
func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)

	_, err := clnt.getContainer(containerID)
	if err != nil {
		return err
	}

	request := &containerd.DeleteCheckpointRequest{
		Id:            containerID,
		Name:          checkpointID,
		CheckpointDir: checkpointDir,
	}

	_, err = clnt.remote.apiClient.DeleteCheckpoint(context.Background(), request)
	return err
}
// ListCheckpoints asks containerd for the checkpoints of the given container
// stored in checkpointDir and returns the response converted to *Checkpoints.
// The per-container lock is held for the duration of the call, and the
// container must be known to this client.
func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)

	if _, lookupErr := clnt.getContainer(containerID); lookupErr != nil {
		return nil, lookupErr
	}

	request := &containerd.ListCheckpointRequest{
		Id:            containerID,
		CheckpointDir: checkpointDir,
	}
	reply, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), request)
	if err != nil {
		return nil, err
	}

	return (*Checkpoints)(reply), nil
}

Просмотреть файл

@ -12,7 +12,7 @@ func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendly
return nil
}
func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) (err error) {
func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) (err error) {
return nil
}

Просмотреть файл

@ -37,7 +37,7 @@ const defaultOwner = "docker"
// Create is the entrypoint to create a container from a spec, and if successfully
// created, start it too.
func (clnt *client) Create(containerID string, spec Spec, options ...CreateOption) error {
func (clnt *client) Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) error {
logrus.Debugln("libcontainerd: client.Create() with spec", spec)
configuration := &hcsshim.ContainerConfig{
@ -435,3 +435,15 @@ func (clnt *client) UpdateResources(containerID string, resources Resources) err
// but we should return nil for enabling updating container
return nil
}
// CreateCheckpoint always returns an error: checkpoints are not supported for
// Windows containers.
func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
return errors.New("Windows: Containers do not support checkpoints")
}
// DeleteCheckpoint always returns an error: checkpoints are not supported for
// Windows containers.
func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
return errors.New("Windows: Containers do not support checkpoints")
}
// ListCheckpoints always returns a nil result and an error: checkpoints are
// not supported for Windows containers.
func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
return nil, errors.New("Windows: Containers do not support checkpoints")
}

Просмотреть файл

@ -86,7 +86,7 @@ func (ctr *container) spec() (*specs.Spec, error) {
return &spec, nil
}
func (ctr *container) start() error {
func (ctr *container) start(checkpoint string, checkpointDir string) error {
spec, err := ctr.spec()
if err != nil {
return nil
@ -102,6 +102,8 @@ func (ctr *container) start() error {
Stdin: ctr.fifo(syscall.Stdin),
Stdout: ctr.fifo(syscall.Stdout),
Stderr: ctr.fifo(syscall.Stderr),
Checkpoint: checkpoint,
CheckpointDir: checkpointDir,
// check to see if we are running in ramdisk to disable pivot root
NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
Runtime: ctr.runtime,
@ -191,7 +193,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
defer ctr.client.unlock(ctr.containerID)
ctr.restarting = false
if err == nil {
if err = ctr.start(); err != nil {
if err = ctr.start("", ""); err != nil {
logrus.Errorf("libcontainerd: error restarting %v", err)
}
}

Просмотреть файл

@ -261,7 +261,7 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err
ctr.restarting = false
ctr.client.deleteContainer(ctr.friendlyName)
if err == nil {
if err = ctr.client.Create(ctr.containerID, ctr.ociSpec, ctr.options...); err != nil {
if err = ctr.client.Create(ctr.containerID, "", "", ctr.ociSpec, ctr.options...); err != nil {
logrus.Errorf("libcontainerd: error restarting %v", err)
}
}

Просмотреть файл

@ -36,7 +36,7 @@ type Backend interface {
// Client provides access to containerd features.
type Client interface {
Create(containerID string, spec Spec, options ...CreateOption) error
Create(containerID string, checkpoint string, checkpointDir string, spec Spec, options ...CreateOption) error
Signal(containerID string, sig int) error
SignalProcess(containerID string, processFriendlyName string, sig int) error
AddProcess(ctx context.Context, containerID, processFriendlyName string, process Process) error
@ -48,6 +48,9 @@ type Client interface {
GetPidsForContainer(containerID string) ([]int, error)
Summary(containerID string) ([]Summary, error)
UpdateResources(containerID string, resources Resources) error
CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error
DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error
ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error)
}
// CreateOption allows to configure parameters of container creation.

Просмотреть файл

@ -53,3 +53,6 @@ type User specs.User
// Resources defines updatable container resource values.
type Resources containerd.UpdateResource
// Checkpoints is the result of listing a container's checkpoints; it has the
// same underlying type as containerd's ListCheckpointResponse.
type Checkpoints containerd.ListCheckpointResponse

Просмотреть файл

@ -37,3 +37,13 @@ type Resources struct{}
type ServicingOption struct {
IsServicing bool
}
// Checkpoint holds the details of a checkpoint (not supported in windows)
type Checkpoint struct {
// Name is the checkpoint's name.
Name string
}
// Checkpoints holds a list of checkpoints (not supported in windows)
type Checkpoints struct {
// Checkpoints is the list of checkpoint entries.
Checkpoints []*Checkpoint
}

Просмотреть файл

@ -27,7 +27,7 @@ func (pm *Manager) enable(p *v2.Plugin, force bool) error {
}
p.RestartManager = restartmanager.New(container.RestartPolicy{Name: "always"}, 0)
if err := pm.containerdClient.Create(p.GetID(), libcontainerd.Spec(*spec), libcontainerd.WithRestartManager(p.RestartManager)); err != nil {
if err := pm.containerdClient.Create(p.GetID(), "", "", libcontainerd.Spec(*spec), libcontainerd.WithRestartManager(p.RestartManager)); err != nil {
if err := p.RestartManager.Cancel(); err != nil {
logrus.Errorf("enable: restartManager.Cancel failed due to %v", err)
}