Add documentation and update restart rules.

Implement time-based backoff for restarting and fix failure count when
the maximum is 0

Signed-off-by: Michael Crosby <michael@docker.com>
This commit is contained in:
Michael Crosby 2014-08-04 18:20:53 -07:00
Родитель 2b0776c883
Коммит 860c13b788
3 изменённых файлов: 111 добавлений и 33 удалений

Просмотреть файл

@ -11,6 +11,8 @@ import (
"github.com/docker/docker/utils"
)
const defaultTimeIncrement = 100
// containerMonitor monitors the execution of a container's main process.
// If a restart policy is specified for the container the monitor will ensure that the
// process is restarted based on the rules of the policy. When the container is finally stopped
@ -19,16 +21,30 @@ import (
// containerMonitor tracks the lifetime of a single container's main process
// and applies its restart policy on exit.
type containerMonitor struct {
	mux sync.Mutex

	// container is the container being monitored
	container *Container

	// restartPolicy is the policy being applied to the container monitor
	restartPolicy runconfig.RestartPolicy

	// failureCount is the number of times the container has failed to
	// start in a row
	failureCount int

	// shouldStop signals the monitor that the next time the container exits it is
	// either because docker or the user asked for the container to be stopped
	shouldStop bool

	// timeIncrement is the amount of time to wait between restarts
	// this is in milliseconds
	timeIncrement int
}
// newContainerMonitor returns a monitor for the given container that will
// apply the supplied restart policy, starting from the default restart delay.
func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
	m := &containerMonitor{
		container:     container,
		restartPolicy: policy,
		timeIncrement: defaultTimeIncrement,
	}

	return m
}
@ -62,7 +78,7 @@ func (m *containerMonitor) Close() error {
// reset resets the container's IO and ensures that the command is able to be executed again
// by copying the data into a new struct
func (m *containerMonitor) reset() {
func (m *containerMonitor) reset(successful bool) {
container := m.container
if container.Config.OpenStdin {
@ -107,14 +123,29 @@ func (m *containerMonitor) reset() {
Dir: c.Dir,
SysProcAttr: c.SysProcAttr,
}
// the container exited successfully so we need to reset the failure counter
// and the timeIncrement back to the default values
if successful {
m.failureCount = 0
m.timeIncrement = defaultTimeIncrement
} else {
// otherwise we need to increment the amount of time we wait before restarting
// the process. We will build up by multiplying the increment by 2
m.failureCount++
m.timeIncrement *= 2
}
}
// Start starts the containers process and monitors it according to the restart policy
func (m *containerMonitor) Start() error {
var (
err error
exitCode int
err error
exitStatus int
)
// ensure that when the monitor finally exits we release the networking and unmount the rootfs
defer m.Close()
// reset the restart count
@ -122,31 +153,26 @@ func (m *containerMonitor) Start() error {
for !m.shouldStop {
m.container.RestartCount++
if err := m.container.startLoggingToDisk(); err != nil {
m.reset()
m.reset(false)
return err
}
pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)
if exitCode, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
m.failureCount++
if m.failureCount == m.restartPolicy.MaximumRetryCount {
m.ExitOnNext()
}
if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
utils.Errorf("Error running container: %s", err)
}
// We still wait to set the state as stopped and ensure that the locks were released
m.container.State.SetStopped(exitCode)
m.container.State.SetStopped(exitStatus)
m.reset()
m.reset(err == nil && exitStatus == 0)
if m.shouldRestart(exitCode) {
time.Sleep(1 * time.Second)
if m.shouldRestart(exitStatus) {
time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
continue
}
@ -157,16 +183,31 @@ func (m *containerMonitor) Start() error {
return err
}
func (m *containerMonitor) shouldRestart(exitCode int) bool {
// shouldRestart checks the restart policy and applies the rules to determine if
// the container's process should be restarted
func (m *containerMonitor) shouldRestart(exitStatus int) bool {
m.mux.Lock()
defer m.mux.Unlock()
shouldRestart := (m.restartPolicy.Name == "always" ||
(m.restartPolicy.Name == "on-failure" && exitCode != 0)) &&
!m.shouldStop
// do not restart if the user or docker has requested that this container be stopped
if m.shouldStop {
return false
}
m.mux.Unlock()
switch m.restartPolicy.Name {
case "always":
return true
case "on-failure":
// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount >= max {
utils.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", max)
return false
}
return shouldRestart
return exitStatus != 0
}
return false
}
// callback ensures that the container's state is properly updated after we

Просмотреть файл

@ -993,6 +993,7 @@ removed before the image is removed.
format: ip:hostPort:containerPort | ip::containerPort | hostPort:containerPort
(use 'docker port' to see the actual mapping)
--privileged=false Give extended privileges to this container
--restart="" Restart policy to apply when a container exits (no, on-failure, always)
--rm=false Automatically remove the container when it exits (incompatible with -d)
--sig-proxy=true Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.
-t, --tty=false Allocate a pseudo-TTY
@ -1220,6 +1221,31 @@ application change:
`--rm` option means that when the container exits, the container's layer is
removed.
#### Restart Policies
Using the `--restart` flag on docker run you can specify a restart policy for
how a container should or should not be restarted on exit.
** no ** - Do not restart the container when it exits.
** on-failure ** - Restart the container only if it exits with a non zero exit status.
** always ** - Always restart the container regardless of the exit status.
You can also specify the maximum number of times docker will try to restart the
container when using the ** on-failure ** policy. The default is that docker will try forever to restart the container.
$ sudo docker run --restart=always redis
This will run the redis container with a restart policy of ** always ** so that if
the container exits, docker will restart it.
$ sudo docker run --restart=on-failure:10 redis
This will run the redis container with a restart policy of ** on-failure ** and a
maximum restart count of 10. If the redis container exits with a non-zero exit
status more than 10 times in a row docker will abort trying to restart the container.
## save
Usage: docker save IMAGE

Просмотреть файл

@ -17,11 +17,12 @@ import (
)
// Errors returned while parsing `docker run` options. The rendered diff
// contained both the pre- and post-image declarations; only the post-image
// (gofmt-aligned, including the new --restart/--rm conflict error) is kept.
var (
	ErrInvalidWorkingDirectory            = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.")
	ErrConflictAttachDetach               = fmt.Errorf("Conflicting options: -a and -d")
	ErrConflictDetachAutoRemove           = fmt.Errorf("Conflicting options: --rm and -d")
	ErrConflictNetworkHostname            = fmt.Errorf("Conflicting options: -h and the network mode (--net)")
	ErrConflictHostNetworkAndLinks        = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.")
	ErrConflictRestartPolicyAndAutoRemove = fmt.Errorf("Conflicting options: --restart and --rm")
)
//FIXME Only used in tests
@ -72,7 +73,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
flCpuShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)")
flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy when the dies")
flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure, always)")
// For documentation purpose
_ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.")
_ = cmd.String([]string{"#name", "-name"}, "", "Assign a name to the container")
@ -227,8 +228,6 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
}
// parse the '-e' and '--env' after, to allow override
envVariables = append(envVariables, flEnv.GetAll()...)
// boo, there's no debug output for docker run
//log.Debugf("Environment variables for the container: %#v", envVariables)
netMode, err := parseNetMode(*flNetMode)
if err != nil {
@ -240,6 +239,10 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
return nil, nil, cmd, err
}
if *flAutoRemove && (restartPolicy.Name == "always" || restartPolicy.Name == "on-failure") {
return nil, nil, cmd, ErrConflictRestartPolicyAndAutoRemove
}
config := &Config{
Hostname: hostname,
Domainname: domainname,
@ -307,7 +310,15 @@ func parseRestartPolicy(policy string) (RestartPolicy, error) {
)
switch name {
case "no", "on-failure", "always":
case "always":
p.Name = name
if len(parts) == 2 {
return p, fmt.Errorf("maximum restart count not valid with restart policy of \"always\"")
}
case "no":
// do nothing
case "on-failure":
p.Name = name
if len(parts) == 2 {