зеркало из https://github.com/microsoft/docker.git
Add documentation and update restart rules.
Implement time backed backoff for restarting and fix failure count when the maximum is 0 Signed-off-by: Michael Crosby <michael@docker.com>
This commit is contained in:
Родитель
2b0776c883
Коммит
860c13b788
|
@ -11,6 +11,8 @@ import (
|
|||
"github.com/docker/docker/utils"
|
||||
)
|
||||
|
||||
const defaultTimeIncrement = 100
|
||||
|
||||
// containerMonitor monitors the execution of a container's main process.
|
||||
// If a restart policy is specified for the container the monitor will ensure that the
|
||||
// process is restarted based on the rules of the policy. When the container is finally stopped
|
||||
|
@ -19,16 +21,30 @@ import (
|
|||
type containerMonitor struct {
|
||||
mux sync.Mutex
|
||||
|
||||
container *Container
|
||||
// container is the container being monitored
|
||||
container *Container
|
||||
|
||||
// restartPolicy is the policy being applied to the container monitor
|
||||
restartPolicy runconfig.RestartPolicy
|
||||
failureCount int
|
||||
shouldStop bool
|
||||
|
||||
// failureCount is the number of times the container has failed to
|
||||
// start in a row
|
||||
failureCount int
|
||||
|
||||
// shouldStop signals the monitor that the next time the container exits it is
|
||||
// either because docker or the user asked for the container to be stopped
|
||||
shouldStop bool
|
||||
|
||||
// timeIncrement is the amount of time to wait between restarts
|
||||
// this is in milliseconds
|
||||
timeIncrement int
|
||||
}
|
||||
|
||||
func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
|
||||
return &containerMonitor{
|
||||
container: container,
|
||||
restartPolicy: policy,
|
||||
timeIncrement: defaultTimeIncrement,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -62,7 +78,7 @@ func (m *containerMonitor) Close() error {
|
|||
|
||||
// reset resets the container's IO and ensures that the command is able to be executed again
|
||||
// by copying the data into a new struct
|
||||
func (m *containerMonitor) reset() {
|
||||
func (m *containerMonitor) reset(successful bool) {
|
||||
container := m.container
|
||||
|
||||
if container.Config.OpenStdin {
|
||||
|
@ -107,14 +123,29 @@ func (m *containerMonitor) reset() {
|
|||
Dir: c.Dir,
|
||||
SysProcAttr: c.SysProcAttr,
|
||||
}
|
||||
|
||||
// the container exited successfully so we need to reset the failure counter
|
||||
// and the timeIncrement back to the default values
|
||||
if successful {
|
||||
m.failureCount = 0
|
||||
m.timeIncrement = defaultTimeIncrement
|
||||
} else {
|
||||
// otherwise we need to increment the amount of time we wait before restarting
|
||||
// the process. We will build up by multiplying the increment by 2
|
||||
|
||||
m.failureCount++
|
||||
m.timeIncrement *= 2
|
||||
}
|
||||
}
|
||||
|
||||
// Start starts the container's process and monitors it according to the restart policy
|
||||
func (m *containerMonitor) Start() error {
|
||||
var (
|
||||
err error
|
||||
exitCode int
|
||||
err error
|
||||
exitStatus int
|
||||
)
|
||||
|
||||
// ensure that when the monitor finally exits we release the networking and unmount the rootfs
|
||||
defer m.Close()
|
||||
|
||||
// reset the restart count
|
||||
|
@ -122,31 +153,26 @@ func (m *containerMonitor) Start() error {
|
|||
|
||||
for !m.shouldStop {
|
||||
m.container.RestartCount++
|
||||
|
||||
if err := m.container.startLoggingToDisk(); err != nil {
|
||||
m.reset()
|
||||
m.reset(false)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)
|
||||
|
||||
if exitCode, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
|
||||
m.failureCount++
|
||||
|
||||
if m.failureCount == m.restartPolicy.MaximumRetryCount {
|
||||
m.ExitOnNext()
|
||||
}
|
||||
|
||||
if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
|
||||
utils.Errorf("Error running container: %s", err)
|
||||
}
|
||||
|
||||
// We still wait to set the state as stopped and ensure that the locks were released
|
||||
m.container.State.SetStopped(exitCode)
|
||||
m.container.State.SetStopped(exitStatus)
|
||||
|
||||
m.reset()
|
||||
m.reset(err == nil && exitStatus == 0)
|
||||
|
||||
if m.shouldRestart(exitCode) {
|
||||
time.Sleep(1 * time.Second)
|
||||
if m.shouldRestart(exitStatus) {
|
||||
time.Sleep(time.Duration(m.timeIncrement) * time.Millisecond)
|
||||
|
||||
continue
|
||||
}
|
||||
|
@ -157,16 +183,31 @@ func (m *containerMonitor) Start() error {
|
|||
return err
|
||||
}
|
||||
|
||||
func (m *containerMonitor) shouldRestart(exitCode int) bool {
|
||||
// shouldRestart checks the restart policy and applies the rules to determine if
|
||||
// the container's process should be restarted
|
||||
func (m *containerMonitor) shouldRestart(exitStatus int) bool {
|
||||
m.mux.Lock()
|
||||
defer m.mux.Unlock()
|
||||
|
||||
shouldRestart := (m.restartPolicy.Name == "always" ||
|
||||
(m.restartPolicy.Name == "on-failure" && exitCode != 0)) &&
|
||||
!m.shouldStop
|
||||
// do not restart if the user or docker has requested that this container be stopped
|
||||
if m.shouldStop {
|
||||
return false
|
||||
}
|
||||
|
||||
m.mux.Unlock()
|
||||
switch m.restartPolicy.Name {
|
||||
case "always":
|
||||
return true
|
||||
case "on-failure":
|
||||
// the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
|
||||
if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount >= max {
|
||||
utils.Debugf("stopping restart of container %s because maximum failure could of %d has been reached", max)
|
||||
return false
|
||||
}
|
||||
|
||||
return shouldRestart
|
||||
return exitStatus != 0
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// callback ensures that the container's state is properly updated after we
|
||||
|
|
|
@ -993,6 +993,7 @@ removed before the image is removed.
|
|||
format: ip:hostPort:containerPort | ip::containerPort | hostPort:containerPort
|
||||
(use 'docker port' to see the actual mapping)
|
||||
--privileged=false Give extended privileges to this container
|
||||
--restart="" Restart policy to apply when a container exits (no, on-failure, always)
|
||||
--rm=false Automatically remove the container when it exits (incompatible with -d)
|
||||
--sig-proxy=true Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.
|
||||
-t, --tty=false Allocate a pseudo-TTY
|
||||
|
@ -1220,6 +1221,31 @@ application change:
|
|||
`--rm` option means that when the container exits, the container's layer is
|
||||
removed.
|
||||
|
||||
#### Restart Policies
|
||||
|
||||
Using the `--restart` flag on docker run you can specify a restart policy for
|
||||
how a container should or should not be restarted on exit.
|
||||
|
||||
**no** - Do not restart the container when it exits.
|
||||
|
||||
**on-failure** - Restart the container only if it exits with a non-zero exit status.
|
||||
|
||||
**always** - Always restart the container regardless of the exit status.
|
||||
|
||||
You can also specify the maximum number of times docker will try to restart the
|
||||
container when using the **on-failure** policy. The default is that docker will try forever to restart the container.
|
||||
|
||||
$ sudo docker run --restart=always redis
|
||||
|
||||
This will run the redis container with a restart policy of **always** so that if
|
||||
the container exits, docker will restart it.
|
||||
|
||||
$ sudo docker run --restart=on-failure:10 redis
|
||||
|
||||
This will run the redis container with a restart policy of **on-failure** and a
|
||||
maximum restart count of 10. If the redis container exits with a non-zero exit
|
||||
status more than 10 times in a row docker will abort trying to restart the container.
|
||||
|
||||
## save
|
||||
|
||||
Usage: docker save IMAGE
|
||||
|
|
|
@ -17,11 +17,12 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
ErrInvalidWorkingDirectory = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.")
|
||||
ErrConflictAttachDetach = fmt.Errorf("Conflicting options: -a and -d")
|
||||
ErrConflictDetachAutoRemove = fmt.Errorf("Conflicting options: --rm and -d")
|
||||
ErrConflictNetworkHostname = fmt.Errorf("Conflicting options: -h and the network mode (--net)")
|
||||
ErrConflictHostNetworkAndLinks = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.")
|
||||
ErrInvalidWorkingDirectory = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.")
|
||||
ErrConflictAttachDetach = fmt.Errorf("Conflicting options: -a and -d")
|
||||
ErrConflictDetachAutoRemove = fmt.Errorf("Conflicting options: --rm and -d")
|
||||
ErrConflictNetworkHostname = fmt.Errorf("Conflicting options: -h and the network mode (--net)")
|
||||
ErrConflictHostNetworkAndLinks = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.")
|
||||
ErrConflictRestartPolicyAndAutoRemove = fmt.Errorf("Conflicting options: --restart and --rm")
|
||||
)
|
||||
|
||||
//FIXME Only used in tests
|
||||
|
@ -72,7 +73,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
|
|||
flCpuShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)")
|
||||
flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
|
||||
flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
|
||||
flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy when the dies")
|
||||
flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure, always)")
|
||||
// For documentation purpose
|
||||
_ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.")
|
||||
_ = cmd.String([]string{"#name", "-name"}, "", "Assign a name to the container")
|
||||
|
@ -227,8 +228,6 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
|
|||
}
|
||||
// parse the '-e' and '--env' after, to allow override
|
||||
envVariables = append(envVariables, flEnv.GetAll()...)
|
||||
// boo, there's no debug output for docker run
|
||||
//log.Debugf("Environment variables for the container: %#v", envVariables)
|
||||
|
||||
netMode, err := parseNetMode(*flNetMode)
|
||||
if err != nil {
|
||||
|
@ -240,6 +239,10 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
|
|||
return nil, nil, cmd, err
|
||||
}
|
||||
|
||||
if *flAutoRemove && (restartPolicy.Name == "always" || restartPolicy.Name == "on-failure") {
|
||||
return nil, nil, cmd, ErrConflictRestartPolicyAndAutoRemove
|
||||
}
|
||||
|
||||
config := &Config{
|
||||
Hostname: hostname,
|
||||
Domainname: domainname,
|
||||
|
@ -307,7 +310,15 @@ func parseRestartPolicy(policy string) (RestartPolicy, error) {
|
|||
)
|
||||
|
||||
switch name {
|
||||
case "no", "on-failure", "always":
|
||||
case "always":
|
||||
p.Name = name
|
||||
|
||||
if len(parts) == 2 {
|
||||
return p, fmt.Errorf("maximum restart count not valid with restart policy of \"always\"")
|
||||
}
|
||||
case "no":
|
||||
// do nothing
|
||||
case "on-failure":
|
||||
p.Name = name
|
||||
|
||||
if len(parts) == 2 {
|
||||
|
|
Загрузка…
Ссылка в новой задаче