Mirror of https://github.com/microsoft/docker.git
Merge pull request #7414 from crosbymichael/auto-restart
Auto restart containers based on restart policy
This commit is contained in:
Commit
b95f6c183b
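
For orientation before the diff itself: the core of this change is a per-container restart policy that the new containerMonitor consults every time the container's process exits. Below is a minimal, illustrative Go sketch of that decision; the field and policy names mirror the RestartPolicy type in the diff, everything else is not Docker code.

package main

import "fmt"

// RestartPolicy mirrors the struct this PR adds to runconfig.
type RestartPolicy struct {
    Name              string
    MaximumRetryCount int
}

// shouldRestart is a simplified stand-in for containerMonitor.shouldRestart:
// "always" restarts unconditionally, "on-failure" restarts on a non-zero exit
// until an optional maximum failure count is reached, anything else never restarts.
func shouldRestart(p RestartPolicy, exitStatus, failureCount int) bool {
    switch p.Name {
    case "always":
        return true
    case "on-failure":
        if max := p.MaximumRetryCount; max != 0 && failureCount >= max {
            return false
        }
        return exitStatus != 0
    }
    return false
}

func main() {
    p := RestartPolicy{Name: "on-failure", MaximumRetryCount: 3}
    fmt.Println(shouldRestart(p, 1, 0)) // true: failed, retries remain
    fmt.Println(shouldRestart(p, 0, 0)) // false: clean exit
    fmt.Println(shouldRestart(p, 1, 3)) // false: retry budget exhausted
}
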
@@ -45,7 +45,7 @@ type Config struct {
func (config *Config) InstallFlags() {
    flag.StringVar(&config.Pidfile, []string{"p", "-pidfile"}, "/var/run/docker.pid", "Path to use for daemon PID file")
    flag.StringVar(&config.Root, []string{"g", "-graph"}, "/var/lib/docker", "Path to use as the root of the Docker runtime")
    flag.BoolVar(&config.AutoRestart, []string{"r", "-restart"}, true, "Restart previously running containers")
    flag.BoolVar(&config.AutoRestart, []string{"#r", "#-restart"}, true, "--restart on the daemon has been deprecated in favor of --restart policies on docker run")
    flag.BoolVar(&config.EnableIptables, []string{"#iptables", "-iptables"}, true, "Enable Docker's addition of iptables rules")
    flag.BoolVar(&config.EnableIpForward, []string{"#ip-forward", "-ip-forward"}, true, "Enable net.ipv4.ip_forward")
    flag.StringVar(&config.BridgeIP, []string{"#bip", "-bip"}, "", "Use this CIDR notation address for the network bridge's IP, not compatible with -b")

@@ -75,6 +75,7 @@ type Container struct {
    daemon *Daemon
    MountLabel, ProcessLabel string
    RestartCount int

    Volumes map[string]string
    // Store rw/ro in a separate structure to preserve reverse-compatibility on-disk.

@@ -83,6 +84,7 @@ type Container struct {
    hostConfig *runconfig.HostConfig

    activeLinks map[string]*links.Link
    monitor *containerMonitor
}

func (container *Container) FromDisk() error {

@@ -277,6 +279,7 @@ func (container *Container) Start() (err error) {
    if container.State.IsRunning() {
        return nil
    }

    // if we encounter an error during start we need to ensure that any other
    // setup has been cleaned up properly
    defer func() {

@@ -312,9 +315,6 @@ func (container *Container) Start() (err error) {
    if err := setupMountsForContainer(container); err != nil {
        return err
    }
    if err := container.startLoggingToDisk(); err != nil {
        return err
    }

    return container.waitForStart()
}

@@ -495,37 +495,8 @@ func (container *Container) releaseNetwork() {
    container.NetworkSettings = &NetworkSettings{}
}

func (container *Container) monitor(callback execdriver.StartCallback) error {
    var (
        err      error
        exitCode int
    )

    pipes := execdriver.NewPipes(container.stdin, container.stdout, container.stderr, container.Config.OpenStdin)
    exitCode, err = container.daemon.Run(container, pipes, callback)
    if err != nil {
        log.Errorf("Error running container: %s", err)
    }
    container.State.SetStopped(exitCode)

    // Cleanup
    container.cleanup()

    // Re-create a brand new stdin pipe once the container exited
    if container.Config.OpenStdin {
        container.stdin, container.stdinPipe = io.Pipe()
    }
    container.LogEvent("die")
    // If the engine is shutting down, don't save the container state as stopped.
    // This will cause it to be restarted when the engine is restarted.
    if container.daemon != nil && container.daemon.eng != nil && !container.daemon.eng.IsShutdown() {
        if err := container.toDisk(); err != nil {
            log.Errorf("Error dumping container %s state to disk: %s\n", container.ID, err)
        }
    }
    return err
}

// cleanup releases any network resources allocated to the container along with any rules
// around how containers are linked together. It also unmounts the container's root filesystem.
func (container *Container) cleanup() {
    container.releaseNetwork()

@@ -535,22 +506,6 @@ func (container *Container) cleanup() {
            link.Disable()
        }
    }
    if container.Config.OpenStdin {
        if err := container.stdin.Close(); err != nil {
            log.Errorf("%s: Error close stdin: %s", container.ID, err)
        }
    }
    if err := container.stdout.Clean(); err != nil {
        log.Errorf("%s: Error close stdout: %s", container.ID, err)
    }
    if err := container.stderr.Clean(); err != nil {
        log.Errorf("%s: Error close stderr: %s", container.ID, err)
    }
    if container.command != nil && container.command.Terminal != nil {
        if err := container.command.Terminal.Close(); err != nil {
            log.Errorf("%s: Error closing terminal: %s", container.ID, err)
        }
    }

    if err := container.Unmount(); err != nil {
        log.Errorf("%v: Failed to umount filesystem: %v", container.ID, err)

@@ -570,6 +525,18 @@ func (container *Container) KillSig(sig int) error {
    if !container.State.IsRunning() {
        return nil
    }

    // signal to the monitor that it should not restart the container
    // after we send the kill signal
    container.monitor.ExitOnNext()

    // if the container is currently restarting we do not need to send the signal
    // to the process. Telling the monitor that it should exit on its next event
    // loop is enough
    if container.State.IsRestarting() {
        return nil
    }

    return container.daemon.Kill(container, sig)
}

@@ -1112,33 +1079,16 @@ func (container *Container) startLoggingToDisk() error {
}

func (container *Container) waitForStart() error {
    waitStart := make(chan struct{})
    callback := func(command *execdriver.Command) {
        if command.Tty {
            // The callback is called after the process Start()
            // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
            // which we close here.
            if c, ok := command.Stdout.(io.Closer); ok {
                c.Close()
            }
        }
        container.State.SetRunning(command.Pid())
        if err := container.toDisk(); err != nil {
            log.Debugf("%s", err)
        }
        close(waitStart)
    }
    container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy)

    // We use a callback here instead of a goroutine and a channel for
    // synchronization purposes
    cErr := utils.Go(func() error { return container.monitor(callback) })

    // Start should not return until the process is actually running
    // block until we either receive an error from the initial start of the container's
    // process or until the process is running in the container
    select {
    case <-waitStart:
    case err := <-cErr:
    case <-container.monitor.startSignal:
    case err := <-utils.Go(container.monitor.Start):
        return err
    }

    return nil
}
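
The rewritten waitForStart above blocks on either the monitor's startSignal channel or an error returned by running the monitor in a goroutine. The following is a small, self-contained sketch of that start-or-fail pattern; the names run, started and errCh are illustrative and not part of Docker.

package main

import (
    "fmt"
    "time"
)

// run stands in for the container's main process: it signals that it has
// started by closing the started channel, then keeps "running".
func run(started chan struct{}) error {
    time.Sleep(50 * time.Millisecond) // pretend to fork/exec
    close(started)                    // comparable to the monitor closing startSignal in its callback
    time.Sleep(time.Hour)             // the process keeps running; the demo exits long before this returns
    return nil
}

// waitForStart blocks until the process either reports that it is running or
// fails before getting that far, mirroring the select over startSignal and the
// monitor's error channel in the diff above.
func waitForStart() error {
    started := make(chan struct{})
    errCh := make(chan error, 1)

    go func() { errCh <- run(started) }()

    select {
    case <-started:
        return nil
    case err := <-errCh:
        return err
    }
}

func main() {
    fmt.Println(waitForStart()) // <nil> once the fake process has signalled start
}
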

@@ -172,20 +172,24 @@ func (daemon *Daemon) load(id string) (*Container, error) {
    if err := container.FromDisk(); err != nil {
        return nil, err
    }

    if container.ID != id {
        return container, fmt.Errorf("Container %s is stored at %s", container.ID, id)
    }

    container.readHostConfig()

    return container, nil
}

// Register makes a container object usable by the daemon as <container.ID>
// This is a wrapper for register
func (daemon *Daemon) Register(container *Container) error {
    return daemon.register(container, true, nil)
    return daemon.register(container, true)
}

// register makes a container object usable by the daemon as <container.ID>
func (daemon *Daemon) register(container *Container, updateSuffixarray bool, containersToStart *[]*Container) error {
func (daemon *Daemon) register(container *Container, updateSuffixarray bool) error {
    if container.daemon != nil || daemon.Exists(container.ID) {
        return fmt.Errorf("Container is already loaded")
    }

@@ -257,14 +261,6 @@ func (daemon *Daemon) register(container *Container, updateSuffixarray bool, con
            if err := container.ToDisk(); err != nil {
                return err
            }

            if daemon.config.AutoRestart {
                log.Debugf("Marking as restarting")

                if containersToStart != nil {
                    *containersToStart = append(*containersToStart, container)
                }
            }
        }
    }
    return nil

@@ -296,10 +292,9 @@ func (daemon *Daemon) LogToDisk(src *broadcastwriter.BroadcastWriter, dst, strea
func (daemon *Daemon) restore() error {
    var (
        debug = (os.Getenv("DEBUG") != "" || os.Getenv("TEST") != "")
        containers = make(map[string]*Container)
        currentDriver = daemon.driver.String()
        containersToStart = []*Container{}
        debug = (os.Getenv("DEBUG") != "" || os.Getenv("TEST") != "")
        containers = make(map[string]*Container)
        currentDriver = daemon.driver.String()
    )

    if !debug {

@@ -322,24 +317,33 @@ func (daemon *Daemon) restore() error {
        }

        // Ignore the container if it does not support the current driver being used by the graph
        if container.Driver == "" && currentDriver == "aufs" || container.Driver == currentDriver {
        if (container.Driver == "" && currentDriver == "aufs") || container.Driver == currentDriver {
            log.Debugf("Loaded container %v", container.ID)

            containers[container.ID] = container
        } else {
            log.Debugf("Cannot load container %s because it was created with another graph driver.", container.ID)
        }
    }

    registeredContainers := []*Container{}

    if entities := daemon.containerGraph.List("/", -1); entities != nil {
        for _, p := range entities.Paths() {
            if !debug {
                fmt.Print(".")
            }

            e := entities[p]

            if container, ok := containers[e.ID()]; ok {
                if err := daemon.register(container, false, &containersToStart); err != nil {
                if err := daemon.register(container, false); err != nil {
                    log.Debugf("Failed to register container %s: %s", container.ID, err)
                }

                registeredContainers = append(registeredContainers, container)

                // delete from the map so that a new name is not automatically generated
                delete(containers, e.ID())
            }
        }

@@ -352,15 +356,28 @@ func (daemon *Daemon) restore() error {
        if err != nil {
            log.Debugf("Setting default id - %s", err)
        }
        if err := daemon.register(container, false, &containersToStart); err != nil {

        if err := daemon.register(container, false); err != nil {
            log.Debugf("Failed to register container %s: %s", container.ID, err)
        }

        registeredContainers = append(registeredContainers, container)
    }

    for _, container := range containersToStart {
        log.Debugf("Starting container %d", container.ID)
        if err := container.Start(); err != nil {
            log.Debugf("Failed to start container %s: %s", container.ID, err)
    // check the restart policy on the containers and restart any container with
    // the restart policy of "always"
    if daemon.config.AutoRestart {
        log.Debugf("Restarting containers...")

        for _, container := range registeredContainers {
            if container.hostConfig.RestartPolicy.Name == "always" ||
                (container.hostConfig.RestartPolicy.Name == "on-failure" && container.State.ExitCode != 0) {
                log.Debugf("Starting container %s", container.ID)

                if err := container.Start(); err != nil {
                    log.Debugf("Failed to start container %s: %s", container.ID, err)
                }
            }
        }
    }

@@ -0,0 +1,301 @@
package daemon

import (
    "io"
    "os/exec"
    "sync"
    "time"

    "github.com/docker/docker/daemon/execdriver"
    "github.com/docker/docker/pkg/log"
    "github.com/docker/docker/runconfig"
)

const defaultTimeIncrement = 100

// containerMonitor monitors the execution of a container's main process.
// If a restart policy is specified for the container the monitor will ensure that the
// process is restarted based on the rules of the policy. When the container is finally stopped
// the monitor will reset and clean up any of the container resources such as networking allocations
// and the rootfs
type containerMonitor struct {
    mux sync.Mutex

    // container is the container being monitored
    container *Container

    // restartPolicy is the current policy being applied to the container monitor
    restartPolicy runconfig.RestartPolicy

    // failureCount is the number of times the container has failed to
    // start in a row
    failureCount int

    // shouldStop signals the monitor that the next time the container exits it is
    // either because docker or the user asked for the container to be stopped
    shouldStop bool

    // startSignal is a channel that is closed after the container initially starts
    startSignal chan struct{}

    // stopChan is used to signal to the monitor whenever there is a wait for the
    // next restart so that the timeIncrement is not honored and the user is not
    // left waiting for nothing to happen during this time
    stopChan chan struct{}

    // timeIncrement is the amount of time to wait between restarts
    // this is in milliseconds
    timeIncrement int

    // lastStartTime is the time which the monitor last exec'd the container's process
    lastStartTime time.Time
}

// newContainerMonitor returns an initialized containerMonitor for the provided container
// honoring the provided restart policy
func newContainerMonitor(container *Container, policy runconfig.RestartPolicy) *containerMonitor {
    return &containerMonitor{
        container: container,
        restartPolicy: policy,
        timeIncrement: defaultTimeIncrement,
        stopChan: make(chan struct{}, 1),
        startSignal: make(chan struct{}, 1),
    }
}

// ExitOnNext signals to the container monitor that it should stop monitoring the container
// for exits the next time the process dies
func (m *containerMonitor) ExitOnNext() {
    m.mux.Lock()

    // we need to protect having a double close of the channel when stop is called
    // twice or else we will get a panic
    if !m.shouldStop {
        m.shouldStop = true
        close(m.stopChan)
    }

    m.mux.Unlock()
}

// Close closes the container's resources such as networking allocations and
// unmounts the container's root filesystem
func (m *containerMonitor) Close() error {
    // Cleanup networking and mounts
    m.container.cleanup()

    // FIXME: here is a race condition between two RUN instructions in Dockerfile
    // because they share the same runconfig and change image. Must be fixed
    // in builder/builder.go
    if err := m.container.toDisk(); err != nil {
        log.Errorf("Error dumping container %s state to disk: %s\n", m.container.ID, err)

        return err
    }

    return nil
}

// Start starts the container's process and monitors it according to the restart policy
func (m *containerMonitor) Start() error {
    var (
        err error
        exitStatus int
    )

    // ensure that when the monitor finally exits we release the networking and unmount the rootfs
    defer m.Close()

    // reset the restart count
    m.container.RestartCount = -1

    for {
        m.container.RestartCount++

        if err := m.container.startLoggingToDisk(); err != nil {
            m.resetContainer()

            return err
        }

        pipes := execdriver.NewPipes(m.container.stdin, m.container.stdout, m.container.stderr, m.container.Config.OpenStdin)

        m.container.LogEvent("start")

        m.lastStartTime = time.Now()

        if exitStatus, err = m.container.daemon.Run(m.container, pipes, m.callback); err != nil {
            // if we receive an internal error from the initial start of a container then let's
            // return it instead of entering the restart loop
            if m.container.RestartCount == 0 {
                m.resetContainer()

                return err
            }

            log.Errorf("Error running container: %s", err)
        }

        m.resetMonitor(err == nil && exitStatus == 0)

        if m.shouldRestart(exitStatus) {
            m.container.State.SetRestarting(exitStatus)

            m.container.LogEvent("die")

            m.resetContainer()

            // sleep with a small time increment between each restart to help avoid issues caused by quickly
            // restarting the container because of some types of errors (networking cut out, etc.)
            m.waitForNextRestart()

            // we need to check this before reentering the loop because the waitForNextRestart could have
            // been terminated by a request from a user
            if m.shouldStop {
                m.container.State.SetStopped(exitStatus)

                return err
            }

            continue
        }

        m.container.State.SetStopped(exitStatus)

        m.container.LogEvent("die")

        m.resetContainer()

        break
    }

    return err
}

// resetMonitor resets the stateful fields on the containerMonitor based on the
// previous run's success or failure. Regardless of success, if the container had
// an execution time of more than 10s then reset the timer back to the default
func (m *containerMonitor) resetMonitor(successful bool) {
    executionTime := time.Now().Sub(m.lastStartTime).Seconds()

    if executionTime > 10 {
        m.timeIncrement = defaultTimeIncrement
    } else {
        // otherwise we need to increment the amount of time we wait before restarting
        // the process. We will build up by multiplying the increment by 2
        m.timeIncrement *= 2
    }

    // the container exited successfully so we need to reset the failure counter
    if successful {
        m.failureCount = 0
    } else {
        m.failureCount++
    }
}
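
To make the backoff concrete: timeIncrement starts at the 100 ms default, doubles after every run shorter than ten seconds, and snaps back to the default after a longer run. A small stand-alone sketch (not Docker code) of that rule:

package main

import "fmt"

const defaultTimeIncrement = 100 // milliseconds, as in the monitor above

// nextIncrement applies the resetMonitor rule to the current backoff value.
func nextIncrement(current int, executionSeconds float64) int {
    if executionSeconds > 10 {
        return defaultTimeIncrement // ran long enough: reset the backoff
    }
    return current * 2 // quick failure: double the wait before the next restart
}

func main() {
    inc := defaultTimeIncrement
    // Three crash loops in a row, then one healthy 30 second run.
    for _, runtimeSeconds := range []float64{0.2, 0.3, 0.1, 30} {
        inc = nextIncrement(inc, runtimeSeconds)
        fmt.Printf("ran %.1fs -> wait %dms before next restart\n", runtimeSeconds, inc)
    }
    // Output:
    // ran 0.2s -> wait 200ms before next restart
    // ran 0.3s -> wait 400ms before next restart
    // ran 0.1s -> wait 800ms before next restart
    // ran 30.0s -> wait 100ms before next restart
}
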

// waitForNextRestart waits with the default time increment to restart the container unless
// a user or docker asks for the container to be stopped
func (m *containerMonitor) waitForNextRestart() {
    select {
    case <-time.After(time.Duration(m.timeIncrement) * time.Millisecond):
    case <-m.stopChan:
    }
}

// shouldRestart checks the restart policy and applies the rules to determine if
// the container's process should be restarted
func (m *containerMonitor) shouldRestart(exitStatus int) bool {
    m.mux.Lock()
    defer m.mux.Unlock()

    // do not restart if the user or docker has requested that this container be stopped
    if m.shouldStop {
        return false
    }

    switch m.restartPolicy.Name {
    case "always":
        return true
    case "on-failure":
        // the default value of 0 for MaximumRetryCount means that we will not enforce a maximum count
        if max := m.restartPolicy.MaximumRetryCount; max != 0 && m.failureCount >= max {
            log.Debugf("stopping restart of container %s because maximum failure count of %d has been reached", m.container.ID, max)
            return false
        }

        return exitStatus != 0
    }

    return false
}

// callback ensures that the container's state is properly updated after we
// receive an ack from the execution drivers
func (m *containerMonitor) callback(command *execdriver.Command) {
    if command.Tty {
        // The callback is called after the process Start()
        // so we are in the parent process. In TTY mode, stdin/out/err is the PtySlave
        // which we close here.
        if c, ok := command.Stdout.(io.Closer); ok {
            c.Close()
        }
    }

    m.container.State.SetRunning(command.Pid())

    if m.startSignal != nil {
        // signal that the process has started
        close(m.startSignal)
        m.startSignal = nil
    }

    if err := m.container.ToDisk(); err != nil {
        log.Debugf("%s", err)
    }
}

// resetContainer resets the container's IO and ensures that the command is able to be executed again
// by copying the data into a new struct
func (m *containerMonitor) resetContainer() {
    container := m.container

    if container.Config.OpenStdin {
        if err := container.stdin.Close(); err != nil {
            log.Errorf("%s: Error close stdin: %s", container.ID, err)
        }
    }

    if err := container.stdout.Clean(); err != nil {
        log.Errorf("%s: Error close stdout: %s", container.ID, err)
    }

    if err := container.stderr.Clean(); err != nil {
        log.Errorf("%s: Error close stderr: %s", container.ID, err)
    }

    if container.command != nil && container.command.Terminal != nil {
        if err := container.command.Terminal.Close(); err != nil {
            log.Errorf("%s: Error closing terminal: %s", container.ID, err)
        }
    }

    // Re-create a brand new stdin pipe once the container exited
    if container.Config.OpenStdin {
        container.stdin, container.stdinPipe = io.Pipe()
    }

    c := container.command.Cmd

    container.command.Cmd = exec.Cmd{
        Stdin: c.Stdin,
        Stdout: c.Stdout,
        Stderr: c.Stderr,
        Path: c.Path,
        Env: c.Env,
        ExtraFiles: c.ExtraFiles,
        Args: c.Args,
        Dir: c.Dir,
        SysProcAttr: c.SysProcAttr,
    }
}

@@ -36,7 +36,7 @@ func (daemon *Daemon) ContainerStart(job *engine.Job) engine.Status {
    if err := container.Start(); err != nil {
        return job.Errorf("Cannot start container %s: %s", name, err)
    }
    container.LogEvent("start")

    return engine.StatusOK
}

@@ -12,6 +12,7 @@ type State struct {
    sync.RWMutex
    Running bool
    Paused bool
    Restarting bool
    Pid int
    ExitCode int
    StartedAt time.Time

@@ -34,11 +35,17 @@ func (s *State) String() string {
        if s.Paused {
            return fmt.Sprintf("Up %s (Paused)", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
        }
        if s.Restarting {
            return fmt.Sprintf("Restarting (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
        }

        return fmt.Sprintf("Up %s", units.HumanDuration(time.Now().UTC().Sub(s.StartedAt)))
    }

    if s.FinishedAt.IsZero() {
        return ""
    }

    return fmt.Sprintf("Exited (%d) %s ago", s.ExitCode, units.HumanDuration(time.Now().UTC().Sub(s.FinishedAt)))
}

@@ -116,6 +123,7 @@ func (s *State) SetRunning(pid int) {
    s.Lock()
    s.Running = true
    s.Paused = false
    s.Restarting = false
    s.ExitCode = 0
    s.Pid = pid
    s.StartedAt = time.Now().UTC()

@@ -127,6 +135,7 @@ func (s *State) SetRunning(pid int) {
func (s *State) SetStopped(exitCode int) {
    s.Lock()
    s.Running = false
    s.Restarting = false
    s.Pid = 0
    s.FinishedAt = time.Now().UTC()
    s.ExitCode = exitCode

@@ -135,6 +144,29 @@ func (s *State) SetStopped(exitCode int) {
    s.Unlock()
}

// SetRestarting is called when docker handles the auto restart of containers when they are
// in the middle of a stop and being restarted again
func (s *State) SetRestarting(exitCode int) {
    s.Lock()
    // we should consider the container running when it is restarting because of
    // all the checks in docker around rm/stop/etc
    s.Running = true
    s.Restarting = true
    s.Pid = 0
    s.FinishedAt = time.Now().UTC()
    s.ExitCode = exitCode
    close(s.waitChan) // fire waiters for stop
    s.waitChan = make(chan struct{})
    s.Unlock()
}

func (s *State) IsRestarting() bool {
    s.RLock()
    res := s.Restarting
    s.RUnlock()
    return res
}

func (s *State) SetPaused() {
    s.Lock()
    s.Paused = true

@@ -30,6 +30,7 @@ docker-run - Run a command in a new container
[**-P**|**--publish-all**[=*false*]]
[**-p**|**--publish**[=*[]*]]
[**--privileged**[=*false*]]
[**--restart**[=*POLICY*]]
[**--rm**[=*false*]]
[**--sig-proxy**[=*true*]]
[**-t**|**--tty**[=*false*]]

@@ -64,9 +64,6 @@ unix://[/path/to/socket] to use.
**-p**=""
  Path to use for daemon PID file. Default is `/var/run/docker.pid`

**-r**=*true*|*false*
  Restart previously running containers. Default is true.

**-s**=""
  Force the Docker runtime to use a specific storage driver.

@@ -71,7 +71,6 @@ expect an integer, and they can only be specified once.
    --mtu=0  Set the containers network MTU
             if no value is provided: default to the default route MTU or 1500 if no default route is available
    -p, --pidfile="/var/run/docker.pid"  Path to use for daemon PID file
    -r, --restart=true  Restart previously running containers
    -s, --storage-driver=""  Force the Docker runtime to use a specific storage driver
    --selinux-enabled=false  Enable selinux support. SELinux does not presently support the BTRFS storage driver
    --storage-opt=[]  Set storage driver options

@@ -993,6 +992,7 @@ removed before the image is removed.
               format: ip:hostPort:containerPort | ip::containerPort | hostPort:containerPort
               (use 'docker port' to see the actual mapping)
    --privileged=false  Give extended privileges to this container
    --restart=""  Restart policy to apply when a container exits (no, on-failure, always)
    --rm=false  Automatically remove the container when it exits (incompatible with -d)
    --sig-proxy=true  Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.
    -t, --tty=false  Allocate a pseudo-TTY

@@ -1220,6 +1220,31 @@ application change:
`--rm` option means that when the container exits, the container's layer is
removed.

#### Restart Policies

Using the `--restart` flag on Docker run you can specify a restart policy for
how a container should or should not be restarted on exit.

** no ** - Do not restart the container when it exits.

** on-failure ** - Restart the container only if it exits with a non-zero exit status.

** always ** - Always restart the container regardless of the exit status.

You can also specify the maximum number of times Docker will try to restart the
container when using the ** on-failure ** policy. The default is that Docker will try forever to restart the container.

    $ sudo docker run --restart=always redis

This will run the `redis` container with a restart policy of ** always ** so that if
the container exits, Docker will restart it.

    $ sudo docker run --restart=on-failure:10 redis

This will run the `redis` container with a restart policy of ** on-failure ** and a
maximum restart count of 10. If the `redis` container exits with a non-zero exit
status more than 10 times in a row Docker will abort trying to restart the container.
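
The policy string given to `--restart` ends up in the RestartPolicy struct that this change adds to the HostConfig (see the runconfig diff further down). As a rough illustration, assuming Go's default JSON encoding for the untagged struct, `--restart=on-failure:10` travels roughly like this:

package main

import (
    "encoding/json"
    "fmt"
)

// RestartPolicy matches the struct this PR adds to runconfig's HostConfig.
type RestartPolicy struct {
    Name              string
    MaximumRetryCount int
}

func main() {
    // "--restart=on-failure:10" on the command line becomes:
    p := RestartPolicy{Name: "on-failure", MaximumRetryCount: 10}

    // With Go's default (untagged) marshaling this is approximately the shape
    // carried inside the HostConfig when the container is started.
    b, _ := json.Marshal(p)
    fmt.Println(string(b)) // {"Name":"on-failure","MaximumRetryCount":10}
}
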

## save

    Usage: docker save IMAGE

@@ -25,6 +25,11 @@ type DeviceMapping struct {
    CgroupPermissions string
}

type RestartPolicy struct {
    Name string
    MaximumRetryCount int
}

type HostConfig struct {
    Binds []string
    ContainerIDFile string

@@ -40,6 +45,7 @@ type HostConfig struct {
    NetworkMode NetworkMode
    CapAdd []string
    CapDrop []string
    RestartPolicy RestartPolicy
}

func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {

@@ -49,9 +55,11 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
        PublishAllPorts: job.GetenvBool("PublishAllPorts"),
        NetworkMode: NetworkMode(job.Getenv("NetworkMode")),
    }

    job.GetenvJson("LxcConf", &hostConfig.LxcConf)
    job.GetenvJson("PortBindings", &hostConfig.PortBindings)
    job.GetenvJson("Devices", &hostConfig.Devices)
    job.GetenvJson("RestartPolicy", &hostConfig.RestartPolicy)
    if Binds := job.GetenvList("Binds"); Binds != nil {
        hostConfig.Binds = Binds
    }

@@ -73,5 +81,6 @@ func ContainerHostConfigFromJob(job *engine.Job) *HostConfig {
    if CapDrop := job.GetenvList("CapDrop"); CapDrop != nil {
        hostConfig.CapDrop = CapDrop
    }

    return hostConfig
}

@@ -4,6 +4,7 @@ import (
    "fmt"
    "io/ioutil"
    "path"
    "strconv"
    "strings"

    "github.com/docker/docker/nat"

@@ -16,11 +17,12 @@ import (
)

var (
    ErrInvalidWorkingDirectory = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.")
    ErrConflictAttachDetach = fmt.Errorf("Conflicting options: -a and -d")
    ErrConflictDetachAutoRemove = fmt.Errorf("Conflicting options: --rm and -d")
    ErrConflictNetworkHostname = fmt.Errorf("Conflicting options: -h and the network mode (--net)")
    ErrConflictHostNetworkAndLinks = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.")
    ErrInvalidWorkingDirectory = fmt.Errorf("The working directory is invalid. It needs to be an absolute path.")
    ErrConflictAttachDetach = fmt.Errorf("Conflicting options: -a and -d")
    ErrConflictDetachAutoRemove = fmt.Errorf("Conflicting options: --rm and -d")
    ErrConflictNetworkHostname = fmt.Errorf("Conflicting options: -h and the network mode (--net)")
    ErrConflictHostNetworkAndLinks = fmt.Errorf("Conflicting options: --net=host can't be used with links. This would result in undefined behavior.")
    ErrConflictRestartPolicyAndAutoRemove = fmt.Errorf("Conflicting options: --restart and --rm")
)

//FIXME Only used in tests

@@ -71,6 +73,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
        flCpuShares = cmd.Int64([]string{"c", "-cpu-shares"}, 0, "CPU shares (relative weight)")
        flCpuset = cmd.String([]string{"-cpuset"}, "", "CPUs in which to allow execution (0-3, 0,1)")
        flNetMode = cmd.String([]string{"-net"}, "bridge", "Set the Network mode for the container\n'bridge': creates a new network stack for the container on the docker bridge\n'none': no networking for this container\n'container:<name|id>': reuses another container network stack\n'host': use the host network stack inside the container. Note: the host mode gives the container full access to local system services such as D-bus and is therefore considered insecure.")
        flRestartPolicy = cmd.String([]string{"-restart"}, "", "Restart policy to apply when a container exits (no, on-failure, always)")
        // For documentation purpose
        _ = cmd.Bool([]string{"#sig-proxy", "-sig-proxy"}, true, "Proxy received signals to the process (even in non-TTY mode). SIGCHLD, SIGSTOP, and SIGKILL are not proxied.")
        _ = cmd.String([]string{"#name", "-name"}, "", "Assign a name to the container")

@@ -225,14 +228,21 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
    }
    // parse the '-e' and '--env' after, to allow override
    envVariables = append(envVariables, flEnv.GetAll()...)
    // boo, there's no debug output for docker run
    //log.Debugf("Environment variables for the container: %#v", envVariables)

    netMode, err := parseNetMode(*flNetMode)
    if err != nil {
        return nil, nil, cmd, fmt.Errorf("--net: invalid net mode: %v", err)
    }

    restartPolicy, err := parseRestartPolicy(*flRestartPolicy)
    if err != nil {
        return nil, nil, cmd, err
    }

    if *flAutoRemove && (restartPolicy.Name == "always" || restartPolicy.Name == "on-failure") {
        return nil, nil, cmd, ErrConflictRestartPolicyAndAutoRemove
    }

    config := &Config{
        Hostname: hostname,
        Domainname: domainname,

@@ -271,6 +281,7 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
        Devices: deviceMappings,
        CapAdd: flCapAdd.GetAll(),
        CapDrop: flCapDrop.GetAll(),
        RestartPolicy: restartPolicy,
    }

    if sysInfo != nil && flMemory > 0 && !sysInfo.SwapLimit {

@@ -285,6 +296,46 @@ func parseRun(cmd *flag.FlagSet, args []string, sysInfo *sysinfo.SysInfo) (*Conf
    return config, hostConfig, cmd, nil
}

// parseRestartPolicy returns the parsed policy or an error indicating what is incorrect
func parseRestartPolicy(policy string) (RestartPolicy, error) {
    p := RestartPolicy{}

    if policy == "" {
        return p, nil
    }

    var (
        parts = strings.Split(policy, ":")
        name = parts[0]
    )

    switch name {
    case "always":
        p.Name = name

        if len(parts) == 2 {
            return p, fmt.Errorf("maximum restart count not valid with restart policy of \"always\"")
        }
    case "no":
        // do nothing
    case "on-failure":
        p.Name = name

        if len(parts) == 2 {
            count, err := strconv.Atoi(parts[1])
            if err != nil {
                return p, err
            }

            p.MaximumRetryCount = count
        }
    default:
        return p, fmt.Errorf("invalid restart policy %s", name)
    }

    return p, nil
}
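
A quick sanity check of the parsing rules above, written as a hypothetical table-driven test that would sit next to parse.go in the runconfig package (a sketch, not part of this commit); note that "no" deliberately leaves Name empty and that a retry count is only accepted for on-failure:

package runconfig

import "testing"

// TestParseRestartPolicy exercises parseRestartPolicy as defined above.
func TestParseRestartPolicy(t *testing.T) {
    valid := map[string]RestartPolicy{
        "":              {},
        "no":            {},
        "always":        {Name: "always"},
        "on-failure":    {Name: "on-failure"},
        "on-failure:10": {Name: "on-failure", MaximumRetryCount: 10},
    }
    invalid := []string{
        "always:5",     // a retry count is rejected for "always"
        "sometimes",    // unknown policy name
        "on-failure:x", // retry count must be an integer
    }

    for input, expected := range valid {
        p, err := parseRestartPolicy(input)
        if err != nil {
            t.Fatalf("%q: unexpected error: %v", input, err)
        }
        if p != expected {
            t.Fatalf("%q: got %+v, expected %+v", input, p, expected)
        }
    }
    for _, input := range invalid {
        if _, err := parseRestartPolicy(input); err == nil {
            t.Fatalf("%q: expected an error", input)
        }
    }
}
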

// options will come in the format of name.key=value or name.option
func parseDriverOpts(opts opts.ListOpts) (map[string][]string, error) {
    out := make(map[string][]string, len(opts.GetAll()))