Handle removing containers with active exec sessions

For containers without --force set, an error will be returned

For containers with --force, all pids in the container will be
stopped, first with SIGTERM and then with SIGKILL after a timeout
(this mimics the behavior of stopping a container).

Signed-off-by: Matthew Heon <matthew.heon@gmail.com>

Closes: #412
Approved by: baude
This commit is contained in:
Matthew Heon
2018-02-27 15:01:29 -05:00
committed by Atomic Bot
parent 70baafc1c7
commit f02a9cd975
3 changed files with 115 additions and 0 deletions

View File

@ -147,6 +147,40 @@ func waitContainerStop(ctr *Container, timeout time.Duration) error {
}
}
// Wait for a set of given PIDs to stop
func waitPidsStop(pids []int, timeout time.Duration) error {
done := make(chan struct{})
chControl := make(chan struct{})
go func() {
for {
select {
case <-chControl:
return
default:
allClosed := true
for _, pid := range pids {
if err := unix.Kill(pid, 0); err != unix.ESRCH {
allClosed = false
break
}
}
if allClosed {
close(done)
return
}
time.Sleep(100 * time.Millisecond)
}
}
}()
select {
case <-done:
return nil
case <-time.After(timeout):
close(chControl)
return errors.Errorf("given PIDs did not die within timeout")
}
}
// CreateContainer creates a container in the OCI runtime
// TODO terminal support for container
// Presently just ignoring conmon opts related to it
@ -524,3 +558,62 @@ func (r *OCIRuntime) execContainer(c *Container, cmd, capAdd, env []string, tty
return execCmd, nil
}
// execStopContainer stops all active exec sessions in a container
// It will also stop all other processes in the container. It is only intended
// to be used to assist in cleanup when removing a container.
// SIGTERM is used by default to stop processes. If SIGTERM fails, SIGKILL will be used.
func (r *OCIRuntime) execStopContainer(ctr *Container, timeout uint) error {
// Do we have active exec sessions?
if len(ctr.state.ExecSessions) == 0 {
return nil
}
// Get a list of active exec sessions
execSessions := []int{}
for _, pid := range ctr.state.ExecSessions {
// Ping the PID with signal 0 to see if it still exists
if err := unix.Kill(pid, 0); err == unix.ESRCH {
continue
}
execSessions = append(execSessions, pid)
}
// All the sessions may be dead
// If they are, just return
if len(execSessions) == 0 {
return nil
}
// If timeout is 0, just use SIGKILL
if timeout > 0 {
// Stop using SIGTERM by default
// Use SIGSTOP after a timeout
logrus.Debugf("Killing all processes in container %s with SIGTERM", ctr.ID())
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "kill", "--all", ctr.ID(), "TERM"); err != nil {
return errors.Wrapf(err, "error sending SIGTERM to container %s processes", ctr.ID())
}
// Wait for all processes to stop
if err := waitPidsStop(execSessions, time.Duration(timeout)*time.Second); err != nil {
logrus.Warnf("Timed out stopping container %s exec sessions", ctr.ID())
} else {
// No error, all exec sessions are dead
return nil
}
}
// Send SIGKILL
logrus.Debugf("Killing all processes in container %s with SIGKILL", ctr.ID())
if err := utils.ExecCmdWithStdStreams(os.Stdin, os.Stdout, os.Stderr, r.path, "kill", "--all", ctr.ID(), "KILL"); err != nil {
return errors.Wrapf(err, "error sending SIGKILL to container %s processes", ctr.ID())
}
// Give the processes a few seconds to go down
if err := waitPidsStop(execSessions, killContainerTimeout); err != nil {
return errors.Wrapf(err, "failed to kill container %s exec sessions", ctr.ID())
}
return nil
}

View File

@ -165,6 +165,17 @@ func (r *Runtime) removeContainer(c *Container, force bool) error {
return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed", c.ID(), c.state.State.String())
}
// Check that all of our exec sessions have finished
if len(c.state.ExecSessions) != 0 {
if force {
if err := r.ociRuntime.execStopContainer(c, c.StopTimeout()); err != nil {
return err
}
} else {
return errors.Wrapf(ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
}
}
// Check that no other containers depend on the container
deps, err := r.state.ContainerInUse(c)
if err != nil {

View File

@ -101,6 +101,11 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error {
return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which is running", p.ID(), ctr.ID())
}
// If the container has active exec sessions and force is not set we can't do anything
if len(ctr.state.ExecSessions) != 0 && !force {
return errors.Wrapf(ErrCtrStateInvalid, "pod %s contains container %s which has active exec sessions", p.ID(), ctr.ID())
}
deps, err := r.state.ContainerInUse(ctr)
if err != nil {
return err
@ -134,6 +139,12 @@ func (r *Runtime) RemovePod(p *Pod, removeCtrs, force bool) error {
return err
}
}
// If the container has active exec sessions, stop them now
if len(ctr.state.ExecSessions) != 0 {
if err := r.ociRuntime.execStopContainer(ctr, ctr.StopTimeout()); err != nil {
return err
}
}
}
}