Merge pull request #5305 from mheon/check_for_common_deadlocks

Add basic deadlock detection for container start/remove
This commit is contained in:
OpenShift Merge Robot
2020-02-25 13:17:12 +01:00
committed by GitHub
7 changed files with 32 additions and 3 deletions

View File

@ -4,8 +4,10 @@ import (
"fmt"
"github.com/containers/libpod/cmd/podman/cliconfig"
"github.com/containers/libpod/libpod/define"
"github.com/containers/libpod/pkg/adapter"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
@ -77,6 +79,9 @@ func rmCmd(c *cliconfig.RmValues) error {
if len(failures) > 0 {
for _, err := range failures {
if errors.Cause(err) == define.ErrWillDeadlock {
logrus.Errorf("Potential deadlock detected - please run 'podman system renumber' to resolve")
}
exitCode = setExitCode(err)
}
}

View File

@ -1401,6 +1401,9 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())
}
if vol.config.LockID == c.config.LockID {
return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID)
}
vol.lock.Lock()
defer vol.lock.Unlock()
if vol.needsMount() {

View File

@ -61,6 +61,11 @@ var (
// the user.
ErrDetach = utils.ErrDetach
// ErrWillDeadlock indicates that the requested operation will cause a
// deadlock. This is usually caused by upgrade issues, and is resolved
// by renumbering the locks.
// It is returned wrapped (errors.Wrapf) when a container is found to
// share a lock ID with its pod or with a named volume it mounts, so
// callers should match it with errors.Cause rather than comparing the
// returned error directly.
ErrWillDeadlock = errors.New("deadlock due to lock mismatch")
// ErrNoCgroups indicates that the container does not have its own
// CGroup.
ErrNoCgroups = errors.New("this container does not have a cgroup")

View File

@ -412,6 +412,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
}
// Lock the pod while we're removing container
if pod.config.LockID == c.config.LockID {
return errors.Wrapf(define.ErrWillDeadlock, "container %s and pod %s share lock ID %d", c.ID(), pod.ID(), c.config.LockID)
}
pod.lock.Lock()
defer pod.lock.Unlock()
if err := pod.updatePod(); err != nil {

View File

@ -36,9 +36,6 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool) error
}
}
v.lock.Lock()
defer v.lock.Unlock()
return r.removeVolume(ctx, v, force)
}

View File

@ -124,6 +124,9 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error
return define.ErrVolumeRemoved
}
v.lock.Lock()
defer v.lock.Unlock()
// Update volume status to pick up a potential removal from state
if err := v.update(); err != nil {
return err

View File

@ -469,6 +469,10 @@ func (r *LocalRuntime) Run(ctx context.Context, c *cliconfig.RunValues, exitCode
logrus.Debugf("unable to remove container %s after failing to start and attach to it", ctr.ID())
}
}
if errors.Cause(err) == define.ErrWillDeadlock {
logrus.Debugf("Deadlock error: %v", err)
return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
}
return define.ExitCode(err), err
}
@ -702,6 +706,11 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP
return exitCode, nil
}
if errors.Cause(err) == define.ErrWillDeadlock {
logrus.Debugf("Deadlock error: %v", err)
return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
}
if ctrRunning {
return 0, err
}
@ -735,6 +744,10 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP
if lastError != nil {
fmt.Fprintln(os.Stderr, lastError)
}
if errors.Cause(err) == define.ErrWillDeadlock {
lastError = errors.Wrapf(err, "please run 'podman system renumber' to resolve deadlocks")
continue
}
lastError = errors.Wrapf(err, "unable to start container %q", container)
continue
}