mirror of
https://github.com/containers/podman.git
synced 2025-05-21 00:56:36 +08:00
Add basic deadlock detection for container start/remove
We can easily tell if we're going to deadlock by comparing lock IDs before actually taking the lock. Add a few checks for this in common places where deadlocks might occur. This does not yet cover pod operations, where detection is more difficult (and costly) due to the number of locks being involved being higher than 2. Also, add some error wrapping on the Podman side, so we can tell people to use `system renumber` when it occurs. Signed-off-by: Matthew Heon <matthew.heon@pm.me>
This commit is contained in:
@ -4,8 +4,10 @@ import (
|
||||
"fmt"
|
||||
|
||||
"github.com/containers/libpod/cmd/podman/cliconfig"
|
||||
"github.com/containers/libpod/libpod/define"
|
||||
"github.com/containers/libpod/pkg/adapter"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
@ -77,6 +79,9 @@ func rmCmd(c *cliconfig.RmValues) error {
|
||||
|
||||
if len(failures) > 0 {
|
||||
for _, err := range failures {
|
||||
if errors.Cause(err) == define.ErrWillDeadlock {
|
||||
logrus.Errorf("Potential deadlock detected - please run 'podman system renumber' to resolve")
|
||||
}
|
||||
exitCode = setExitCode(err)
|
||||
}
|
||||
}
|
||||
|
@ -1401,6 +1401,9 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
|
||||
return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())
|
||||
}
|
||||
|
||||
if vol.config.LockID == c.config.LockID {
|
||||
return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID)
|
||||
}
|
||||
vol.lock.Lock()
|
||||
defer vol.lock.Unlock()
|
||||
if vol.needsMount() {
|
||||
|
@ -61,6 +61,11 @@ var (
|
||||
// the user.
|
||||
ErrDetach = utils.ErrDetach
|
||||
|
||||
// ErrWillDeadlock indicates that the requested operation will cause a
|
||||
// deadlock. This is usually caused by upgrade issues, and is resolved
|
||||
// by renumbering the locks.
|
||||
ErrWillDeadlock = errors.New("deadlock due to lock mismatch")
|
||||
|
||||
// ErrNoCgroups indicates that the container does not have its own
|
||||
// CGroup.
|
||||
ErrNoCgroups = errors.New("this container does not have a cgroup")
|
||||
|
@ -412,6 +412,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
|
||||
}
|
||||
|
||||
// Lock the pod while we're removing container
|
||||
if pod.config.LockID == c.config.LockID {
|
||||
return errors.Wrapf(define.ErrWillDeadlock, "container %s and pod %s share lock ID %d", c.ID(), pod.ID(), c.config.LockID)
|
||||
}
|
||||
pod.lock.Lock()
|
||||
defer pod.lock.Unlock()
|
||||
if err := pod.updatePod(); err != nil {
|
||||
|
@ -36,9 +36,6 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool) error
|
||||
}
|
||||
}
|
||||
|
||||
v.lock.Lock()
|
||||
defer v.lock.Unlock()
|
||||
|
||||
return r.removeVolume(ctx, v, force)
|
||||
}
|
||||
|
||||
|
@ -124,6 +124,9 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error
|
||||
return define.ErrVolumeRemoved
|
||||
}
|
||||
|
||||
v.lock.Lock()
|
||||
defer v.lock.Unlock()
|
||||
|
||||
// Update volume status to pick up a potential removal from state
|
||||
if err := v.update(); err != nil {
|
||||
return err
|
||||
|
@ -469,6 +469,10 @@ func (r *LocalRuntime) Run(ctx context.Context, c *cliconfig.RunValues, exitCode
|
||||
logrus.Debugf("unable to remove container %s after failing to start and attach to it", ctr.ID())
|
||||
}
|
||||
}
|
||||
if errors.Cause(err) == define.ErrWillDeadlock {
|
||||
logrus.Debugf("Deadlock error: %v", err)
|
||||
return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
|
||||
}
|
||||
return define.ExitCode(err), err
|
||||
}
|
||||
|
||||
@ -702,6 +706,11 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP
|
||||
return exitCode, nil
|
||||
}
|
||||
|
||||
if errors.Cause(err) == define.ErrWillDeadlock {
|
||||
logrus.Debugf("Deadlock error: %v", err)
|
||||
return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
|
||||
}
|
||||
|
||||
if ctrRunning {
|
||||
return 0, err
|
||||
}
|
||||
@ -735,6 +744,10 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP
|
||||
if lastError != nil {
|
||||
fmt.Fprintln(os.Stderr, lastError)
|
||||
}
|
||||
if errors.Cause(err) == define.ErrWillDeadlock {
|
||||
lastError = errors.Wrapf(err, "please run 'podman system renumber' to resolve deadlocks")
|
||||
continue
|
||||
}
|
||||
lastError = errors.Wrapf(err, "unable to start container %q", container)
|
||||
continue
|
||||
}
|
||||
|
Reference in New Issue
Block a user