mirror of
https://github.com/containers/podman.git
synced 2025-10-19 20:23:08 +08:00
Merge pull request #5305 from mheon/check_for_common_deadlocks
Add basic deadlock detection for container start/remove
This commit is contained in:
@ -4,8 +4,10 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/containers/libpod/cmd/podman/cliconfig"
|
"github.com/containers/libpod/cmd/podman/cliconfig"
|
||||||
|
"github.com/containers/libpod/libpod/define"
|
||||||
"github.com/containers/libpod/pkg/adapter"
|
"github.com/containers/libpod/pkg/adapter"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -77,6 +79,9 @@ func rmCmd(c *cliconfig.RmValues) error {
|
|||||||
|
|
||||||
if len(failures) > 0 {
|
if len(failures) > 0 {
|
||||||
for _, err := range failures {
|
for _, err := range failures {
|
||||||
|
if errors.Cause(err) == define.ErrWillDeadlock {
|
||||||
|
logrus.Errorf("Potential deadlock detected - please run 'podman system renumber' to resolve")
|
||||||
|
}
|
||||||
exitCode = setExitCode(err)
|
exitCode = setExitCode(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1401,6 +1401,9 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string)
|
|||||||
return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())
|
return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if vol.config.LockID == c.config.LockID {
|
||||||
|
return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID)
|
||||||
|
}
|
||||||
vol.lock.Lock()
|
vol.lock.Lock()
|
||||||
defer vol.lock.Unlock()
|
defer vol.lock.Unlock()
|
||||||
if vol.needsMount() {
|
if vol.needsMount() {
|
||||||
|
@ -61,6 +61,11 @@ var (
|
|||||||
// the user.
|
// the user.
|
||||||
ErrDetach = utils.ErrDetach
|
ErrDetach = utils.ErrDetach
|
||||||
|
|
||||||
|
// ErrWillDeadlock indicates that the requested operation will cause a
|
||||||
|
// deadlock. This is usually caused by upgrade issues, and is resolved
|
||||||
|
// by renumbering the locks.
|
||||||
|
ErrWillDeadlock = errors.New("deadlock due to lock mismatch")
|
||||||
|
|
||||||
// ErrNoCgroups indicates that the container does not have its own
|
// ErrNoCgroups indicates that the container does not have its own
|
||||||
// CGroup.
|
// CGroup.
|
||||||
ErrNoCgroups = errors.New("this container does not have a cgroup")
|
ErrNoCgroups = errors.New("this container does not have a cgroup")
|
||||||
|
@ -412,6 +412,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Lock the pod while we're removing container
|
// Lock the pod while we're removing container
|
||||||
|
if pod.config.LockID == c.config.LockID {
|
||||||
|
return errors.Wrapf(define.ErrWillDeadlock, "container %s and pod %s share lock ID %d", c.ID(), pod.ID(), c.config.LockID)
|
||||||
|
}
|
||||||
pod.lock.Lock()
|
pod.lock.Lock()
|
||||||
defer pod.lock.Unlock()
|
defer pod.lock.Unlock()
|
||||||
if err := pod.updatePod(); err != nil {
|
if err := pod.updatePod(); err != nil {
|
||||||
|
@ -36,9 +36,6 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool) error
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
v.lock.Lock()
|
|
||||||
defer v.lock.Unlock()
|
|
||||||
|
|
||||||
return r.removeVolume(ctx, v, force)
|
return r.removeVolume(ctx, v, force)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,6 +124,9 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error
|
|||||||
return define.ErrVolumeRemoved
|
return define.ErrVolumeRemoved
|
||||||
}
|
}
|
||||||
|
|
||||||
|
v.lock.Lock()
|
||||||
|
defer v.lock.Unlock()
|
||||||
|
|
||||||
// Update volume status to pick up a potential removal from state
|
// Update volume status to pick up a potential removal from state
|
||||||
if err := v.update(); err != nil {
|
if err := v.update(); err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -469,6 +469,10 @@ func (r *LocalRuntime) Run(ctx context.Context, c *cliconfig.RunValues, exitCode
|
|||||||
logrus.Debugf("unable to remove container %s after failing to start and attach to it", ctr.ID())
|
logrus.Debugf("unable to remove container %s after failing to start and attach to it", ctr.ID())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if errors.Cause(err) == define.ErrWillDeadlock {
|
||||||
|
logrus.Debugf("Deadlock error: %v", err)
|
||||||
|
return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
|
||||||
|
}
|
||||||
return define.ExitCode(err), err
|
return define.ExitCode(err), err
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -702,6 +706,11 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP
|
|||||||
return exitCode, nil
|
return exitCode, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if errors.Cause(err) == define.ErrWillDeadlock {
|
||||||
|
logrus.Debugf("Deadlock error: %v", err)
|
||||||
|
return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID())
|
||||||
|
}
|
||||||
|
|
||||||
if ctrRunning {
|
if ctrRunning {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
@ -735,6 +744,10 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP
|
|||||||
if lastError != nil {
|
if lastError != nil {
|
||||||
fmt.Fprintln(os.Stderr, lastError)
|
fmt.Fprintln(os.Stderr, lastError)
|
||||||
}
|
}
|
||||||
|
if errors.Cause(err) == define.ErrWillDeadlock {
|
||||||
|
lastError = errors.Wrapf(err, "please run 'podman system renumber' to resolve deadlocks")
|
||||||
|
continue
|
||||||
|
}
|
||||||
lastError = errors.Wrapf(err, "unable to start container %q", container)
|
lastError = errors.Wrapf(err, "unable to start container %q", container)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user