Fix a race when removing multiple containers in the same pod

If the first container to get the pod lock is the infra container,
it's going to want to remove the entire pod, which will also
remove every other container in the pod. Subsequent containers
will then get the pod lock and try to access the pod, only to
find that it no longer exists - and that, actually, the container
being removed also no longer exists. Handle this by marking the
container as removed rather than failing with a no-such-pod error.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
This commit is contained in:
Matthew Heon
2023-06-02 13:13:13 -04:00
parent 0e47465e4a
commit a750cd9876

View File

@@ -718,6 +718,21 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, opts ctrRmO
if c.config.Pod != "" {
pod, err = r.state.Pod(c.config.Pod)
if err != nil {
// There's a potential race here where the pod we are in
// was already removed.
// If so, this container is also removed, as pods take
// all their containers with them.
// So if it's already gone, check if we are too.
if errors.Is(err, define.ErrNoSuchPod) {
// We could check the DB to see if we still
// exist, but a container outliving its pod
// would be a serious violation of DB integrity.
// Mark this container as removed so there's no
// confusion, though.
removedCtrs[c.ID()] = nil
return
}
retErr = err
return
}
@@ -733,6 +748,13 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, opts ctrRmO
defer pod.lock.Unlock()
}
if err := pod.updatePod(); err != nil {
// As above, there's a chance the pod was
// already removed.
if errors.Is(err, define.ErrNoSuchPod) {
removedCtrs[c.ID()] = nil
return
}
retErr = err
return
}