Remove leftover autoremove containers during refresh

During system shutdown, Podman should go down gracefully, meaning
that we have time to spawn cleanup processes which remove any
containers set to autoremove. Unfortunately, this isn't always
the case. If we get a SIGKILL because the system is going down
immediately, we can't recover from this, and the autoremove
containers are not removed.

However, we can pick up any leftover autoremove containers when
we refesh the DB state, which is the first thing Podman does
after a reboot. By detecting any autoremove containers that have
actually run (a container that was created but never run doesn't
need to be removed) at that point and removing them, we keep the
fresh boot clean, even if Podman was terminated abnormally.

Fixes #21482

[NO NEW TESTS NEEDED] This requires a reboot to realistically
test.

Signed-off-by: Matt Heon <mheon@redhat.com>
This commit is contained in:
Matt Heon
2024-02-06 16:08:39 -05:00
parent 22b1650619
commit 9983e87440

View File

@ -166,7 +166,7 @@ func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error)
if err != nil {
return nil, err
}
return newRuntimeFromConfig(conf, options...)
return newRuntimeFromConfig(ctx, conf, options...)
}
// NewRuntimeFromConfig creates a new container runtime using the given
@ -175,10 +175,10 @@ func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error)
// An error will be returned if the configuration file at the given path does
// not exist or cannot be loaded
func NewRuntimeFromConfig(ctx context.Context, userConfig *config.Config, options ...RuntimeOption) (*Runtime, error) {
return newRuntimeFromConfig(userConfig, options...)
return newRuntimeFromConfig(ctx, userConfig, options...)
}
func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
func newRuntimeFromConfig(ctx context.Context, conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
runtime := new(Runtime)
if conf.Engine.OCIRuntime == "" {
@ -223,7 +223,7 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti
return nil, fmt.Errorf("starting shutdown signal handler: %w", err)
}
if err := makeRuntime(runtime); err != nil {
if err := makeRuntime(ctx, runtime); err != nil {
return nil, err
}
@ -333,7 +333,7 @@ func getDBState(runtime *Runtime) (State, error) {
// Make a new runtime based on the given configuration
// Sets up containers/storage, state store, OCI runtime
func makeRuntime(runtime *Runtime) (retErr error) {
func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) {
// Find a working conmon binary
cPath, err := runtime.config.FindConmon()
if err != nil {
@ -629,6 +629,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
return err
}
// Mark the runtime as valid - ready to be used, cannot be modified
// further.
// Need to do this *before* refresh as we can remove containers there.
// Should not be a big deal as we don't return it to users until after
// refresh runs.
runtime.valid = true
// If we need to refresh the state, do it now - things are guaranteed to
// be set up by now.
if doRefresh {
@ -639,17 +646,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
}
}
if err2 := runtime.refresh(runtimeAliveFile); err2 != nil {
if err2 := runtime.refresh(ctx, runtimeAliveFile); err2 != nil {
return err2
}
}
runtime.startWorker()
// Mark the runtime as valid - ready to be used, cannot be modified
// further
runtime.valid = true
return nil
}
@ -819,7 +822,7 @@ func (r *Runtime) Shutdown(force bool) error {
// Reconfigures the runtime after a reboot
// Refreshes the state, recreating temporary files
// Does not check validity as the runtime is not valid until after this has run
func (r *Runtime) refresh(alivePath string) error {
func (r *Runtime) refresh(ctx context.Context, alivePath string) error {
logrus.Debugf("Podman detected system restart - performing state refresh")
// Clear state of database if not running in container
@ -856,6 +859,22 @@ func (r *Runtime) refresh(alivePath string) error {
if err := ctr.refresh(); err != nil {
logrus.Errorf("Refreshing container %s: %v", ctr.ID(), err)
}
// This is the only place it's safe to use ctr.state.State unlocked
// We're holding the alive lock, guaranteed to be the only Libpod on the system right now.
if ctr.AutoRemove() && ctr.state.State == define.ContainerStateExited {
opts := ctrRmOpts{
// Don't force-remove, we're supposed to be fresh off a reboot
// If we have to force something is seriously wrong
Force: false,
RemoveVolume: true,
}
// This container should have autoremoved before the
// reboot but did not.
// Get rid of it.
if _, _, err := r.removeContainer(ctx, ctr, opts); err != nil {
logrus.Errorf("Unable to remove container %s which should have autoremoved: %v", ctr.ID(), err)
}
}
}
for _, pod := range pods {
if err := pod.refresh(); err != nil {