Remove leftover autoremove containers during refresh
During system shutdown, Podman should go down gracefully, meaning that we have time to spawn cleanup processes which remove any containers set to autoremove. Unfortunately, this isn't always the case: if we get a SIGKILL because the system is going down immediately, we can't recover, and the autoremove containers are not removed.

However, we can pick up any leftover autoremove containers when we refresh the DB state, which is the first thing Podman does after a reboot. By detecting autoremove containers that have actually run (a container that was created but never run doesn't need to be removed) and removing them at that point, we keep the fresh boot clean even if Podman was terminated abnormally.

Fixes #21482

[NO NEW TESTS NEEDED] This requires a reboot to test realistically.

Signed-off-by: Matt Heon <mheon@redhat.com>
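The rule the commit adds to the refresh path is small: after a container's state is reloaded following a reboot, remove it right away if it is flagged for autoremoval and has already exited, since its cleanup process was evidently killed before it could run. Below is a minimal, self-contained sketch of that decision rule only; the types and helper names are illustrative stand-ins, not libpod's API (the real change works on libpod's Container inside Runtime.refresh() using ctrRmOpts and removeContainer, as the diff below shows).

package main

import "fmt"

// Illustrative stand-in types; libpod's real equivalents appear in the diff below.
type ctrState int

const (
    stateCreated ctrState = iota // created but never started
    stateExited                  // actually ran and has exited
)

type container struct {
    id         string
    autoRemove bool
    state      ctrState
}

// removeLeftovers applies the refresh-time rule: an autoremove container that
// actually ran (and is now exited) should have been cleaned up before the
// reboot, so remove it now; a container that was only created needs nothing.
func removeLeftovers(ctrs []*container) []*container {
    kept := ctrs[:0]
    for _, c := range ctrs {
        if c.autoRemove && c.state == stateExited {
            fmt.Printf("removing leftover autoremove container %s\n", c.id)
            continue
        }
        kept = append(kept, c)
    }
    return kept
}

func main() {
    ctrs := []*container{
        {id: "web", autoRemove: true, state: stateExited},  // leftover: removed
        {id: "db", autoRemove: true, state: stateCreated},  // never ran: kept
        {id: "job", autoRemove: false, state: stateExited}, // not autoremove: kept
    }
    for _, c := range removeLeftovers(ctrs) {
        fmt.Println("kept:", c.id)
    }
}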
@@ -166,7 +166,7 @@ func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error)
     if err != nil {
         return nil, err
     }
-    return newRuntimeFromConfig(conf, options...)
+    return newRuntimeFromConfig(ctx, conf, options...)
 }
 
 // NewRuntimeFromConfig creates a new container runtime using the given
@@ -175,10 +175,10 @@ func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error)
 // An error will be returned if the configuration file at the given path does
 // not exist or cannot be loaded
 func NewRuntimeFromConfig(ctx context.Context, userConfig *config.Config, options ...RuntimeOption) (*Runtime, error) {
-    return newRuntimeFromConfig(userConfig, options...)
+    return newRuntimeFromConfig(ctx, userConfig, options...)
 }
 
-func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
+func newRuntimeFromConfig(ctx context.Context, conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
     runtime := new(Runtime)
 
     if conf.Engine.OCIRuntime == "" {
@@ -223,7 +223,7 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti
         return nil, fmt.Errorf("starting shutdown signal handler: %w", err)
     }
 
-    if err := makeRuntime(runtime); err != nil {
+    if err := makeRuntime(ctx, runtime); err != nil {
         return nil, err
     }
 
@@ -333,7 +333,7 @@ func getDBState(runtime *Runtime) (State, error) {
 
 // Make a new runtime based on the given configuration
 // Sets up containers/storage, state store, OCI runtime
-func makeRuntime(runtime *Runtime) (retErr error) {
+func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) {
     // Find a working conmon binary
     cPath, err := runtime.config.FindConmon()
     if err != nil {
@@ -629,6 +629,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
         return err
     }
 
+    // Mark the runtime as valid - ready to be used, cannot be modified
+    // further.
+    // Need to do this *before* refresh as we can remove containers there.
+    // Should not be a big deal as we don't return it to users until after
+    // refresh runs.
+    runtime.valid = true
+
     // If we need to refresh the state, do it now - things are guaranteed to
     // be set up by now.
     if doRefresh {
@@ -639,17 +646,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
             }
         }
 
-        if err2 := runtime.refresh(runtimeAliveFile); err2 != nil {
+        if err2 := runtime.refresh(ctx, runtimeAliveFile); err2 != nil {
             return err2
         }
     }
 
     runtime.startWorker()
 
-    // Mark the runtime as valid - ready to be used, cannot be modified
-    // further
-    runtime.valid = true
-
     return nil
 }
 
@@ -819,7 +822,7 @@ func (r *Runtime) Shutdown(force bool) error {
 // Reconfigures the runtime after a reboot
 // Refreshes the state, recreating temporary files
 // Does not check validity as the runtime is not valid until after this has run
-func (r *Runtime) refresh(alivePath string) error {
+func (r *Runtime) refresh(ctx context.Context, alivePath string) error {
     logrus.Debugf("Podman detected system restart - performing state refresh")
 
     // Clear state of database if not running in container
@@ -856,6 +859,22 @@ func (r *Runtime) refresh(alivePath string) error {
         if err := ctr.refresh(); err != nil {
             logrus.Errorf("Refreshing container %s: %v", ctr.ID(), err)
         }
+        // This is the only place it's safe to use ctr.state.State unlocked
+        // We're holding the alive lock, guaranteed to be the only Libpod on the system right now.
+        if ctr.AutoRemove() && ctr.state.State == define.ContainerStateExited {
+            opts := ctrRmOpts{
+                // Don't force-remove, we're supposed to be fresh off a reboot
+                // If we have to force something is seriously wrong
+                Force:        false,
+                RemoveVolume: true,
+            }
+            // This container should have autoremoved before the
+            // reboot but did not.
+            // Get rid of it.
+            if _, _, err := r.removeContainer(ctx, ctr, opts); err != nil {
+                logrus.Errorf("Unable to remove container %s which should have autoremoved: %v", ctr.ID(), err)
+            }
+        }
     }
     for _, pod := range pods {
         if err := pod.refresh(); err != nil {