Merge pull request #14466 from mheon/fix_9075

Improve robustness of `podman system reset`
OpenShift Merge Robot authored 2022-06-03 17:54:57 -04:00, committed by GitHub
7 changed files with 170 additions and 16 deletions

View File

@@ -91,20 +91,12 @@ func reset(cmd *cobra.Command, args []string) {
registry.ContainerEngine().Shutdown(registry.Context())
registry.ImageEngine().Shutdown(registry.Context())
engine, err := infra.NewSystemEngine(entities.ResetMode, registry.PodmanConfig())
if err != nil {
// Do not try to shut the engine down, as a Reset engine is not valid
// after its creation.
if _, err := infra.NewSystemEngine(entities.ResetMode, registry.PodmanConfig()); err != nil {
logrus.Error(err)
os.Exit(define.ExecErrorCodeGeneric)
}
defer engine.Shutdown(registry.Context())
if err := engine.Reset(registry.Context()); err != nil {
logrus.Error(err)
// FIXME change this to return the error like other commands
// defer will never run on os.Exit()
//nolint:gocritic
os.Exit(define.ExecErrorCodeGeneric)
}
// Shutdown podman-machine and delete all machine files
if err := resetMachine(); err != nil {
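The FIXME removed in this hunk points at a real Go pitfall: deferred functions do not run when the process terminates via `os.Exit()`, so the old `defer engine.Shutdown(...)` never ran once `Reset` failed and `os.Exit` was called. A minimal standalone sketch (not Podman code) demonstrating that behavior:

```go
package main

import (
	"fmt"
	"os"
)

func main() {
	// os.Exit terminates the process immediately: deferred calls are
	// skipped, so this line is never printed.
	defer fmt.Println("cleanup")

	fmt.Println("about to exit")
	os.Exit(1)
}
```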

View File

@@ -435,6 +435,21 @@ func WithDefaultInfraCommand(cmd string) RuntimeOption {
}
}
// WithReset instructs libpod to reset all storage to factory defaults.
// All containers, pods, volumes, images, and networks will be removed.
// All directories created by Libpod will be removed.
func WithReset() RuntimeOption {
return func(rt *Runtime) error {
if rt.valid {
return define.ErrRuntimeFinalized
}
rt.doReset = true
return nil
}
}
// WithRenumber instructs libpod to perform a lock renumbering while
// initializing. This will handle migrations from early versions of libpod with
// file locks to newer versions with SHM locking, as well as changes in the
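WithReset() follows libpod's functional-option pattern for runtime construction. Below is a hedged sketch of how a caller could request a factory reset; the use of libpod.NewRuntime as the entry point and the surrounding error handling are illustrative assumptions, and, as the runtime.go changes later in this PR make explicit, no usable runtime is returned when a reset is requested.

```go
package main

import (
	"context"

	"github.com/containers/podman/v4/libpod"
	"github.com/sirupsen/logrus"
)

// resetStorage is a sketch, not Podman code: it assumes libpod.NewRuntime
// as the constructor and simply requests a factory reset via WithReset().
func resetStorage() error {
	rt, err := libpod.NewRuntime(context.Background(), libpod.WithReset())
	if err != nil {
		return err
	}
	// When WithReset() is used, newRuntimeFromConfig deliberately returns
	// no runtime, so rt is expected to be nil here and must not be used.
	_ = rt
	return nil
}

func main() {
	if err := resetStorage(); err != nil {
		logrus.Error(err)
	}
}
```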

View File

@@ -17,8 +17,78 @@ import (
"github.com/sirupsen/logrus"
)
// removeAllDirs removes all Podman storage directories. It is intended to be
// used as a backup for reset() when that function cannot be used due to
// failures in initializing libpod.
// It does not expect that all the directories match what is in use by Podman,
// as this is a common failure point for `system reset`. As such, our ability to
// interface with containers and pods is somewhat limited.
// This function assumes that we do not have a working c/storage store.
func (r *Runtime) removeAllDirs() error {
var lastErr error
// Grab the runtime alive lock.
// This ensures that no other Podman process can run while we are doing
// a reset, so no race conditions with containers/pods/etc being created
// while we are resetting storage.
// TODO: maybe want a helper for getting the path? This is duped from
// runtime.go
runtimeAliveLock := filepath.Join(r.config.Engine.TmpDir, "alive.lck")
aliveLock, err := storage.GetLockfile(runtimeAliveLock)
if err != nil {
logrus.Errorf("Lock runtime alive lock %s: %v", runtimeAliveLock, err)
} else {
aliveLock.Lock()
defer aliveLock.Unlock()
}
// We do not have a store - so we can't really try and remove containers
// or pods or volumes...
// Try and remove the directories, in hopes that they are unmounted.
// This is likely to fail but it's the best we can do.
// Volume path
if err := os.RemoveAll(r.config.Engine.VolumePath); err != nil {
lastErr = errors.Wrapf(err, "removing volume path")
}
// Tmpdir
if err := os.RemoveAll(r.config.Engine.TmpDir); err != nil {
if lastErr != nil {
logrus.Errorf("Reset: %v", lastErr)
}
lastErr = errors.Wrapf(err, "removing tmp dir")
}
// Runroot
if err := os.RemoveAll(r.storageConfig.RunRoot); err != nil {
if lastErr != nil {
logrus.Errorf("Reset: %v", lastErr)
}
lastErr = errors.Wrapf(err, "removing run root")
}
// Static dir
if err := os.RemoveAll(r.config.Engine.StaticDir); err != nil {
if lastErr != nil {
logrus.Errorf("Reset: %v", lastErr)
}
lastErr = errors.Wrapf(err, "removing static dir")
}
// Graph root
if err := os.RemoveAll(r.storageConfig.GraphRoot); err != nil {
if lastErr != nil {
logrus.Errorf("Reset: %v", lastErr)
}
lastErr = errors.Wrapf(err, "removing graph root")
}
return lastErr
}
// Reset removes all storage
func (r *Runtime) Reset(ctx context.Context) error {
func (r *Runtime) reset(ctx context.Context) error {
var timeout *uint
pods, err := r.GetAllPods()
if err != nil {

View File

@@ -96,6 +96,10 @@ type Runtime struct {
// This bool is just needed so that we can set it for netavark interface.
syslog bool
// doReset indicates that the runtime should perform a system reset.
// All Podman files will be removed.
doReset bool
// doRenumber indicates that the runtime should perform a lock renumber
// during initialization.
// Once the runtime has been initialized and returned, this variable is
@@ -235,6 +239,11 @@ func newRuntimeFromConfig(conf *config.Config, options ...RuntimeOption) (*Runti
runtime.config.CheckCgroupsAndAdjustConfig()
// If resetting storage, do *not* return a runtime.
if runtime.doReset {
return nil, nil
}
return runtime, nil
}
@@ -305,6 +314,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
}
runtime.conmonPath = cPath
if runtime.noStore && runtime.doReset {
return errors.Wrapf(define.ErrInvalidArg, "cannot perform system reset if runtime is not creating a store")
}
if runtime.doReset && runtime.doRenumber {
return errors.Wrapf(define.ErrInvalidArg, "cannot perform system reset while renumbering locks")
}
// Make the static files directory if it does not exist
if err := os.MkdirAll(runtime.config.Engine.StaticDir, 0700); err != nil {
// The directory is allowed to exist
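The two guards in this hunk reject incompatible option combinations by wrapping libpod's define.ErrInvalidArg sentinel, which keeps the error testable by identity rather than by string matching. A standalone sketch of that pattern using github.com/pkg/errors (the sentinel and function names here are illustrative, not Podman's):

```go
package main

import (
	"fmt"

	"github.com/pkg/errors"
)

// errInvalidArg stands in for libpod's define.ErrInvalidArg sentinel.
var errInvalidArg = errors.New("invalid argument")

func validate(doReset, doRenumber bool) error {
	if doReset && doRenumber {
		// Wrap the sentinel so callers keep a testable error identity
		// plus a human-readable reason.
		return errors.Wrapf(errInvalidArg, "cannot perform system reset while renumbering locks")
	}
	return nil
}

func main() {
	err := validate(true, true)
	// errors.Cause unwraps the chain back to the sentinel for comparison.
	fmt.Println(errors.Cause(err) == errInvalidArg) // prints: true
}
```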
@@ -339,6 +355,20 @@ func makeRuntime(runtime *Runtime) (retErr error) {
// Grab config from the database so we can reset some defaults
dbConfig, err := runtime.state.GetDBConfig()
if err != nil {
if runtime.doReset {
// We can at least delete the DB and the static files
// directory.
// Can't safely touch anything else because we aren't
// sure of other directories.
if err := runtime.state.Close(); err != nil {
logrus.Errorf("Closing database connection: %v", err)
} else {
if err := os.RemoveAll(runtime.config.Engine.StaticDir); err != nil {
logrus.Errorf("Removing static files directory %v: %v", runtime.config.Engine.StaticDir, err)
}
}
}
return errors.Wrapf(err, "error retrieving runtime configuration from database")
}
@@ -372,7 +402,13 @@ func makeRuntime(runtime *Runtime) (retErr error) {
// Validate our config against the database, now that we've set our
// final storage configuration
if err := runtime.state.ValidateDBConfig(runtime); err != nil {
return err
// If we are performing a storage reset: continue on with a
// warning. Otherwise we can't `system reset` after a change to
// the core paths.
if !runtime.doReset {
return err
}
logrus.Errorf("Runtime paths differ from those stored in database, storage reset may not remove all files")
}
if err := runtime.state.SetNamespace(runtime.config.Engine.Namespace); err != nil {
@@ -394,6 +430,14 @@ func makeRuntime(runtime *Runtime) (retErr error) {
} else if runtime.noStore {
logrus.Debug("No store required. Not opening container store.")
} else if err := runtime.configureStore(); err != nil {
// Make a best-effort attempt to clean up if performing a
// storage reset.
if runtime.doReset {
if err := runtime.removeAllDirs(); err != nil {
logrus.Errorf("Removing libpod directories: %v", err)
}
}
return err
}
defer func() {
@@ -575,6 +619,18 @@ func makeRuntime(runtime *Runtime) (retErr error) {
return err
}
// If we're resetting storage, do it now.
// We will not return a valid runtime.
// TODO: Plumb this context out so it can be set.
if runtime.doReset {
// Mark the runtime as valid, so normal functionality "mostly"
// works and we can use regular functions to remove
// ctrs/pods/etc
runtime.valid = true
return runtime.reset(context.Background())
}
// If we're renumbering locks, do it now.
// It breaks out of normal runtime init, and will not return a valid
// runtime.
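The TODO in the reset branch notes that context.Background() is hard-coded where a caller-supplied context could be plumbed through. A hypothetical sketch (names and timing invented purely for illustration) of what a bounded reset could look like once such a context is threaded in:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// doReset stands in for runtime.reset: the real function already accepts a
// context, so a caller-supplied context could bound or cancel the reset
// once it is plumbed through.
func doReset(ctx context.Context) error {
	select {
	case <-time.After(50 * time.Millisecond): // pretend reset work
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

func main() {
	// Hypothetical caller-supplied context with a deadline, instead of
	// the hard-coded context.Background() used in the hunk above.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	fmt.Println(doReset(ctx))
}
```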
@@ -818,7 +874,7 @@ func (r *Runtime) DeferredShutdown(force bool) {
// still containers running or mounted
func (r *Runtime) Shutdown(force bool) error {
if !r.valid {
return define.ErrRuntimeStopped
return nil
}
if r.workerChannel != nil {

View File

@@ -328,7 +328,7 @@ func (ic *ContainerEngine) SystemDf(ctx context.Context, options entities.System
}
func (se *SystemEngine) Reset(ctx context.Context) error {
return se.Libpod.Reset(ctx)
return nil
}
func (se *SystemEngine) Renumber(ctx context.Context, flags *pflag.FlagSet, config *entities.PodmanConfig) error {

View File

@@ -53,7 +53,7 @@ func NewSystemEngine(setup entities.EngineSetup, facts *entities.PodmanConfig) (
case entities.RenumberMode:
r, err = GetRuntimeRenumber(context.Background(), facts.FlagSet, facts)
case entities.ResetMode:
r, err = GetRuntimeRenumber(context.Background(), facts.FlagSet, facts)
r, err = GetRuntimeReset(context.Background(), facts.FlagSet, facts)
case entities.MigrateMode:
name, flagErr := facts.FlagSet.GetString("new-runtime")
if flagErr != nil {

View File

@@ -37,6 +37,7 @@ type engineOpts struct {
migrate bool
noStore bool
withFDS bool
reset bool
config *entities.PodmanConfig
}
@@ -48,6 +49,7 @@ func GetRuntimeMigrate(ctx context.Context, fs *flag.FlagSet, cfg *entities.Podm
migrate: true,
noStore: false,
withFDS: true,
reset: false,
config: cfg,
})
}
@@ -59,6 +61,7 @@ func GetRuntimeDisableFDs(ctx context.Context, fs *flag.FlagSet, cfg *entities.P
migrate: false,
noStore: false,
withFDS: false,
reset: false,
config: cfg,
})
}
@@ -70,6 +73,7 @@ func GetRuntimeRenumber(ctx context.Context, fs *flag.FlagSet, cfg *entities.Pod
migrate: false,
noStore: false,
withFDS: true,
reset: false,
config: cfg,
})
}
@@ -82,6 +86,7 @@ func GetRuntime(ctx context.Context, flags *flag.FlagSet, cfg *entities.PodmanCo
migrate: false,
noStore: false,
withFDS: true,
reset: false,
config: cfg,
})
})
@@ -95,6 +100,18 @@ func GetRuntimeNoStore(ctx context.Context, fs *flag.FlagSet, cfg *entities.Podm
migrate: false,
noStore: true,
withFDS: true,
reset: false,
config: cfg,
})
}
func GetRuntimeReset(ctx context.Context, fs *flag.FlagSet, cfg *entities.PodmanConfig) (*libpod.Runtime, error) {
return getRuntime(ctx, fs, &engineOpts{
renumber: false,
migrate: false,
noStore: false,
withFDS: true,
reset: true,
config: cfg,
})
}
@@ -161,6 +178,10 @@ func getRuntime(ctx context.Context, fs *flag.FlagSet, opts *engineOpts) (*libpo
}
}
if opts.reset {
options = append(options, libpod.WithReset())
}
if opts.renumber {
options = append(options, libpod.WithRenumber())
}