podman/libpod/runtime.go

//go:build !remote

package libpod

import (
	"bufio"
	"context"
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"slices"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/containers/buildah/pkg/parse"
	"github.com/containers/common/libimage"
	"github.com/containers/common/libnetwork/network"
	nettypes "github.com/containers/common/libnetwork/types"
	"github.com/containers/common/pkg/cgroups"
	"github.com/containers/common/pkg/config"
	"github.com/containers/common/pkg/secrets"
	systemdCommon "github.com/containers/common/pkg/systemd"
	"github.com/containers/image/v5/pkg/sysregistriesv2"
	is "github.com/containers/image/v5/storage"
	"github.com/containers/image/v5/types"
	"github.com/containers/podman/v5/libpod/define"
	"github.com/containers/podman/v5/libpod/events"
	"github.com/containers/podman/v5/libpod/lock"
	"github.com/containers/podman/v5/libpod/plugin"
	"github.com/containers/podman/v5/libpod/shutdown"
	"github.com/containers/podman/v5/pkg/domain/entities"
	"github.com/containers/podman/v5/pkg/domain/entities/reports"
	artStore "github.com/containers/podman/v5/pkg/libartifact/store"
	"github.com/containers/podman/v5/pkg/rootless"
	"github.com/containers/podman/v5/pkg/systemd"
	"github.com/containers/podman/v5/pkg/util"
	"github.com/containers/storage"
	"github.com/containers/storage/pkg/fileutils"
	"github.com/containers/storage/pkg/lockfile"
	"github.com/containers/storage/pkg/unshare"
	"github.com/docker/docker/pkg/namesgenerator"
	"github.com/hashicorp/go-multierror"
	jsoniter "github.com/json-iterator/go"
	spec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
)

// json is the JSON codec used throughout libpod. It is jsoniter's
// ConfigCompatibleWithStandardLibrary configuration, bound to the identifier
// "json" so package code can call json.Marshal / json.Unmarshal on it.
var json = jsoniter.ConfigCompatibleWithStandardLibrary

// A RuntimeOption is a functional option which alters the Runtime created by
// NewRuntime or NewRuntimeFromConfig.
// Options are applied in the order they are passed, after the default storage
// options have been loaded and before the runtime is set up. A non-nil error
// from any option aborts runtime creation.
type RuntimeOption func(*Runtime) error

// storageSet holds one flag per storage-related setting, recording that the
// runtime already carries a value for that setting.
// mergeDBConfig checks RunRootSet and GraphRootSet before letting a value
// stored in the database replace the in-memory one; makeRuntime sets
// StaticDirSet and VolumePathSet when it derives those paths from the graph
// root. The remaining flags are presumably handled the same way by code
// outside this view - confirm before relying on it.
type storageSet struct {
	RunRootSet         bool
	GraphRootSet       bool
	StaticDirSet       bool
	VolumePathSet      bool
	GraphDriverNameSet bool
	TmpDirSet          bool
}

// Runtime is the core libpod runtime.
// It ties together the engine configuration, the containers/storage store,
// the state database, the OCI runtimes, networking, locking, events and
// secrets handling.
type Runtime struct {
	config        *config.Config
	storageConfig storage.StoreOptions
	storageSet    storageSet

	state                  State
	store                  storage.Store
	storageService         *storageService
	imageContext           *types.SystemContext
	defaultOCIRuntime      OCIRuntime
	ociRuntimes            map[string]OCIRuntime
	runtimeFlags           []string
	network                nettypes.ContainerNetwork
	conmonPath             string
	libimageRuntime        *libimage.Runtime
	libimageEventsShutdown chan bool
	lockManager            lock.Manager

	// ArtifactStore returns the artifact store created from the runtime.
	// It is only assigned when the container store is set up by makeRuntime
	// and is nil otherwise.
	ArtifactStore func() (*artStore.ArtifactStore, error)

	// Worker
	workerChannel chan func()
	workerGroup   sync.WaitGroup

	// syslog describes whether logrus should log to the syslog as well.
	// Note that the syslog hook will be enabled early in cmd/podman/syslog_linux.go
	// This bool is just needed so that we can set it for netavark interface.
	syslog bool

	// doReset indicates that the runtime will perform a system reset.
	// A reset will remove all containers, pods, volumes, networks, etc.
	// A number of validation checks are relaxed, or replaced with logic to
	// remove as much of the runtime as possible if they fail. This ensures
	// that even a broken Libpod can still be removed via `system reset`.
	// This does not actually perform a `system reset`. That is done by
	// calling "Reset()" on the returned runtime.
	doReset bool
	// doRenumber indicates that the runtime will perform a system renumber.
	// A renumber will reassign lock numbers for all containers, pods, etc.
	// This will not perform the renumber itself, but will ignore some
	// errors related to lock initialization so a renumber can be performed
	// if something has gone wrong.
	doRenumber bool

	// valid indicates whether the runtime is ready to use.
	// valid is set to true by makeRuntime once the lock manager is in
	// place, and remains true until Shutdown is called (rendering the
	// storage unusable). When valid is false, the runtime cannot be used.
	valid bool

	// mechanism to read and write event logs
	eventer events.Eventer

	// secretsManager manages secrets
	secretsManager *secrets.SecretsManager
}

// SetXdgDirs makes sure the XDG_RUNTIME_DIR and XDG_CONFIG_HOME environment
// variables are populated for rootless processes; it is a no-op when not
// running rootless.
// containers/image uses XDG_RUNTIME_DIR to locate the auth file, and
// XDG_CONFIG_HOME is used for the containers.conf configuration file.
// As a side effect, DBUS_SESSION_BUS_ADDRESS is pointed at
// $XDG_RUNTIME_DIR/bus when it is unset and that path exists.
func SetXdgDirs() error {
	if !rootless.IsRootless() {
		return nil
	}

	// XDG_RUNTIME_DIR: keep a pre-existing value, otherwise fall back to
	// the rootless runtime directory. The variable is (re)exported either way.
	xdgRuntime := os.Getenv("XDG_RUNTIME_DIR")
	if xdgRuntime == "" {
		dir, err := util.GetRootlessRuntimeDir()
		if err != nil {
			return err
		}
		xdgRuntime = dir
	}
	if err := os.Setenv("XDG_RUNTIME_DIR", xdgRuntime); err != nil {
		return fmt.Errorf("cannot set XDG_RUNTIME_DIR: %w", err)
	}

	// Session bus address: best effort only, a failure to export it is
	// deliberately not treated as an error.
	if rootless.IsRootless() && os.Getenv("DBUS_SESSION_BUS_ADDRESS") == "" {
		busSocket := filepath.Join(xdgRuntime, "bus")
		if fileutils.Exists(busSocket) == nil {
			_ = os.Setenv("DBUS_SESSION_BUS_ADDRESS", "unix:path="+busSocket)
		}
	}

	// XDG_CONFIG_HOME: only filled in when the caller has not set it.
	if os.Getenv("XDG_CONFIG_HOME") != "" {
		return nil
	}
	cfgHome, err := util.GetRootlessConfigHomeDir()
	if err != nil {
		return err
	}
	if err := os.Setenv("XDG_CONFIG_HOME", cfgHome); err != nil {
		return fmt.Errorf("cannot set XDG_CONFIG_HOME: %w", err)
	}
	return nil
}

// NewRuntime builds a container runtime on top of the default configuration
// returned by config.Default.
// The given options are applied on top of those defaults; any error from
// loading the defaults or from runtime setup is returned as-is.
func NewRuntime(ctx context.Context, options ...RuntimeOption) (*Runtime, error) {
	defaults, loadErr := config.Default()
	if loadErr != nil {
		return nil, loadErr
	}

	return newRuntimeFromConfig(ctx, defaults, options...)
}

// NewRuntimeFromConfig creates a new container runtime using the given,
// already loaded configuration as its default configuration. Passed
// RuntimeOption functions can be used to mutate this configuration further.
// The configuration is not copied: the runtime keeps the pointer and may
// modify the pointed-to value (for example to fill in a default OCI runtime).
// Any error encountered while applying options or setting up the runtime is
// returned.
func NewRuntimeFromConfig(ctx context.Context, userConfig *config.Config, options ...RuntimeOption) (*Runtime, error) {
	return newRuntimeFromConfig(ctx, userConfig, options...)
}

// newRuntimeFromConfig is the shared implementation behind NewRuntime and
// NewRuntimeFromConfig.
//
// It fills in a default OCI runtime when none is configured, sets up the XDG
// environment, loads the default storage options, applies the caller's
// options, runs makeRuntime, and finally registers and starts the shutdown
// handler that closes the store on signal.
//
// conf must not be nil; a nil conf yields an error wrapping
// define.ErrInvalidArg. conf is stored in the runtime without copying and may
// be modified.
func newRuntimeFromConfig(ctx context.Context, conf *config.Config, options ...RuntimeOption) (*Runtime, error) {
	// NewRuntimeFromConfig is exported and accepts a pointer; fail cleanly
	// instead of dereferencing nil below.
	if conf == nil {
		return nil, fmt.Errorf("runtime configuration must not be nil: %w", define.ErrInvalidArg)
	}

	runtime := new(Runtime)

	if conf.Engine.OCIRuntime == "" {
		conf.Engine.OCIRuntime = "runc"
		// If we're running on cgroups v2, default to using crun.
		// A detection error is treated like "not on cgroups v2".
		if onCgroupsv2, _ := cgroups.IsCgroup2UnifiedMode(); onCgroupsv2 {
			conf.Engine.OCIRuntime = "crun"
		}
	}

	runtime.config = conf

	if err := SetXdgDirs(); err != nil {
		return nil, err
	}

	storeOpts, err := storage.DefaultStoreOptions()
	if err != nil {
		return nil, err
	}
	runtime.storageConfig = storeOpts

	// Overwrite config with user-given configuration options
	for _, opt := range options {
		if err := opt(runtime); err != nil {
			return nil, fmt.Errorf("configuring runtime: %w", err)
		}
	}

	if err := makeRuntime(ctx, runtime); err != nil {
		return nil, err
	}

	// An already registered handler is fine; any other registration failure
	// is logged but does not prevent the runtime from being returned.
	if err := shutdown.Register("libpod", func(sig os.Signal) error {
		if runtime.store != nil {
			_, _ = runtime.store.Shutdown(false)
		}
		return nil
	}); err != nil && !errors.Is(err, shutdown.ErrHandlerExists) {
		logrus.Errorf("Registering shutdown handler for libpod: %v", err)
	}

	if err := shutdown.Start(); err != nil {
		return nil, fmt.Errorf("starting shutdown signal handler: %w", err)
	}

	runtime.config.CheckCgroupsAndAdjustConfig()

	return runtime, nil
}

// getLockManager opens the lock manager selected by Engine.LockType, creating
// it when it does not exist yet.
//
// "file" uses a lock directory named "locks" below the engine's TmpDir.
// "" and "shm" use a shared-memory segment whose name depends on whether the
// process is rootless. If the SHM segment reports ERANGE while a renumber is
// in progress, the segment file is removed from /dev/shm and recreated.
// Any other lock type yields an error wrapping define.ErrInvalidArg.
func getLockManager(runtime *Runtime) (lock.Manager, error) {
	var (
		mgr lock.Manager
		err error
	)
	engine := &runtime.config.Engine

	switch engine.LockType {
	case "file":
		locksDir := filepath.Join(engine.TmpDir, "locks")
		mgr, err = lock.OpenFileLockManager(locksDir)
		if err == nil {
			return mgr, nil
		}
		if !errors.Is(err, os.ErrNotExist) {
			return nil, err
		}
		// Nothing there yet: create a fresh file lock manager.
		mgr, err = lock.NewFileLockManager(locksDir)
		if err != nil {
			return nil, fmt.Errorf("failed to get new file lock manager: %w", err)
		}
		return mgr, nil

	case "", "shm":
		shmPath := define.DefaultSHMLockPath
		if rootless.IsRootless() {
			shmPath = fmt.Sprintf("%s_%d", define.DefaultRootlessSHMLockPath, rootless.GetRootlessUID())
		}

		mgr, err = lock.OpenSHMLockManager(shmPath, engine.NumLocks)
		if err == nil {
			return mgr, nil
		}

		if errors.Is(err, os.ErrNotExist) {
			mgr, err = lock.NewSHMLockManager(shmPath, engine.NumLocks)
			if err != nil {
				return nil, fmt.Errorf("failed to get new shm lock manager: %w", err)
			}
			return mgr, nil
		}

		// Only a lock-count mismatch during a renumber is recoverable.
		if !(errors.Is(err, syscall.ERANGE) && runtime.doRenumber) {
			return nil, err
		}
		logrus.Debugf("Number of locks does not match - removing old locks")

		// Drop the stale segment and build a new one with the
		// currently configured number of locks.
		if rmErr := os.Remove(filepath.Join("/dev/shm", shmPath)); rmErr != nil {
			return nil, fmt.Errorf("removing libpod locks file %s: %w", shmPath, rmErr)
		}
		mgr, err = lock.NewSHMLockManager(shmPath, engine.NumLocks)
		if err != nil {
			return nil, err
		}
		return mgr, nil

	default:
		return nil, fmt.Errorf("unknown lock type %s: %w", engine.LockType, define.ErrInvalidArg)
	}
}

// getDBState opens the state database selected by Engine.DBBackend.
//
// With the default backend, an existing bolt_state.db (looked up in StaticDir,
// or TmpDir for a transient store) keeps BoltDB in use for backwards
// compatibility; if the file does not exist SQLite is used. In both cases
// Engine.DBBackend is rewritten to the concrete backend name so that it is
// reported correctly (e.g. by podman info).
//
// TODO - if the state implementation is broken out further into
// libpod/state, the config could take care of this logic, and the types and
// consts could move into a coherent package.
func getDBState(runtime *Runtime) (State, error) {
	engine := &runtime.config.Engine

	backend, err := config.ParseDBBackend(engine.DBBackend)
	if err != nil {
		return nil, err
	}

	// Location of a potential legacy BoltDB file.
	stateDir := engine.StaticDir
	if runtime.storageConfig.TransientStore {
		stateDir = engine.TmpDir
	}
	boltDBPath := filepath.Join(stateDir, "bolt_state.db")

	switch backend {
	case config.DBBackendDefault:
		existsErr := fileutils.Exists(boltDBPath)
		switch {
		case existsErr == nil:
			engine.DBBackend = config.DBBackendBoltDB.String()
			return NewBoltState(boltDBPath, runtime)
		case errors.Is(existsErr, fs.ErrNotExist):
			engine.DBBackend = config.DBBackendSQLite.String()
			return NewSqliteState(runtime)
		default:
			// Some other problem with the boltdb file. Surface it
			// instead of silently switching to sqlite, which would
			// be hard for the user to debug.
			return nil, existsErr
		}
	case config.DBBackendBoltDB:
		return NewBoltState(boltDBPath, runtime)
	case config.DBBackendSQLite:
		return NewSqliteState(runtime)
	default:
		return nil, fmt.Errorf("unrecognized database backend passed (%q): %w", backend.String(), define.ErrInvalidArg)
	}
}

// makeRuntime completes the setup of a Runtime whose config and storageConfig
// have already been populated.
//
// In order, it: locates conmon; creates the static, temporary and volume
// directories; opens the state database and merges/validates the paths stored
// in it; configures containers/storage (unless a user namespace is still
// needed); sets up the eventer, image context and OCI runtimes; sets up the
// network backend and artifact store; and finally, under the runtime alive
// lock, decides whether a post-reboot refresh is required, obtains the lock
// manager, marks the runtime valid, refreshes if needed, records the boot ID
// and starts the worker.
//
// If the alive file is missing and a user namespace is needed, the process
// re-executes itself inside the namespace and exits with the child's status.
//
// On a non-nil return the container store, if one was opened, is shut down
// (non-forcibly).
func makeRuntime(ctx context.Context, runtime *Runtime) (retErr error) {
	// Find a working conmon binary
	cPath, err := runtime.config.FindConmon()
	if err != nil {
		return err
	}
	runtime.conmonPath = cPath

	if runtime.config.Engine.StaticDir == "" {
		runtime.config.Engine.StaticDir = filepath.Join(runtime.storageConfig.GraphRoot, "libpod")
		runtime.storageSet.StaticDirSet = true
	}

	if runtime.config.Engine.VolumePath == "" {
		runtime.config.Engine.VolumePath = filepath.Join(runtime.storageConfig.GraphRoot, "volumes")
		runtime.storageSet.VolumePathSet = true
	}

	// Make the static files directory if it does not exist
	if err := os.MkdirAll(runtime.config.Engine.StaticDir, 0700); err != nil {
		// The directory is allowed to exist
		if !errors.Is(err, os.ErrExist) {
			return fmt.Errorf("creating runtime static files directory %q: %w", runtime.config.Engine.StaticDir, err)
		}
	}

	// Create the TmpDir if needed
	if err := os.MkdirAll(runtime.config.Engine.TmpDir, 0751); err != nil {
		return fmt.Errorf("creating runtime temporary files directory: %w", err)
	}

	// Create the volume path if needed.
	// This is not strictly necessary at this point, but the path not
	// existing can cause troubles with DB path validation on OSTree based
	// systems. Ref: https://github.com/containers/podman/issues/23515
	if err := os.MkdirAll(runtime.config.Engine.VolumePath, 0700); err != nil {
		return fmt.Errorf("creating runtime volume path directory: %w", err)
	}

	// Set up the state.
	runtime.state, err = getDBState(runtime)
	if err != nil {
		return err
	}

	// Grab config from the database so we can reset some defaults
	dbConfig, err := runtime.state.GetDBConfig()
	if err != nil {
		if runtime.doReset {
			// We can at least delete the DB and the static files
			// directory.
			// Can't safely touch anything else because we aren't
			// sure of other directories.
			if err := runtime.state.Close(); err != nil {
				logrus.Errorf("Closing database connection: %v", err)
			} else {
				if err := os.RemoveAll(runtime.config.Engine.StaticDir); err != nil {
					logrus.Errorf("Removing static files directory %v: %v", runtime.config.Engine.StaticDir, err)
				}
			}
		}

		return fmt.Errorf("retrieving runtime configuration from database: %w", err)
	}

	runtime.mergeDBConfig(dbConfig)

	checkCgroups2UnifiedMode(runtime)

	logrus.Debugf("Using graph driver %s", runtime.storageConfig.GraphDriverName)
	logrus.Debugf("Using graph root %s", runtime.storageConfig.GraphRoot)
	logrus.Debugf("Using run root %s", runtime.storageConfig.RunRoot)
	logrus.Debugf("Using static dir %s", runtime.config.Engine.StaticDir)
	logrus.Debugf("Using tmp dir %s", runtime.config.Engine.TmpDir)
	logrus.Debugf("Using volume path %s", runtime.config.Engine.VolumePath)
	logrus.Debugf("Using transient store: %v", runtime.storageConfig.TransientStore)

	// Validate our config against the database, now that we've set our
	// final storage configuration
	if err := runtime.state.ValidateDBConfig(runtime); err != nil {
		// If we are performing a storage reset: continue on with a
		// warning. Otherwise we can't `system reset` after a change to
		// the core paths.
		if !runtime.doReset {
			return err
		}
		logrus.Errorf("Runtime paths differ from those stored in database, storage reset may not remove all files")
	}

	if runtime.config.Engine.Namespace != "" {
		return fmt.Errorf("namespaces are not supported by this version of Libpod, please unset the `namespace` field in containers.conf: %w", define.ErrNotImplemented)
	}

	// A user namespace is needed when we are not (effective) root, or when
	// we are root but lack CAP_SYS_ADMIN.
	needsUserns := os.Geteuid() != 0
	if !needsUserns {
		hasCapSysAdmin, err := unshare.HasCapSysAdmin()
		if err != nil {
			return err
		}
		needsUserns = !hasCapSysAdmin
	}
	// Set up containers/storage
	if needsUserns {
		logrus.Debug("Not configuring container store")
	} else if err := runtime.configureStore(); err != nil {
		// Make a best-effort attempt to clean up if performing a
		// storage reset.
		if runtime.doReset {
			if err := runtime.removeAllDirs(); err != nil {
				logrus.Errorf("Removing libpod directories: %v", err)
			}
		}

		return fmt.Errorf("configure storage: %w", err)
	}
	defer func() {
		// Look at runtime.store at return time: the store may have been
		// configured above or later, right before a refresh.
		if retErr != nil && runtime.store != nil {
			// Don't forcibly shut down
			// We could be opening a store in use by another libpod
			if _, err := runtime.store.Shutdown(false); err != nil {
				logrus.Errorf("Removing store for partially-created runtime: %s", err)
			}
		}
	}()

	// Set up the eventer
	eventer, err := runtime.newEventer()
	if err != nil {
		return err
	}
	runtime.eventer = eventer

	// Set up containers/image
	if runtime.imageContext == nil {
		runtime.imageContext = &types.SystemContext{
			BigFilesTemporaryDir: parse.GetTempDir(),
		}
	}
	runtime.imageContext.SignaturePolicyPath = runtime.config.Engine.SignaturePolicyPath

	// Get us at least one working OCI runtime.
	runtime.ociRuntimes = make(map[string]OCIRuntime)

	// Initialize remaining OCI runtimes
	for name, paths := range runtime.config.Engine.OCIRuntimes {
		ociRuntime, err := newConmonOCIRuntime(name, paths, runtime.conmonPath, runtime.runtimeFlags, runtime.config)
		if err != nil {
			// Don't fatally error.
			// This will allow us to ship configs including optional
			// runtimes that might not be installed (crun, kata).
			// Only logged at debug level so default configs don't
			// spew errors.
			logrus.Debugf("Configured OCI runtime %s initialization failed: %v", name, err)
			continue
		}

		runtime.ociRuntimes[name] = ociRuntime
	}

	// Do we have a default OCI runtime?
	if runtime.config.Engine.OCIRuntime != "" {
		// If the string starts with / it's a path to a runtime
		// executable.
		if strings.HasPrefix(runtime.config.Engine.OCIRuntime, "/") {
			ociRuntime, err := newConmonOCIRuntime(runtime.config.Engine.OCIRuntime, []string{runtime.config.Engine.OCIRuntime}, runtime.conmonPath, runtime.runtimeFlags, runtime.config)
			if err != nil {
				return err
			}

			runtime.ociRuntimes[runtime.config.Engine.OCIRuntime] = ociRuntime
			runtime.defaultOCIRuntime = ociRuntime
		} else {
			ociRuntime, ok := runtime.ociRuntimes[runtime.config.Engine.OCIRuntime]
			if !ok {
				return fmt.Errorf("default OCI runtime %q not found: %w", runtime.config.Engine.OCIRuntime, define.ErrInvalidArg)
			}
			runtime.defaultOCIRuntime = ociRuntime
		}
	}

	// Do we have at least one valid OCI runtime?
	if len(runtime.ociRuntimes) == 0 {
		return fmt.Errorf("no OCI runtime has been configured: %w", define.ErrInvalidArg)
	}

	// Do we have a default runtime?
	if runtime.defaultOCIRuntime == nil {
		return fmt.Errorf("no default OCI runtime was configured: %w", define.ErrInvalidArg)
	}
	// Only safe to dereference once the nil check above has passed.
	logrus.Debugf("Using OCI runtime %q", runtime.defaultOCIRuntime.Path())

	// the store is only set up when we are in the userns so we do the same for the network interface
	if !needsUserns {
		netBackend, netInterface, err := network.NetworkBackend(runtime.store, runtime.config, runtime.syslog)
		if err != nil {
			return err
		}
		runtime.config.Network.NetworkBackend = string(netBackend)
		runtime.network = netInterface

		// Using sync once value to only init the store exactly once and only when it will be actually be used.
		runtime.ArtifactStore = sync.OnceValues(func() (*artStore.ArtifactStore, error) {
			return artStore.NewArtifactStore(filepath.Join(runtime.storageConfig.GraphRoot, "artifacts"), runtime.SystemContext())
		})
	}

	// We now need to see if the system has restarted
	// We check for the presence of a file in our tmp directory to verify this
	// This check must be locked to prevent races
	runtimeAliveFile := filepath.Join(runtime.config.Engine.TmpDir, "alive")
	aliveLock, err := runtime.getRuntimeAliveLock()
	if err != nil {
		return fmt.Errorf("acquiring runtime init lock: %w", err)
	}
	// Acquire the lock and hold it until we return
	// This ensures that no two processes will be in runtime.refresh at once
	aliveLock.Lock()
	doRefresh := false
	unLockFunc := aliveLock.Unlock
	defer func() {
		// unLockFunc is cleared when the lock is released early below.
		if unLockFunc != nil {
			unLockFunc()
		}
	}()

	err = fileutils.Exists(runtimeAliveFile)
	if err != nil {
		// If we need to refresh, then it is safe to assume there are
		// no containers running.  Create immediately a namespace, as
		// we will need to access the storage.
		if needsUserns {
			// warn users if mode is rootless and cgroup manager is systemd
			// and no valid systemd session is present
			// warn only whenever new namespace is created
			if runtime.config.Engine.CgroupManager == config.SystemdCgroupsManager {
				unified, _ := cgroups.IsCgroup2UnifiedMode()
				if unified && rootless.IsRootless() && !systemd.IsSystemdSessionValid(rootless.GetRootlessUID()) {
					logrus.Debug("Invalid systemd user session for current user")
				}
			}
			unLockFunc()
			unLockFunc = nil
			// Note: err is shadowed inside this block; the outer err
			// still holds the result of the alive file check.
			pausePid, err := util.GetRootlessPauseProcessPidPath()
			if err != nil {
				return fmt.Errorf("could not get pause process pid file path: %w", err)
			}

			// create the path in case it does not already exists
			// https://github.com/containers/podman/issues/8539
			if err := os.MkdirAll(filepath.Dir(pausePid), 0o700); err != nil {
				return fmt.Errorf("could not create pause process pid file directory: %w", err)
			}

			became, ret, err := rootless.BecomeRootInUserNS(pausePid)
			if err != nil {
				return err
			}
			if became {
				// Check if the pause process was created.  If it was created, then
				// move it to its own systemd scope.
				systemdCommon.MovePauseProcessToScope(pausePid)

				// gocritic complains because defer is not run on os.Exit()
				// However this is fine because the lock is released anyway when the process exits
				//nolint:gocritic
				os.Exit(ret)
			}
		}
		// If the file doesn't exist, we need to refresh the state
		// This will trigger on first use as well, but refreshing an
		// empty state only creates a single file
		// As such, it's not really a performance concern
		if errors.Is(err, os.ErrNotExist) {
			doRefresh = true
		} else {
			return fmt.Errorf("reading runtime status file %s: %w", runtimeAliveFile, err)
		}
	}

	runtime.lockManager, err = getLockManager(runtime)
	if err != nil {
		return err
	}

	// Mark the runtime as valid - ready to be used, cannot be modified
	// further.
	// Need to do this *before* refresh as we can remove containers there.
	// Should not be a big deal as we don't return it to users until after
	// refresh runs.
	runtime.valid = true

	// Setup the worker channel early to start accepting jobs from refresh,
	// but do not start to execute the jobs right away. The runtime is not
	// ready at this point.
	runtime.setupWorkerQueue()

	// If we need to refresh the state, do it now - things are guaranteed to
	// be set up by now.
	if doRefresh {
		// Ensure we have a store before refresh occurs
		if runtime.store == nil {
			if err := runtime.configureStore(); err != nil {
				return fmt.Errorf("configure storage: %w", err)
			}
		}

		if err2 := runtime.refresh(ctx, runtimeAliveFile); err2 != nil {
			return err2
		}
	}

	// Check current boot ID - will be written to the alive file.
	if err := runtime.checkBootID(runtimeAliveFile); err != nil {
		return err
	}

	runtime.startWorker()

	return nil
}

// TmpDir gets the current Libpod temporary files directory, i.e. the TmpDir
// value of the engine configuration.
// It returns define.ErrRuntimeStopped if the runtime is not valid.
func (r *Runtime) TmpDir() (string, error) {
	if !r.valid {
		return "", define.ErrRuntimeStopped
	}

	return r.config.Engine.TmpDir, nil
}

// GetConfigNoCopy returns the configuration used by the runtime.
// Note that the returned value is not a copy and must hence
// only be used in a reading fashion.
// It returns define.ErrRuntimeStopped if the runtime is not valid.
func (r *Runtime) GetConfigNoCopy() (*config.Config, error) {
	if !r.valid {
		return nil, define.ErrRuntimeStopped
	}
	return r.config, nil
}

// GetConfig hands out a deep copy of the runtime's configuration, so callers
// are free to modify the result without affecting the runtime.
// Use GetConfigNoCopy() instead when the config is only going to be read.
// Errors from GetConfigNoCopy are returned unchanged; a failed copy is
// reported as a "copying config" error.
func (r *Runtime) GetConfig() (*config.Config, error) {
	live, err := r.GetConfigNoCopy()
	if err != nil {
		return nil, err
	}

	// Duplicate via JSONDeepCopy so the caller never shares memory with
	// the runtime's own config.
	var cfgCopy config.Config
	if err := JSONDeepCopy(live, &cfgCopy); err != nil {
		return nil, fmt.Errorf("copying config: %w", err)
	}

	return &cfgCopy, nil
}

// libimageEventsMap translates a libimage event type to a libpod event status.
// Event types without an entry are reported as "Unknown" by libimageEvents.
var libimageEventsMap = map[libimage.EventType]events.Status{
	libimage.EventTypeImagePull:      events.Pull,
	libimage.EventTypeImagePullError: events.PullError,
	libimage.EventTypeImagePush:      events.Push,
	libimage.EventTypeImageRemove:    events.Remove,
	libimage.EventTypeImageLoad:      events.LoadFromArchive,
	libimage.EventTypeImageSave:      events.Save,
	libimage.EventTypeImageTag:       events.Tag,
	libimage.EventTypeImageUntag:     events.Untag,
	libimage.EventTypeImageMount:     events.Mount,
	libimage.EventTypeImageUnmount:   events.Unmount,
}

// libimageEvents spawns a goroutine which will listen for events on
// the libimage.Runtime and write them to the runtime's eventer as image
// events.
// The goroutine stops when Shutdown sends on libimageEventsShutdown: it
// drains the pending events one more time and then closes the channel, which
// Shutdown waits for. Otherwise it will be cleaned up implicitly when
// main() exits.
func (r *Runtime) libimageEvents() {
	r.libimageEventsShutdown = make(chan bool)

	// Map a libimage event type to the libpod status, falling back to
	// "Unknown" for types missing from libimageEventsMap.
	toLibpodEventStatus := func(e *libimage.Event) events.Status {
		status, found := libimageEventsMap[e.Type]
		if !found {
			return "Unknown"
		}
		return status
	}

	eventChannel := r.libimageRuntime.EventChannel()
	go func() {
		sawShutdown := false
		for {
			// Make sure to read and write all events before
			// shutting down.
			for len(eventChannel) > 0 {
				libimageEvent := <-eventChannel
				e := events.Event{
					ID:     libimageEvent.ID,
					Name:   libimageEvent.Name,
					Status: toLibpodEventStatus(libimageEvent),
					Time:   libimageEvent.Time,
					Type:   events.Image,
				}
				if libimageEvent.Error != nil {
					e.Error = libimageEvent.Error.Error()
				}
				if err := r.eventer.Write(e); err != nil {
					logrus.Errorf("Unable to write image event: %q", err)
				}
			}

			// The shutdown request was seen on the previous iteration
			// and the channel has been drained again since: closing
			// the channel tells Shutdown that we are done.
			if sawShutdown {
				close(r.libimageEventsShutdown)
				return
			}

			// Wait for a shutdown request, but wake up every 100ms to
			// check for newly queued events.
			select {
			case <-r.libimageEventsShutdown:
				sawShutdown = true
			case <-time.After(100 * time.Millisecond):
			}
		}
	}()
}

// DeferredShutdown shuts down the runtime without exposing any
// errors: it calls Shutdown(force) and discards the returned error.
// This is only meant to be used when the runtime is being
// shutdown within a defer statement; else use Shutdown
func (r *Runtime) DeferredShutdown(force bool) {
	_ = r.Shutdown(force)
}

// Shutdown stops the runtime together with its containers and storage.
// With force set, all containers are stopped and mounted storage is shut down
// before cleaning up; without it, an error is returned if containers are
// still running or storage is still mounted.
// Calling Shutdown on a runtime that is not valid is a no-op.
// If both the storage shutdown and closing the state fail, the storage error
// is logged and the state error is returned.
func (r *Runtime) Shutdown(force bool) error {
	if !r.valid {
		return nil
	}

	// Let queued worker jobs finish before closing their channel.
	if r.workerChannel != nil {
		r.workerGroup.Wait()
		close(r.workerChannel)
	}

	r.valid = false

	// Shutdown all containers if --force is given
	if force {
		if allCtrs, err := r.state.AllContainers(false); err == nil {
			for _, ctr := range allCtrs {
				if stopErr := ctr.StopWithTimeout(r.config.Engine.StopTimeout); stopErr != nil {
					logrus.Errorf("Stopping container %s: %v", ctr.ID(), stopErr)
				}
			}
		} else {
			logrus.Errorf("Retrieving containers from database: %v", err)
		}
	}

	// The store is nil when none was requested; nothing to shut down then.
	var storageErr error
	if r.store != nil {
		if done := r.libimageEventsShutdown; done != nil {
			// Ask the events goroutine to stop, then wait for it to
			// close the channel once all events have been written.
			done <- true
			<-done
		}

		// Note that the libimage runtime shuts down the store.
		if err := r.libimageRuntime.Shutdown(force); err != nil {
			storageErr = fmt.Errorf("shutting down container storage: %w", err)
		}
	}

	stateErr := r.state.Close()
	if stateErr == nil {
		return storageErr
	}
	// Only one error can be returned; do not lose the storage one.
	if storageErr != nil {
		logrus.Error(storageErr)
	}
	return stateErr
}

// refresh reconfigures the runtime after a reboot.
// It resets the database state (unless the graph root is mounted, i.e. we
// run inside a container), refreshes every container, pod and volume,
// removes containers that should already have been removed, and finally
// creates the alive file at alivePath and emits a refresh system event.
// Validity is not checked as the runtime is not valid until after this has
// run. Failures to refresh or remove individual objects are only logged.
func (r *Runtime) refresh(ctx context.Context, alivePath string) error {
	logrus.Debugf("Podman detected system restart - performing state refresh")

	// Clear state of database if not running in container
	if !graphRootMounted() {
		if err := r.state.Refresh(); err != nil {
			return err
		}
	}

	// Collect everything that has to be refreshed up front: containers
	// (dirs and namespaces), pods (cgroups) and volumes. All of them must
	// also reacquire their locks.
	allCtrs, err := r.state.AllContainers(false)
	if err != nil {
		return fmt.Errorf("retrieving all containers from state: %w", err)
	}
	allPods, err := r.state.AllPods()
	if err != nil {
		return fmt.Errorf("retrieving all pods from state: %w", err)
	}
	allVols, err := r.state.AllVolumes()
	if err != nil {
		return fmt.Errorf("retrieving all volumes from state: %w", err)
	}

	// Removal options for leftover containers. No force: we are supposed
	// to be fresh off a reboot, and having to force would mean something
	// is seriously wrong.
	staleRmOpts := ctrRmOpts{
		Force:        false,
		RemoveVolume: true,
	}

	// No locks are taken while refreshing, and the refresh() functions of
	// pods, volumes and containers must not take locks themselves: none of
	// these objects can be assumed to have a valid lock until this function
	// has returned. The runtime alive lock provides the mutual exclusion.
	for _, ctr := range allCtrs {
		if err := ctr.refresh(); err != nil {
			logrus.Errorf("Refreshing container %s: %v", ctr.ID(), err)
		}

		// This is the only place it's safe to use ctr.state.State unlocked
		// We're holding the alive lock, guaranteed to be the only Libpod on the system right now.
		leftover := (ctr.AutoRemove() && ctr.state.State == define.ContainerStateExited) ||
			ctr.state.State == define.ContainerStateRemoving
		if !leftover {
			continue
		}

		// The container should have been removed before the reboot
		// but was not; get rid of it now.
		if _, _, err := r.removeContainer(ctx, ctr, staleRmOpts); err != nil {
			logrus.Errorf("Unable to remove container %s which should have autoremoved: %v", ctr.ID(), err)
		}
	}
	for _, pod := range allPods {
		if err := pod.refresh(); err != nil {
			logrus.Errorf("Refreshing pod %s: %v", pod.ID(), err)
		}
	}
	for _, vol := range allVols {
		if err := vol.refresh(); err != nil {
			logrus.Errorf("Refreshing volume %s: %v", vol.Name(), err)
		}
	}

	// Create a file indicating the runtime is alive and ready
	aliveFile, err := os.OpenFile(alivePath, os.O_RDONLY|os.O_CREATE, 0644)
	if err != nil {
		return fmt.Errorf("creating runtime status file: %w", err)
	}
	defer aliveFile.Close()

	r.NewSystemEvent(events.Refresh)

	return nil
}

// Info returns the store and host information.
// It is the exported entry point for the unexported info method.
func (r *Runtime) Info() (*define.Info, error) {
	return r.info()
}

// generateName returns a random name that is currently used by neither a
// container nor a pod.
// Candidates are drawn until one is free; any lookup error other than
// "no such container" / "no such pod" aborts the search and is returned.
func (r *Runtime) generateName() (string, error) {
	for {
		candidate := namesgenerator.GetRandomName(0)

		// Taken by a container? Try the next candidate.
		_, ctrErr := r.state.LookupContainer(candidate)
		if ctrErr == nil {
			continue
		}
		if !errors.Is(ctrErr, define.ErrNoSuchCtr) {
			return "", ctrErr
		}

		// Taken by a pod? Try the next candidate.
		_, podErr := r.state.LookupPod(candidate)
		if podErr == nil {
			continue
		}
		if !errors.Is(podErr, define.ErrNoSuchPod) {
			return "", podErr
		}

		return candidate, nil
	}
}

// configureStore opens the containers/storage store described by
// r.storageConfig and wires up everything that depends on it: the store is
// registered with the containers/image storage transport, the storage service
// is created, the libimage runtime is built on top of the store, and the
// libimage events goroutine is started.
func (r *Runtime) configureStore() error {
	ctrStore, err := storage.GetStore(r.storageConfig)
	if err != nil {
		return err
	}
	r.store = ctrStore
	is.Transport.SetStore(ctrStore)

	// The storage service creates container root filesystems from images.
	r.storageService = getStorageService(r.store)

	imgRuntime, err := libimage.RuntimeFromStore(ctrStore, &libimage.RuntimeOptions{
		SystemContext: r.imageContext,
	})
	if err != nil {
		return err
	}
	r.libimageRuntime = imgRuntime

	// Start forwarding libimage events.
	r.libimageEvents()

	return nil
}

// LibimageRuntime returns the libimage runtime used by this Runtime, to allow
// for a step-by-step migration to libimage.
// The value is assigned by configureStore; it is nil if the container store
// has not been configured.
func (r *Runtime) LibimageRuntime() *libimage.Runtime {
	return r.libimageRuntime
}

// SystemContext returns the image system context.
// NOTE(review): this calls through r.libimageRuntime without a nil check, so
// it presumably requires the container store to have been configured -
// confirm against callers.
func (r *Runtime) SystemContext() *types.SystemContext {
	// Return the context from the libimage runtime.  libimage is sensitive
	// to a number of env vars.
	return r.libimageRuntime.SystemContext()
}

// GetOCIRuntimePath retrieves the path of the default OCI runtime, as
// reported by that runtime's Path method.
func (r *Runtime) GetOCIRuntimePath() string {
	return r.defaultOCIRuntime.Path()
}

// DefaultOCIRuntime returns the default OCI runtime held by this Runtime.
func (r *Runtime) DefaultOCIRuntime() OCIRuntime {
	defaultRuntime := r.defaultOCIRuntime
	return defaultRuntime
}

// StorageConfig returns, by value, the storage options the container runtime
// is currently holding.
func (r *Runtime) StorageConfig() storage.StoreOptions {
	opts := r.storageConfig
	return opts
}

// GarbageCollect asks the underlying store to garbage collect and returns
// whatever error the store reports.
func (r *Runtime) GarbageCollect() error {
	err := r.store.GarbageCollect()
	return err
}

// RunRoot reports the c/storage temporary directory currently in use by
// Libpod. The empty string is returned while no store is configured.
func (r *Runtime) RunRoot() string {
	if r.store != nil {
		return r.store.RunRoot()
	}
	return ""
}

// GraphRoot reports the c/storage directory currently in use by Libpod. The
// empty string is returned while no store is configured.
func (r *Runtime) GraphRoot() string {
	if r.store != nil {
		return r.store.GraphRoot()
	}
	return ""
}

// GetPodName looks up the pod name associated with a given full ID.
// If the given ID does not correspond to any existing Pod or Container,
// ErrNoSuchPod is returned. define.ErrRuntimeStopped is returned when the
// runtime is no longer valid.
func (r *Runtime) GetPodName(id string) (string, error) {
	if r.valid {
		return r.state.GetPodName(id)
	}
	return "", define.ErrRuntimeStopped
}

// DBConfig is a set of Libpod runtime configuration settings that are saved in
// a State when it is first created, and can subsequently be retrieved.
// mergeDBConfig applies non-empty fields to the runtime configuration unless
// the corresponding setting was explicitly set.
type DBConfig struct {
	// LibpodRoot is merged into the engine's StaticDir.
	LibpodRoot  string
	// LibpodTmp is merged into the engine's TmpDir.
	LibpodTmp   string
	// StorageRoot is merged into the storage config's GraphRoot.
	StorageRoot string
	// StorageTmp is merged into the storage config's RunRoot.
	StorageTmp  string
	// GraphDriver is merged into the storage config's GraphDriverName.
	GraphDriver string
	// VolumePath is merged into the engine's VolumePath.
	VolumePath  string
}

// mergeDBConfig folds the settings stored in the database into the runtime's
// storage and engine configuration. A database value is applied only when it
// is non-empty and the matching setting was not explicitly set (as tracked by
// r.storageSet). Replacing a differing, non-empty value is logged: at debug
// level for paths, at error level for the graph driver.
func (r *Runtime) mergeDBConfig(dbConfig *DBConfig) {
	engine := &r.config.Engine
	storageOpts := &r.storageConfig

	if dbConfig.StorageTmp != "" && !r.storageSet.RunRootSet {
		if prev := storageOpts.RunRoot; prev != "" && prev != dbConfig.StorageTmp {
			logrus.Debugf("Overriding run root %q with %q from database",
				prev, dbConfig.StorageTmp)
		}
		storageOpts.RunRoot = dbConfig.StorageTmp
	}

	if dbConfig.StorageRoot != "" && !r.storageSet.GraphRootSet {
		if prev := storageOpts.GraphRoot; prev != "" && prev != dbConfig.StorageRoot {
			logrus.Debugf("Overriding graph root %q with %q from database",
				prev, dbConfig.StorageRoot)
		}
		storageOpts.GraphRoot = dbConfig.StorageRoot
	}

	if dbConfig.GraphDriver != "" && !r.storageSet.GraphDriverNameSet {
		if prev := storageOpts.GraphDriverName; prev != "" && prev != dbConfig.GraphDriver {
			// The graph root printed here is the value after the merge above.
			logrus.Errorf("User-selected graph driver %q overwritten by graph driver %q from database - delete libpod local files (%q) to resolve.  May prevent use of images created by other tools",
				prev, dbConfig.GraphDriver, storageOpts.GraphRoot)
		}
		storageOpts.GraphDriverName = dbConfig.GraphDriver
	}

	if dbConfig.LibpodRoot != "" && !r.storageSet.StaticDirSet {
		if prev := engine.StaticDir; prev != "" && prev != dbConfig.LibpodRoot {
			logrus.Debugf("Overriding static dir %q with %q from database", prev, dbConfig.LibpodRoot)
		}
		engine.StaticDir = dbConfig.LibpodRoot
	}

	if dbConfig.LibpodTmp != "" && !r.storageSet.TmpDirSet {
		if prev := engine.TmpDir; prev != "" && prev != dbConfig.LibpodTmp {
			logrus.Debugf("Overriding tmp dir %q with %q from database", prev, dbConfig.LibpodTmp)
		}
		engine.TmpDir = dbConfig.LibpodTmp
	}

	if dbConfig.VolumePath != "" && !r.storageSet.VolumePathSet {
		if prev := engine.VolumePath; prev != "" && prev != dbConfig.VolumePath {
			logrus.Debugf("Overriding volume path %q with %q from database", prev, dbConfig.VolumePath)
		}
		engine.VolumePath = dbConfig.VolumePath
	}
}

// EnableLabeling reports the EnableLabeling setting from the containers
// section of the runtime configuration.
func (r *Runtime) EnableLabeling() bool {
	enabled := r.config.Containers.EnableLabeling
	return enabled
}

// Reload re-reads the configuration files: containers.conf first, then
// storage.conf. The first failure is returned and stops the reload. On
// success the registries.conf cache is invalidated as well.
func (r *Runtime) Reload() error {
	err := r.reloadContainersConf()
	if err == nil {
		err = r.reloadStorageConf()
	}
	if err != nil {
		return err
	}

	// Drop the cached registries.conf data; the next invocation will reload
	// all of it.
	sysregistriesv2.InvalidateCache()
	return nil
}

// reloadContainersConf reloads containers.conf and, on success, installs the
// result as the runtime's configuration.
func (r *Runtime) reloadContainersConf() error {
	reloaded, err := config.Reload()
	if err != nil {
		return err
	}

	r.config = reloaded
	logrus.Infof("Applied new containers configuration: %v", reloaded)
	return nil
}

// reloadStorageConf reloads storage.conf from the default configuration file
// location into the runtime's storage options. Only a failure to determine
// that location is reported as an error.
func (r *Runtime) reloadStorageConf() error {
	confPath, err := storage.DefaultConfigFile()
	if err == nil {
		storage.ReloadConfigurationFile(confPath, &r.storageConfig)
		logrus.Infof("Applied new storage configuration: %v", r.storageConfig)
	}
	return err
}

// getVolumePlugin resolves the volume plugin for the driver named in
// volConfig.
//
// A nil plugin with a nil error is returned for the local driver, for an
// empty driver name, and for the image driver when no plugin of that name is
// configured. Any other driver without a configured plugin yields an error
// wrapping define.ErrMissingPlugin.
func (r *Runtime) getVolumePlugin(volConfig *VolumeConfig) (*plugin.VolumePlugin, error) {
	driver := volConfig.Driver

	// There is no plugin for local.
	if driver == "" || driver == define.VolumeDriverLocal {
		return nil, nil
	}

	path, configured := r.config.Engine.VolumePlugins[driver]
	switch {
	case configured:
		return plugin.GetVolumePlugin(driver, path, volConfig.Timeout, r.config)
	case driver == define.VolumeDriverImage:
		return nil, nil
	default:
		return nil, fmt.Errorf("no volume plugin with name %s available: %w", driver, define.ErrMissingPlugin)
	}
}

// GetSecretsStorageDir returns the directory the secrets manager should use:
// the "secrets" directory directly below the store's graph root.
func (r *Runtime) GetSecretsStorageDir() string {
	graphRoot := r.store.GraphRoot()
	return filepath.Join(graphRoot, "secrets")
}

// SecretsManager returns the runtime's secrets manager. The manager is
// created on first use, rooted at GetSecretsStorageDir(), and cached on the
// runtime for subsequent calls.
func (r *Runtime) SecretsManager() (*secrets.SecretsManager, error) {
	if r.secretsManager != nil {
		return r.secretsManager, nil
	}

	manager, err := secrets.NewManager(r.GetSecretsStorageDir())
	if err != nil {
		return nil, err
	}
	r.secretsManager = manager
	return manager, nil
}

// graphRootMounted reports whether /run/.containerenv contains a line that is
// exactly "graphRootMounted=1". It returns false when the file cannot be
// opened.
func graphRootMounted() bool {
	envFile, err := os.Open("/run/.containerenv")
	if err != nil {
		return false
	}
	defer envFile.Close()

	const marker = "graphRootMounted=1"

	found := false
	lines := bufio.NewScanner(envFile)
	for !found && lines.Scan() {
		found = lines.Text() == marker
	}
	return found
}

// graphRootMountedFlag returns "graphRootMounted=1" when the store's graph
// root is equal to, or located below, the source of one of the given mounts.
// Otherwise it returns the empty string.
//
// Paths are compared on path-component boundaries, so a mount of
// "/var/lib/cont" does not count as containing "/var/lib/containers".
// Mounts with an empty source are ignored, because an empty string is a
// prefix of every path.
func (r *Runtime) graphRootMountedFlag(mounts []spec.Mount) string {
	root := r.store.GraphRoot()
	if root == "" {
		return ""
	}
	root = filepath.Clean(root)

	for _, m := range mounts {
		if m.Source == "" {
			continue
		}
		src := filepath.Clean(m.Source)
		// Trimming the separator keeps "/" from turning into "//" below.
		dirPrefix := strings.TrimSuffix(src, "/") + "/"
		if root == src || strings.HasPrefix(root, dirPrefix) {
			return "graphRootMounted=1"
		}
	}
	return ""
}

// getRuntimeAliveLock returns the lock file for "alive.lck" inside the
// engine's temporary directory.
func (r *Runtime) getRuntimeAliveLock() (*lockfile.LockFile, error) {
	lockPath := filepath.Join(r.config.Engine.TmpDir, "alive.lck")
	return lockfile.GetLockFile(lockPath)
}

// Network returns the network interface which is used by the runtime.
func (r *Runtime) Network() nettypes.ContainerNetwork {
	containerNetwork := r.network
	return containerNetwork
}

// GetDefaultNetworkName returns the default network name from the network
// section of the runtime configuration.
func (r *Runtime) GetDefaultNetworkName() string {
	defaultNetwork := r.config.Network.DefaultNetwork
	return defaultNetwork
}

// RemoteURI returns the API server URI stored in the engine configuration.
func (r *Runtime) RemoteURI() string {
	uri := r.config.Engine.RemoteURI
	return uri
}

// SetRemoteURI stores the API server URI in the engine configuration.
func (r *Runtime) SetRemoteURI(uri string) {
	engine := &r.config.Engine
	engine.RemoteURI = uri
}

// LockConflicts gathers information on potential lock conflicts.
// It returns a map of lock number to the object(s) using that lock, formatted
// as "container <id>" or "volume <id>" or "pod <id>", and an array of locks
// that are currently being held, formatted as []uint32. Only locks shared by
// more than one object appear in the map.
// If the map returned is not empty, you should immediately renumber locks on
// the runtime, because you have a deadlock waiting to happen.
// When the lock backend cannot list held locks, a warning is logged and an
// empty list is returned instead of an error.
func (r *Runtime) LockConflicts() (map[uint32][]string, []uint32, error) {
	// Internal bookkeeping: which objects claim which lock number.
	usage := make(map[uint32][]string)
	claim := func(lockNum uint32, owner string) {
		usage[lockNum] = append(usage[lockNum], owner)
	}

	allCtrs, err := r.state.AllContainers(false)
	if err != nil {
		return nil, nil, err
	}
	for _, c := range allCtrs {
		claim(c.lock.ID(), fmt.Sprintf("container %s", c.ID()))
	}

	allPods, err := r.state.AllPods()
	if err != nil {
		return nil, nil, err
	}
	for _, p := range allPods {
		claim(p.lock.ID(), fmt.Sprintf("pod %s", p.ID()))
	}

	allVolumes, err := r.state.AllVolumes()
	if err != nil {
		return nil, nil, err
	}
	for _, v := range allVolumes {
		claim(v.lock.ID(), fmt.Sprintf("volume %s", v.Name()))
	}

	// Keep only the locks that more than one object lays claim to.
	conflicts := make(map[uint32][]string)
	for lockNum, owners := range usage {
		// With debug logging enabled, *every* lock in use is printed.
		logrus.Debugf("Lock number %d is in use by %v", lockNum, owners)

		if len(owners) <= 1 {
			continue
		}
		conflicts[lockNum] = owners
	}

	held, err := r.lockManager.LocksHeld()
	switch {
	case err == nil:
		return conflicts, held, nil
	case errors.Is(err, define.ErrNotImplemented):
		logrus.Warnf("Could not retrieve currently taken locks as the lock backend does not support this operation")
		return conflicts, []uint32{}, nil
	default:
		return nil, nil, err
	}
}

// PruneBuildContainers removes any build containers that were created during the build,
// but were not removed because the build was unexpectedly terminated.
//
// A storage container counts as a build container when its container
// directory contains a "buildah.json" file. One report is produced per build
// container; failures to determine its size or to delete it are recorded in
// the report's Err field rather than aborting the prune. Failing to list the
// containers or to resolve a container directory aborts the prune and returns
// the reports gathered so far together with the error.
//
// Note: This is not safe operation and should be executed only when no builds are in progress. It can interfere with builds in progress.
func (r *Runtime) PruneBuildContainers() ([]*reports.PruneReport, error) {
	stageContainersPruneReports := []*reports.PruneReport{}

	containers, err := r.store.Containers()
	if err != nil {
		return stageContainersPruneReports, err
	}
	for _, container := range containers {
		path, err := r.store.ContainerDirectory(container.ID)
		if err != nil {
			return stageContainersPruneReports, err
		}
		// Anything without a buildah.json is not a build container.
		if err := fileutils.Exists(filepath.Join(path, "buildah.json")); err != nil {
			continue
		}

		report := &reports.PruneReport{
			Id: container.ID,
		}
		size, err := r.store.ContainerSize(container.ID)
		switch {
		case err != nil:
			// Leave Size at zero: converting a negative error value to
			// uint64 would report a huge bogus size.
			report.Err = err
		case size > 0:
			report.Size = uint64(size)
		}

		if err := r.store.DeleteContainer(container.ID); err != nil {
			report.Err = errors.Join(report.Err, err)
		}
		stageContainersPruneReports = append(stageContainersPruneReports, report)
	}
	return stageContainersPruneReports, nil
}

// SystemCheck checks our storage for consistency, and depending on the options
// specified, will attempt to remove anything which fails consistency checks.
//
// options.Quick selects storage.CheckMost() instead of
// storage.CheckEverything(). options.UnreferencedLayerMaximumAge, when set,
// overrides the maximum age of unreferenced layers for the check.
// options.Repair enables repairs, and options.RepairLossy additionally allows
// damaged containers to be removed.
//
// The returned report is empty when no problems were found. Otherwise it
// lists the detected problems and, after a repair, the containers and images
// that were removed. Errors hit during the repair are collected and returned
// together, alongside the report.
func (r *Runtime) SystemCheck(ctx context.Context, options entities.SystemCheckOptions) (entities.SystemCheckReport, error) {
	what := storage.CheckEverything()
	if options.Quick {
		what = storage.CheckMost()
	}
	if options.UnreferencedLayerMaximumAge != nil {
		tmp := *options.UnreferencedLayerMaximumAge
		what.LayerUnreferencedMaximumAge = &tmp
	}
	storageReport, err := r.store.Check(what)
	if err != nil {
		return entities.SystemCheckReport{}, err
	}
	if len(storageReport.Containers) == 0 &&
		len(storageReport.Layers) == 0 &&
		len(storageReport.ROLayers) == 0 &&
		len(storageReport.Images) == 0 &&
		len(storageReport.ROImages) == 0 {
		// no errors detected
		return entities.SystemCheckReport{}, nil
	}
	// The report carries plain strings, so flatten every error to its message.
	mapErrorSlicesToStringSlices := func(m map[string][]error) map[string][]string {
		if len(m) == 0 {
			return nil
		}
		mapped := make(map[string][]string, len(m))
		for k, errs := range m {
			strs := make([]string, len(errs))
			for i, e := range errs {
				strs[i] = e.Error()
			}
			mapped[k] = strs
		}
		return mapped
	}

	report := entities.SystemCheckReport{
		Errors:     true,
		Layers:     mapErrorSlicesToStringSlices(storageReport.Layers),
		ROLayers:   mapErrorSlicesToStringSlices(storageReport.ROLayers),
		Images:     mapErrorSlicesToStringSlices(storageReport.Images),
		ROImages:   mapErrorSlicesToStringSlices(storageReport.ROImages),
		Containers: mapErrorSlicesToStringSlices(storageReport.Containers),
	}
	if !options.Repair {
		// errors detected, no corrective measures to be taken
		return report, nil
	}

	// get a list of images that we knew of before we tried to clean up any
	// that were damaged
	imagesBefore, err := r.store.Images()
	if err != nil {
		return report, fmt.Errorf("getting a list of images before attempting repairs: %w", err)
	}

	repairOptions := storage.RepairOptions{
		RemoveContainers: options.RepairLossy,
	}
	var containers []*Container
	if repairOptions.RemoveContainers {
		// build a list of the containers that we claim as ours that we
		// expect to be removing in a bit
		for containerID := range storageReport.Containers {
			ctr, lookupErr := r.state.LookupContainer(containerID)
			if lookupErr != nil {
				// we're about to remove it, so it's okay that
				// it isn't even one of ours
				continue
			}
			containers = append(containers, ctr)
		}
	}

	// run the cleanup
	merr := multierror.Append(nil, r.store.Repair(storageReport, &repairOptions)...)

	if repairOptions.RemoveContainers {
		// get the list of containers that storage will still admit to knowing about
		containersAfter, listErr := r.store.Containers()
		if listErr != nil {
			merr = multierror.Append(merr, fmt.Errorf("getting a list of containers after attempting repairs: %w", listErr))
		}
		for _, ctr := range containers {
			// if one of our containers that we tried to remove is
			// still on disk, report an error
			if slices.IndexFunc(containersAfter, func(containerAfter storage.Container) bool {
				return containerAfter.ID == ctr.ID()
			}) != -1 {
				// There is no underlying error to wrap here: the
				// container simply survived the repair.
				merr = multierror.Append(merr, fmt.Errorf("clearing storage for container %s: container is still present in storage after repair", ctr.ID()))
				continue
			}
			// remove the container from our database
			if removeErr := r.state.RemoveContainer(ctr); removeErr != nil {
				merr = multierror.Append(merr, fmt.Errorf("updating state database to reflect removal of container %s: %w", ctr.ID(), removeErr))
				continue
			}
			if report.RemovedContainers == nil {
				report.RemovedContainers = make(map[string]string)
			}
			report.RemovedContainers[ctr.ID()] = ctr.config.Name
		}
	}

	// get a list of images that are still around after we clean up any
	// that were damaged
	imagesAfter, err := r.store.Images()
	if err != nil {
		merr = multierror.Append(merr, fmt.Errorf("getting a list of images after attempting repairs: %w", err))
	}
	for _, imageBefore := range imagesBefore {
		if slices.IndexFunc(imagesAfter, func(imageAfter storage.Image) bool {
			return imageAfter.ID == imageBefore.ID
		}) == -1 {
			if report.RemovedImages == nil {
				report.RemovedImages = make(map[string][]string)
			}
			report.RemovedImages[imageBefore.ID] = slices.Clone(imageBefore.Names)
		}
	}

	// ErrorOrNil yields nil when nothing was collected.
	return report, merr.ErrorOrNil()
}

// GetContainerExitCode looks up the exit code recorded in the state for the
// container with the given ID.
func (r *Runtime) GetContainerExitCode(id string) (int32, error) {
	exitCode, err := r.state.GetContainerExitCode(id)
	return exitCode, err
}