mirror of
https://github.com/containers/podman.git
synced 2025-05-17 15:18:43 +08:00

We cannot unlock and then lock again without syncing the state, as doing so would save a potentially old state and cause very bad things, such as double netns cleanup issues. The fix here is simple: move saveContainerError() under the same lock. The comment about the re-lock was just wrong: not doing this under the same lock would let us update the error after something else had already changed the container.

Most likely this was caused by a misunderstanding of how Go defers work. Given that they run Last In, First Out (LIFO), it is safe as long as our defer function is registered after the defer unlock() call.

I think this issue is very bad and might have caused a variety of other weird flakes. In fact, I am confident that this fixes the double cleanup errors.

Fixes #21569
Also fixes the netns removal ENOENT issues seen in #19721.

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
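For readers unfamiliar with the LIFO argument above: Go runs deferred calls Last In, First Out, so a deferred saveContainerError() registered after the defer unlock() executes before the unlock, i.e. while the lock is still held. A minimal, runnable sketch of just that ordering (illustration only, not part of the patch):

	package main

	import "fmt"

	func main() {
		fmt.Println("lock")
		defer fmt.Println("unlock")     // registered first, runs LAST
		defer fmt.Println("save error") // registered second, runs FIRST, while still "locked"
		fmt.Println("body")
	}

	// Output:
	// lock
	// body
	// save error
	// unlock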
1148 lines
34 KiB
Go
//go:build !remote

package libpod

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"sync"
	"time"

	"github.com/containers/common/pkg/resize"
	"github.com/containers/podman/v5/libpod/define"
	"github.com/containers/podman/v5/libpod/events"
	"github.com/containers/storage/pkg/archive"
	spec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"
)

// Init creates a container in the OCI runtime, moving a container from
// ContainerStateConfigured, ContainerStateStopped, or ContainerStateExited to
// ContainerStateCreated. Once in Created state, Conmon will be running, which
// allows the container to be attached to. The container can subsequently
// transition to ContainerStateRunning via Start(), or be transitioned back to
// ContainerStateConfigured by Cleanup() (which will stop conmon and unmount the
// container).
// Init requires that all dependency containers be started (e.g. pod infra
// containers). The `recursive` parameter will, if set to true, start these
// dependency containers before initializing this container.
func (c *Container) Init(ctx context.Context, recursive bool) error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateStopped, define.ContainerStateExited) {
		return fmt.Errorf("container %s has already been created in runtime: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	if !recursive {
		if err := c.checkDependenciesAndHandleError(); err != nil {
			return err
		}
	} else {
		if err := c.startDependencies(ctx); err != nil {
			return err
		}
	}

	if err := c.prepare(); err != nil {
		if err2 := c.cleanup(ctx); err2 != nil {
			logrus.Errorf("Cleaning up container %s: %v", c.ID(), err2)
		}
		return err
	}

	if c.state.State == define.ContainerStateStopped {
		// Reinitialize the container
		return c.reinit(ctx, false)
	}

	// Initialize the container for the first time
	return c.init(ctx, false)
}

// Start starts the given container.
// Start will accept containers in ContainerStateConfigured,
// ContainerStateCreated, ContainerStateStopped, and ContainerStateExited, and
// transition them to ContainerStateRunning (all containers not in
// ContainerStateCreated will make an intermediate stop there via the Init API).
// Once in ContainerStateRunning, the container can be transitioned to
// ContainerStatePaused via Pause(), or to ContainerStateStopped by the process
// stopping (either due to exit, or being forced to stop by the Kill or Stop API
// calls).
// Start requires that all dependency containers (e.g. pod infra containers) are
// running before starting the container. The recursive parameter, if set, will start all
// dependencies before starting this container.
func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		// defers are executed LIFO, so we are still locked here
		// as long as this is registered after the defer unlock()
		defer func() {
			if finalErr != nil {
				if err := saveContainerError(c, finalErr); err != nil {
					logrus.Debug(err)
				}
			}
		}()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}
	if err := c.prepareToStart(ctx, recursive); err != nil {
		return err
	}

	// Start the container
	if err := c.start(); err != nil {
		return err
	}
	return c.waitForHealthy(ctx)
}

// Update updates the given container.
// Either resource limits or restart policy can be updated.
// At least one of resources and restartPolicy must not be nil.
// If restartRetries is not nil, restartPolicy must be set and must be "on-failure".
func (c *Container) Update(resources *spec.LinuxResources, restartPolicy *string, restartRetries *uint) error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	if c.ensureState(define.ContainerStateRemoving) {
		return fmt.Errorf("container %s is being removed, cannot update: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	return c.update(resources, restartPolicy, restartRetries)
}

// Attach to a container.
// The last parameter "start" can be used to also start the container.
// This will then invoke the Start and Attach APIs, ensuring proper
// ordering of the two such that no output from the container is lost (e.g. the
// Attach call occurs before Start).
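//
// A minimal caller sketch (added for illustration; ctx, streams, and
// resizeChan are assumed to exist, and "ctrl-p,ctrl-q" is just an example
// detach-key sequence):
//
//	errChan, err := ctr.Attach(ctx, streams, "ctrl-p,ctrl-q", resizeChan, true)
//	if err != nil {
//		return err
//	}
//	return <-errChan // receives the attach session's terminal error, if any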
func (c *Container) Attach(ctx context.Context, streams *define.AttachStreams, keys string, resize <-chan resize.TerminalSize, start bool) (retChan <-chan error, finalErr error) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		// defers are executed LIFO, so we are still locked here
		// as long as this is registered after the defer unlock()
		defer func() {
			if finalErr != nil {
				if err := saveContainerError(c, finalErr); err != nil {
					logrus.Debug(err)
				}
			}
		}()

		if err := c.syncContainer(); err != nil {
			return nil, err
		}
	}

	if c.state.State != define.ContainerStateRunning {
		if !start {
			return nil, errors.New("you can only attach to running containers")
		}

		if err := c.prepareToStart(ctx, true); err != nil {
			return nil, err
		}
	}

	if !start {
		// A bit awkward, technically passthrough never supports attach. We only pretend
		// it does as we leak the stdio fds down into the container but that of course only
		// works if we are the process that started conmon with the right fds.
		if c.LogDriver() == define.PassthroughLogging {
			return nil, fmt.Errorf("this container is using the 'passthrough' log driver, cannot attach: %w", define.ErrNoLogs)
		} else if c.LogDriver() == define.PassthroughTTYLogging {
			return nil, fmt.Errorf("this container is using the 'passthrough-tty' log driver, cannot attach: %w", define.ErrNoLogs)
		}
	}

	attachChan := make(chan error)

	// We need to ensure that we don't return until start() has fired in attach.
	// Use a channel to sync.
	startedChan := make(chan bool)

	// Attach to the container before starting it
	go func() {
		// Start resizing
		if c.LogDriver() != define.PassthroughLogging && c.LogDriver() != define.PassthroughTTYLogging {
			registerResizeFunc(resize, c.bundlePath())
		}

		opts := new(AttachOptions)
		opts.Streams = streams
		opts.DetachKeys = &keys
		opts.Start = start
		opts.Started = startedChan

		// attach and start the container on a different thread. waitForHealthy must
		// be done later, as it must run on the same thread that holds the lock
		// for the container.
		if err := c.ociRuntime.Attach(c, opts); err != nil {
			attachChan <- err
		}
		close(attachChan)
	}()

	select {
	case err := <-attachChan:
		return nil, err
	case <-startedChan:
		c.newContainerEvent(events.Attach)
	}

	if start {
		if err := c.waitForHealthy(ctx); err != nil {
			return nil, err
		}
	}

	return attachChan, nil
}

// RestartWithTimeout restarts a running container with a given timeout in seconds
func (c *Container) RestartWithTimeout(ctx context.Context, timeout uint) error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	if err := c.checkDependenciesAndHandleError(); err != nil {
		return err
	}

	return c.restartWithTimeout(ctx, timeout)
}

// Stop uses the container's stop signal (or SIGTERM if no signal was specified)
// to stop the container, and if it has not stopped after the container's stop
// timeout, SIGKILL is used to attempt to forcibly stop the container.
// The default stop timeout is 10 seconds, but can be overridden when the
// container is created.
func (c *Container) Stop() error {
	// Stop with the container's given timeout
	return c.StopWithTimeout(c.config.StopTimeout)
}

// StopWithTimeout is a version of Stop that allows a timeout to be specified
// manually. If timeout is 0, SIGKILL will be used immediately to kill the
// container.
func (c *Container) StopWithTimeout(timeout uint) (finalErr error) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		// defers are executed LIFO, so we are still locked here
		// as long as this is registered after the defer unlock()
		defer func() {
			if finalErr != nil {
				if err := saveContainerError(c, finalErr); err != nil {
					logrus.Debug(err)
				}
			}
		}()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	return c.stop(timeout)
}

// Kill sends a signal to a container
func (c *Container) Kill(signal uint) error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	switch c.state.State {
	case define.ContainerStateRunning, define.ContainerStateStopping, define.ContainerStatePaused:
		// Note that killing containers in "stopping" state is okay.
		// In that state, Podman is waiting for the runtime to
		// stop the container and if that is taking too long, a user
		// may have decided to kill the container after all.
	default:
		return fmt.Errorf("can only kill running containers. %s is in state %s: %w", c.ID(), c.state.State.String(), define.ErrCtrStateInvalid)
	}

	// Hardcode all = false, we only use all when removing.
	if err := c.ociRuntime.KillContainer(c, signal, false); err != nil {
		return err
	}

	c.state.StoppedByUser = true

	c.newContainerEvent(events.Kill)

	// Make sure to wait for the container to exit in case of SIGKILL.
	if signal == uint(unix.SIGKILL) {
		return c.waitForConmonToExitAndSave()
	}

	return c.save()
}

// HTTPAttach forwards an attach session over a hijacked HTTP session.
// HTTPAttach will consume and close the underlying HTTP connection, which it
// hijacks from the given request and response writer.
// The cancel channel is optional, and can be used to asynchronously cancel the
// attach session.
// The streams variable is only supported if the container was not a terminal,
// and allows specifying which of the container's standard streams will be
// forwarded to the client.
// This function returns when the attach finishes. It does not hold the lock for
// the duration of its runtime, only using it at the beginning to verify state.
// The streamLogs parameter indicates that all the container's logs until present
// will be streamed at the beginning of the attach.
// The streamAttach parameter indicates that the attach itself will be streamed
// over the socket; if this is not set, but streamLogs is, only the logs will be
// sent.
// At least one of streamAttach and streamLogs must be set.
func (c *Container) HTTPAttach(r *http.Request, w http.ResponseWriter, streams *HTTPAttachStreams, detachKeys *string, cancel <-chan bool, streamAttach, streamLogs bool, hijackDone chan<- bool) error {
	// Ensure we don't leak a goroutine if we exit before hijack completes.
	defer func() {
		close(hijackDone)
	}()

	if !c.batched {
		c.lock.Lock()
		if err := c.syncContainer(); err != nil {
			c.lock.Unlock()

			return err
		}
		// We are NOT holding the lock for the duration of the function.
		c.lock.Unlock()
	}

	if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) {
		return fmt.Errorf("can only attach to created or running containers: %w", define.ErrCtrStateInvalid)
	}

	if !streamAttach && !streamLogs {
		return fmt.Errorf("must specify at least one of stream or logs: %w", define.ErrInvalidArg)
	}

	logrus.Infof("Performing HTTP Hijack attach to container %s", c.ID())

	c.newContainerEvent(events.Attach)
	return c.ociRuntime.HTTPAttach(c, r, w, streams, detachKeys, cancel, hijackDone, streamAttach, streamLogs)
}

// AttachResize resizes the container's terminal, which is displayed by Attach
// and HTTPAttach.
func (c *Container) AttachResize(newSize resize.TerminalSize) error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) {
		return fmt.Errorf("can only resize created or running containers: %w", define.ErrCtrStateInvalid)
	}

	logrus.Infof("Resizing TTY of container %s", c.ID())

	return c.ociRuntime.AttachResize(c, newSize)
}

// Mount mounts a container's filesystem on the host.
// The path where the container has been mounted is returned.
func (c *Container) Mount() (string, error) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return "", err
		}
	}

	defer c.newContainerEvent(events.Mount)
	return c.mount()
}

// Unmount unmounts a container's filesystem on the host
func (c *Container) Unmount(force bool) error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}
	if c.state.Mounted {
		mounted, err := c.runtime.storageService.MountedContainerImage(c.ID())
		if err != nil {
			return fmt.Errorf("can't determine how many times %s is mounted, refusing to unmount: %w", c.ID(), err)
		}
		if mounted == 1 {
			if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
				return fmt.Errorf("cannot unmount storage for container %s as it is running or paused: %w", c.ID(), define.ErrCtrStateInvalid)
			}
			execSessions, err := c.getActiveExecSessions()
			if err != nil {
				return err
			}
			if len(execSessions) != 0 {
				return fmt.Errorf("container %s has active exec sessions, refusing to unmount: %w", c.ID(), define.ErrCtrStateInvalid)
			}
			return fmt.Errorf("can't unmount %s last mount, it is still in use: %w", c.ID(), define.ErrInternal)
		}
	}
	defer c.newContainerEvent(events.Unmount)
	return c.unmount(force)
}

// Pause pauses a container
func (c *Container) Pause() error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	if c.state.State == define.ContainerStatePaused {
		return fmt.Errorf("%q is already paused: %w", c.ID(), define.ErrCtrStateInvalid)
	}
	if c.state.State != define.ContainerStateRunning {
		return fmt.Errorf("%q is not running, can't pause: %w", c.state.State, define.ErrCtrStateInvalid)
	}
	defer c.newContainerEvent(events.Pause)
	return c.pause()
}

// Unpause unpauses a container
func (c *Container) Unpause() error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	if c.state.State != define.ContainerStatePaused {
		return fmt.Errorf("%q is not paused, can't unpause: %w", c.ID(), define.ErrCtrStateInvalid)
	}
	defer c.newContainerEvent(events.Unpause)
	return c.unpause()
}

// Export exports a container's root filesystem as a tar archive,
// written to the given io.Writer.
func (c *Container) Export(out io.Writer) error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	if c.state.State == define.ContainerStateRemoving {
		return fmt.Errorf("cannot mount container %s as it is being removed: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	defer c.newContainerEvent(events.Mount)
	return c.export(out)
}

// AddArtifact creates and writes to an artifact file for the container
func (c *Container) AddArtifact(name string, data []byte) error {
	if !c.valid {
		return define.ErrCtrRemoved
	}

	return os.WriteFile(c.getArtifactPath(name), data, 0o740)
}

// GetArtifact reads the specified artifact file from the container
func (c *Container) GetArtifact(name string) ([]byte, error) {
	if !c.valid {
		return nil, define.ErrCtrRemoved
	}

	return os.ReadFile(c.getArtifactPath(name))
}

// RemoveArtifact deletes the specified artifact file
func (c *Container) RemoveArtifact(name string) error {
	if !c.valid {
		return define.ErrCtrRemoved
	}

	return os.Remove(c.getArtifactPath(name))
}

// Wait blocks until the container exits and returns its exit code.
func (c *Container) Wait(ctx context.Context) (int32, error) {
	return c.WaitForExit(ctx, DefaultWaitInterval)
}

// WaitForExit blocks until the container exits and returns its exit code. The
// argument is the interval at which the container's status is checked.
func (c *Container) WaitForExit(ctx context.Context, pollInterval time.Duration) (int32, error) {
	id := c.ID()
	if !c.valid {
		// if the container is not valid at this point as it was deleted,
		// check if the exit code was recorded in the db.
		exitCode, err := c.runtime.state.GetContainerExitCode(id)
		if err == nil {
			return exitCode, nil
		}
		return -1, define.ErrCtrRemoved
	}
	var conmonTimer time.Timer
	conmonTimerSet := false

	conmonPidFd := c.getConmonPidFd()
	if conmonPidFd != -1 {
		defer unix.Close(conmonPidFd)
	}
	conmonPidFdTriggered := false

	getExitCode := func() (bool, int32, error) {
		containerRemoved := false
		if !c.batched {
			c.lock.Lock()
			defer c.lock.Unlock()
		}

		if err := c.syncContainer(); err != nil {
			if !errors.Is(err, define.ErrNoSuchCtr) {
				return false, -1, err
			}
			containerRemoved = true
		}

		// If conmon is not alive anymore set a timer to make sure
		// we're returning even if conmon has forcefully been killed.
		if !conmonTimerSet && !containerRemoved {
			conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
			switch {
			case errors.Is(err, define.ErrNoSuchCtr):
				// Container has been removed, so we assume the
				// exit code is present in the DB.
				containerRemoved = true
			case err != nil:
				return false, -1, err
			case !conmonAlive:
				// Give the exit code at most 20 seconds to
				// show up in the DB. That should largely be
				// enough for the cleanup process.
				timerDuration := time.Second * 20
				conmonTimer = *time.NewTimer(timerDuration)
				conmonTimerSet = true
			case conmonAlive:
				// Continue waiting if conmon's still running.
				return false, -1, nil
			}
		}

		timedout := ""
		if !containerRemoved {
			// If conmon is dead for more than $timerDuration or if the
			// container has exited properly, try to look up the exit code.
			select {
			case <-conmonTimer.C:
				logrus.Debugf("Exceeded conmon timeout waiting for container %s to exit", id)
				timedout = " [exceeded conmon timeout waiting for container to exit]"
			default:
				switch c.state.State {
				case define.ContainerStateExited, define.ContainerStateConfigured:
					// Container exited, so we can look up the exit code.
				case define.ContainerStateStopped:
					// Continue looping unless the restart policy is always.
					// In this case, the container would never transition to
					// the exited state, so we need to look up the exit code.
					if c.config.RestartPolicy != define.RestartPolicyAlways {
						return false, -1, nil
					}
				default:
					// Continue looping
					return false, -1, nil
				}
			}
		}

		exitCode, err := c.runtime.state.GetContainerExitCode(id)
		if err != nil {
			if errors.Is(err, define.ErrNoSuchExitCode) {
				// If the container is configured or created, we must assume it never ran.
				if c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated) {
					return true, 0, nil
				}
			}
			return true, -1, fmt.Errorf("%w (container in state %s)%s", err, c.state.State, timedout)
		}

		return true, exitCode, nil
	}

	for {
		hasExited, exitCode, err := getExitCode()
		if hasExited {
			return exitCode, err
		}
		if err != nil {
			return -1, err
		}
		select {
		case <-ctx.Done():
			return -1, fmt.Errorf("waiting for exit code of container %s canceled", id)
		default:
			if conmonPidFd != -1 && !conmonPidFdTriggered {
				// If possible (pidfd works), block on the first cycle until conmon dies;
				// after that, fall back to the old poll delay.
				// There is a deadlock in the cleanup code for "play kube" which causes
				// conmon to not exit, so unfortunately we have to use the poll interval
				// timeout here to avoid hanging.
				fds := []unix.PollFd{{Fd: int32(conmonPidFd), Events: unix.POLLIN}}
				_, _ = unix.Poll(fds, int(pollInterval.Milliseconds()))
				conmonPidFdTriggered = true
			} else {
				time.Sleep(pollInterval)
			}
		}
	}
}

type waitResult struct {
	code int32
	err  error
}

func (c *Container) WaitForConditionWithInterval(ctx context.Context, waitTimeout time.Duration, conditions ...string) (int32, error) {
	if !c.valid {
		return -1, define.ErrCtrRemoved
	}

	if len(conditions) == 0 {
		panic("at least one condition should be passed")
	}

	ctx, cancelFn := context.WithCancel(ctx)
	defer cancelFn()

	resultChan := make(chan waitResult)
	waitForExit := false
	wantedStates := make(map[define.ContainerStatus]bool, len(conditions))
	wantedHealthStates := make(map[string]bool)

	for _, rawCondition := range conditions {
		switch rawCondition {
		case define.HealthCheckHealthy, define.HealthCheckUnhealthy:
			if !c.HasHealthCheck() {
				return -1, fmt.Errorf("cannot use condition %q: container %s has no healthcheck", rawCondition, c.ID())
			}
			wantedHealthStates[rawCondition] = true
		default:
			condition, err := define.StringToContainerStatus(rawCondition)
			if err != nil {
				return -1, err
			}
			switch condition {
			case define.ContainerStateExited, define.ContainerStateStopped:
				waitForExit = true
			default:
				wantedStates[condition] = true
			}
		}
	}

	trySend := func(code int32, err error) {
		select {
		case resultChan <- waitResult{code, err}:
		case <-ctx.Done():
		}
	}

	var wg sync.WaitGroup

	if waitForExit {
		wg.Add(1)
		go func() {
			defer wg.Done()

			code, err := c.WaitForExit(ctx, waitTimeout)
			trySend(code, err)
		}()
	}

	if len(wantedStates) > 0 || len(wantedHealthStates) > 0 {
		wg.Add(1)
		go func() {
			defer wg.Done()
			stoppedCount := 0
			for {
				if len(wantedStates) > 0 {
					state, err := c.State()
					if err != nil {
						trySend(-1, err)
						return
					}
					if _, found := wantedStates[state]; found {
						trySend(-1, nil)
						return
					}
				}
				if len(wantedHealthStates) > 0 {
					// even if we are interested only in the health check,
					// check that the container is still running to avoid
					// waiting until the timeout expires.
					if stoppedCount > 0 {
						stoppedCount++
					} else {
						state, err := c.State()
						if err != nil {
							trySend(-1, err)
							return
						}
						if state != define.ContainerStateCreated && state != define.ContainerStateRunning && state != define.ContainerStatePaused {
							stoppedCount++
						}
					}
					status, err := c.HealthCheckStatus()
					if err != nil {
						trySend(-1, err)
						return
					}
					if _, found := wantedHealthStates[status]; found {
						trySend(-1, nil)
						return
					}
					// wait for another waitTimeout interval to give the health check process some time
					// to record the healthy status.
					if stoppedCount > 1 {
						trySend(-1, define.ErrCtrStopped)
						return
					}
				}
				select {
				case <-ctx.Done():
					return
				case <-time.After(waitTimeout):
					continue
				}
			}
		}()
	}

	var result waitResult
	select {
	case result = <-resultChan:
		cancelFn()
	case <-ctx.Done():
		result = waitResult{-1, define.ErrCanceled}
	}
	wg.Wait()
	return result.code, result.err
}

// Cleanup unmounts all mount points in the container and cleans up container storage.
// It also cleans up the network stack.
func (c *Container) Cleanup(ctx context.Context) error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			// When the container has already been removed, the OCI runtime directory remains.
			if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
				if err := c.cleanupRuntime(ctx); err != nil {
					return fmt.Errorf("cleaning up container %s from OCI runtime: %w", c.ID(), err)
				}
				return nil
			}
			logrus.Errorf("Syncing container %s status: %v", c.ID(), err)
			return err
		}
	}

	// Check if state is good
	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStateExited) {
		return fmt.Errorf("container %s is running or paused, refusing to clean up: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	// if the container was not created in the oci runtime or was already cleaned up, then do nothing
	if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
		return nil
	}

	// Handle restart policy.
	// Returns a bool indicating whether we actually restarted.
	// If we did, don't proceed to cleanup - just exit.
	didRestart, err := c.handleRestartPolicy(ctx)
	if err != nil {
		return err
	}
	if didRestart {
		return nil
	}

	// If we didn't restart, we perform a normal cleanup

	// make sure all the container processes are terminated if we are running without a pid namespace.
	hasPidNs := false
	if c.config.Spec.Linux != nil {
		for _, i := range c.config.Spec.Linux.Namespaces {
			if i.Type == spec.PIDNamespace {
				hasPidNs = true
				break
			}
		}
	}
	if !hasPidNs {
		// do not fail on errors
		_ = c.ociRuntime.KillContainer(c, uint(unix.SIGKILL), true)
	}

	// Check for running exec sessions
	sessions, err := c.getActiveExecSessions()
	if err != nil {
		return err
	}
	if len(sessions) > 0 {
		return fmt.Errorf("container %s has active exec sessions, refusing to clean up: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	defer c.newContainerEvent(events.Cleanup)
	return c.cleanup(ctx)
}

// Batch starts a batch operation on the given container.
// All commands in the passed function will execute under the same lock and
// without synchronizing state after each operation.
// This will result in substantial performance benefits when running numerous
// commands on the same container.
// Note that the container passed into the Batch function cannot be removed
// during batched operations. runtime.RemoveContainer can only be called outside
// of Batch.
// Any error returned by the given batch function will be returned unmodified by
// Batch.
// As Batch normally disables updating the current state of the container, the
// Sync() function is provided to enable container state to be updated and
// checked within Batch.
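//
// A minimal usage sketch (added for illustration; ctr is an assumed
// *Container obtained from the runtime):
//
//	err := ctr.Batch(func(batched *Container) error {
//		if err := batched.Sync(); err != nil {
//			return err
//		}
//		return batched.Stop()
//	})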
func (c *Container) Batch(batchFunc func(*Container) error) error {
	c.lock.Lock()
	defer c.lock.Unlock()

	newCtr := new(Container)
	newCtr.config = c.config
	newCtr.state = c.state
	newCtr.runtime = c.runtime
	newCtr.ociRuntime = c.ociRuntime
	newCtr.lock = c.lock
	newCtr.valid = true

	newCtr.batched = true
	err := batchFunc(newCtr)
	newCtr.batched = false

	return err
}

// Sync updates the status of a container by querying the OCI runtime.
// If the container has not been created inside the OCI runtime, nothing will be
// done.
// Most of the time, Podman does not explicitly query the OCI runtime for
// container status, and instead relies upon exit files created by conmon.
// This can cause a disconnect between running state and what Podman sees in
// cases where Conmon was killed unexpectedly, or runc was upgraded.
// Running a manual Sync() ensures that container state will be correct in
// such situations.
func (c *Container) Sync() error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()
	}

	if err := c.syncContainer(); err != nil {
		return err
	}

	defer c.newContainerEvent(events.Sync)
	return nil
}

// ReloadNetwork reconfigures the container's network.
// Technically speaking, it will tear down and then reconfigure the container's
// network namespace, which will result in all firewall rules being recreated.
// It is mostly intended to be used in cases where the system firewall has been
// reloaded, and existing rules have been wiped out. It is expected that some
// downtime will result, as the rules are destroyed as part of this process.
// At present, this only works on root containers; it may be expanded to restart
// slirp4netns in the future to work with rootless containers as well.
// Requires that the container must be running or created.
func (c *Container) ReloadNetwork() error {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return err
		}
	}

	if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning) {
		return fmt.Errorf("cannot reload network unless container network has been configured: %w", define.ErrCtrStateInvalid)
	}

	return c.reloadNetwork()
}

// Refresh is DEPRECATED and REMOVED.
func (c *Container) Refresh(ctx context.Context) error {
	// This has been deprecated for a long while, and is in the process of
	// being removed.
	return define.ErrNotImplemented
}

// ContainerCheckpointOptions is a struct used to pass the parameters
// for checkpointing (and restoring) to the corresponding functions
type ContainerCheckpointOptions struct {
	// Keep tells the API to not delete checkpoint artifacts
	Keep bool
	// KeepRunning tells the API to keep the container running
	// after writing the checkpoint to disk
	KeepRunning bool
	// TCPEstablished tells the API to checkpoint a container
	// even if it contains established TCP connections
	TCPEstablished bool
	// TargetFile tells the API to read (or write) the checkpoint image
	// from (or to) the filename set in TargetFile
	TargetFile string
	// CheckpointImageID tells the API to restore the container from
	// checkpoint image with ID set in CheckpointImageID
	CheckpointImageID string
	// Name tells the API that during restore from an exported
	// checkpoint archive a new name should be used for the
	// restored container
	Name string
	// IgnoreRootfs tells the API to not export changes to
	// the container's root file-system (or to not import)
	IgnoreRootfs bool
	// IgnoreStaticIP tells the API to ignore the IP set
	// during 'podman run' with '--ip'. This is especially
	// important to be able to restore a container multiple
	// times with '--import --name'.
	IgnoreStaticIP bool
	// IgnoreStaticMAC tells the API to ignore the MAC set
	// during 'podman run' with '--mac-address'. This is especially
	// important to be able to restore a container multiple
	// times with '--import --name'.
	IgnoreStaticMAC bool
	// IgnoreVolumes tells the API to not export or not to import
	// the content of volumes associated with the container
	IgnoreVolumes bool
	// Pre Checkpoint container and leave container running
	PreCheckPoint bool
	// Dump container with Pre Checkpoint images
	WithPrevious bool
	// ImportPrevious tells the API to restore container with two
	// images. One is TargetFile, the other is ImportPrevious.
	ImportPrevious string
	// CreateImage tells Podman to create an OCI image from container
	// checkpoint in the local image store.
	CreateImage string
	// Compression tells the API which compression to use for
	// the exported checkpoint archive.
	Compression archive.Compression
	// If Pod is set the container should be restored into the
	// given Pod. If Pod is empty it is a restore without a Pod.
	// Restoring a non Pod container into a Pod or a Pod container
	// without a Pod is theoretically possible, but will
	// probably not work if a PID namespace is shared.
	// A shared PID namespace means that a Pod container has PID 1
	// in the infrastructure container, but without the infrastructure
	// container no PID 1 will be in the namespace and that is not
	// possible.
	Pod string
	// PrintStats tells the API to fill out the statistics about
	// how much time each component in the stack requires to
	// checkpoint a container.
	PrintStats bool
	// FileLocks tells the API to checkpoint/restore a container
	// with file-locks
	FileLocks bool
}
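
// A minimal sketch (added for illustration; the values are hypothetical) of
// checkpointing a container to a compressed archive with these options:
//
//	stats, runtimeDur, err := ctr.Checkpoint(ctx, ContainerCheckpointOptions{
//		Keep:        true,
//		TargetFile:  "/tmp/ctr-checkpoint.tar.gz",
//		Compression: archive.Gzip,
//		PrintStats:  true,
//	})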

// Checkpoint checkpoints a container
// The return values *define.CRIUCheckpointRestoreStatistics and int64 (time
// the runtime needs to checkpoint the container) are only set if
// options.PrintStats is set to true. Not setting options.PrintStats to true
// will return nil and 0.
func (c *Container) Checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
	logrus.Debugf("Trying to checkpoint container %s", c.ID())

	if options.TargetFile != "" {
		if err := c.prepareCheckpointExport(); err != nil {
			return nil, 0, err
		}
	}

	if options.WithPrevious {
		if err := c.canWithPrevious(); err != nil {
			return nil, 0, err
		}
	}

	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return nil, 0, err
		}
	}
	return c.checkpoint(ctx, options)
}

// Restore restores a container
// The return values *define.CRIUCheckpointRestoreStatistics and int64 (time
// the runtime needs to restore the container) are only set if
// options.PrintStats is set to true. Not setting options.PrintStats to true
// will return nil and 0.
func (c *Container) Restore(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
	if options.Pod == "" {
		logrus.Debugf("Trying to restore container %s", c.ID())
	} else {
		logrus.Debugf("Trying to restore container %s into pod %s", c.ID(), options.Pod)
	}
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return nil, 0, err
		}
	}
	defer c.newContainerEvent(events.Restore)
	return c.restore(ctx, options)
}

// ShouldRestart indicates whether or not the container should restart
func (c *Container) ShouldRestart(ctx context.Context) bool {
	logrus.Debugf("Checking if container %s should restart", c.ID())
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return false
		}
	}
	return c.shouldRestart()
}

// CopyFromArchive copies the contents from the specified tarStream to path
// *inside* the container.
func (c *Container) CopyFromArchive(_ context.Context, containerPath string, chown, noOverwriteDirNonDir bool, rename map[string]string, tarStream io.Reader) (func() error, error) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return nil, err
		}
	}

	return c.copyFromArchive(containerPath, chown, noOverwriteDirNonDir, rename, tarStream)
}

// CopyToArchive copies the contents from the specified path *inside* the
// container to the tarStream.
func (c *Container) CopyToArchive(ctx context.Context, containerPath string, tarStream io.Writer) (func() error, error) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return nil, err
		}
	}

	return c.copyToArchive(containerPath, tarStream)
}

// Stat the specified path *inside* the container and return a file info.
func (c *Container) Stat(ctx context.Context, containerPath string) (*define.FileInfo, error) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if err := c.syncContainer(); err != nil {
			return nil, err
		}
	}

	var mountPoint string
	var err error
	if c.state.Mounted {
		mountPoint = c.state.Mountpoint
	} else {
		mountPoint, err = c.mount()
		if err != nil {
			return nil, err
		}
		defer func() {
			if err := c.unmount(false); err != nil {
				logrus.Errorf("Unmounting container %s: %v", c.ID(), err)
			}
		}()
	}

	info, _, _, err := c.stat(mountPoint, containerPath)
	return info, err
}

func saveContainerError(c *Container, err error) error {
	c.state.Error = err.Error()
	return c.save()
}