podman/libpod/pod_api.go
Urvashi Mohnani edbeee5238 Add --restart flag to pod create
Add --restart flag to pod create to allow users to set the
restart policy for the pod, which applies to all the containers
in the pod. This reuses the restart policy already there for
containers and has the same restart policy options.
Add "never" to the restart policy options to match k8s syntax.
It is a synonym for "no" and behaves identically: containers are
not restarted once they have exited.
Only containers that have exited are restarted according to the
restart policy; running containers in the same pod are not
restarted when an exited container is restarted (same as in k8s).
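For example (illustrative invocation only), "podman pod create --restart never"
is equivalent to "--restart no": once a container in the pod exits, it stays
in the exited state.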

Signed-off-by: Urvashi Mohnani <umohnani@redhat.com>
2023-05-02 10:29:58 -04:00


package libpod
import (
"context"
"errors"
"fmt"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/podman/v4/libpod/define"
"github.com/containers/podman/v4/libpod/events"
"github.com/containers/podman/v4/pkg/parallel"
"github.com/containers/podman/v4/pkg/rootless"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// startInitContainers starts a pod's init containers.
func (p *Pod) startInitContainers(ctx context.Context) error {
initCtrs, err := p.initContainers()
if err != nil {
return err
}
// Now iterate init containers
for _, initCon := range initCtrs {
if err := initCon.Start(ctx, true); err != nil {
return err
}
// Wait for the init container to exit and check that its exit
// code is zero
rc, err := initCon.Wait(ctx)
if err != nil {
return err
}
if rc != 0 {
return fmt.Errorf("init container %s exited with code %d", initCon.ID(), rc)
}
// If the container is a once init container, we need to remove it
// after it runs
if initCon.config.InitContainerType == define.OneShotInitContainer {
icLock := initCon.lock
icLock.Lock()
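// Pass a nil timeout: the init container has already exited (Wait
// returned above), so no stop-timeout override is needed for removal.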
var time *uint
if err := p.runtime.removeContainer(ctx, initCon, false, false, true, false, time); err != nil {
icLock.Unlock()
return fmt.Errorf("failed to remove once init container %s: %w", initCon.ID(), err)
}
icLock.Unlock()
}
}
return nil
}
// Start starts all containers within a pod.
// It combines the effects of Init() and Start() on a container.
// If a container has already been initialized it will be started,
// otherwise it will be initialized then started.
// Containers that are already running or have been paused are ignored.
// All containers are started independently, in order dictated by their
// dependencies.
// An error and a map[string]error are returned.
// If the error is not nil and the map is nil, an error was encountered before
// any containers were started.
// If map is not nil, an error was encountered when starting one or more
// containers. The container ID is mapped to the error encountered. The error is
// set to ErrPodPartialFail.
// If both error and the map are nil, all containers were started successfully.
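//
// A caller-side usage sketch (hedged; assumes a *Pod already obtained from
// the runtime and a logrus logger) showing how the map distinguishes a
// partial failure:
//
//	ctrErrs, err := pod.Start(ctx)
//	switch {
//	case err == nil:
//		// all containers started
//	case ctrErrs != nil:
//		// partial failure; err wraps define.ErrPodPartialFail
//		for id, startErr := range ctrErrs {
//			logrus.Errorf("container %s failed to start: %v", id, startErr)
//		}
//	default:
//		// nothing was started
//	}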
func (p *Pod) Start(ctx context.Context) (map[string]error, error) {
p.lock.Lock()
defer p.lock.Unlock()
if !p.valid {
return nil, define.ErrPodRemoved
}
if err := p.maybeStartServiceContainer(ctx); err != nil {
return nil, err
}
// Before "regular" containers start in the pod, all init containers
// must have run and exited successfully.
if err := p.startInitContainers(ctx); err != nil {
return nil, err
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
// Build a dependency graph of containers in the pod
graph, err := BuildContainerGraph(allCtrs)
if err != nil {
return nil, fmt.Errorf("generating dependency graph for pod %s: %w", p.ID(), err)
}
// If there are no containers without dependencies, we can't start; error out.
if len(graph.noDepNodes) == 0 {
return nil, fmt.Errorf("no containers in pod %s have no dependencies, cannot start pod: %w", p.ID(), define.ErrNoSuchCtr)
}
ctrErrors := make(map[string]error)
ctrsVisited := make(map[string]bool)
// Traverse the graph beginning at nodes with no dependencies
for _, node := range graph.noDepNodes {
startNode(ctx, node, false, ctrErrors, ctrsVisited, false)
}
if len(ctrErrors) > 0 {
return ctrErrors, fmt.Errorf("starting some containers: %w", define.ErrPodPartialFail)
}
defer p.newPodEvent(events.Start)
return nil, nil
}
// Stop stops all containers within a pod without a timeout; it is
// equivalent to calling StopWithTimeout with a timeout of -1.
func (p *Pod) Stop(ctx context.Context, cleanup bool) (map[string]error, error) {
return p.StopWithTimeout(ctx, cleanup, -1)
}
// StopWithTimeout stops all containers within a pod that are not already stopped.
// A non-negative timeout overrides each container's own stop timeout; a
// negative timeout (conventionally -1) keeps the per-container values.
// Only running containers will be stopped. Paused, stopped, or created
// containers will be ignored.
// If cleanup is true, mounts and network namespaces will be cleaned up after
// the container is stopped.
// All containers are stopped independently. An error stopping one container
// will not prevent other containers being stopped.
// An error and a map[string]error are returned.
// If the error is not nil and the map is nil, an error was encountered before
// any containers were stopped.
// If map is not nil, an error was encountered when stopping one or more
// containers. The container ID is mapped to the error encountered. The error is
// set to ErrPodPartialFail.
// If both error and the map are nil, all containers were stopped without error.
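//
// Usage sketch (hedged; assumes a *Pod obtained from the runtime):
//
//	// stop every container within 10 seconds and clean up mounts/netns
//	ctrErrs, err := pod.StopWithTimeout(ctx, true, 10)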
func (p *Pod) StopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
p.lock.Lock()
defer p.lock.Unlock()
return p.stopWithTimeout(ctx, cleanup, timeout)
}
func (p *Pod) stopWithTimeout(ctx context.Context, cleanup bool, timeout int) (map[string]error, error) {
if !p.valid {
return nil, define.ErrPodRemoved
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
// Stopping pods is not ordered by dependency. We haven't seen any case
// where this would actually matter.
ctrErrChan := make(map[string]<-chan error)
// Enqueue a function for each container with the parallel executor.
for _, ctr := range allCtrs {
c := ctr
logrus.Debugf("Adding parallel job to stop container %s", c.ID())
retChan := parallel.Enqueue(ctx, func() error {
// Can't batch these without forcing Stop() to hold the
// lock for the full duration of the timeout.
// We probably don't want to do that.
if timeout > -1 {
if err := c.StopWithTimeout(uint(timeout)); err != nil {
return err
}
} else {
if err := c.Stop(); err != nil {
return err
}
}
if cleanup {
return c.Cleanup(ctx)
}
return nil
})
ctrErrChan[c.ID()] = retChan
}
p.newPodEvent(events.Stop)
ctrErrors := make(map[string]error)
// Get returned error for every container we worked on
for id, channel := range ctrErrChan {
if err := <-channel; err != nil {
if errors.Is(err, define.ErrCtrStateInvalid) || errors.Is(err, define.ErrCtrStopped) {
continue
}
ctrErrors[id] = err
}
}
if len(ctrErrors) > 0 {
return ctrErrors, fmt.Errorf("stopping some containers: %w", define.ErrPodPartialFail)
}
if err := p.maybeStopServiceContainer(); err != nil {
return nil, err
}
return nil, nil
}
// stopIfOnlyInfraRemains stops the pod if only the infra container remains running.
func (p *Pod) stopIfOnlyInfraRemains(ctx context.Context, ignoreID string) error {
p.lock.Lock()
defer p.lock.Unlock()
infraID := ""
if p.HasInfraContainer() {
infra, err := p.infraContainer()
if err != nil {
return err
}
infraID = infra.ID()
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return err
}
for _, ctr := range allCtrs {
if ctr.ID() == infraID || ctr.ID() == ignoreID {
continue
}
state, err := ctr.State()
if err != nil {
return fmt.Errorf("getting state of container %s: %w", ctr.ID(), err)
}
switch state {
case define.ContainerStateExited,
define.ContainerStateRemoving,
define.ContainerStateStopping,
define.ContainerStateUnknown:
continue
default:
return nil
}
}
_, err = p.stopWithTimeout(ctx, true, -1)
return err
}
// Cleanup cleans up all containers within a pod that have stopped.
// All containers are cleaned up independently. An error with one container will
// not prevent other containers being cleaned up.
// An error and a map[string]error are returned.
// If the error is not nil and the map is nil, an error was encountered before
// any containers were cleaned up.
// If map is not nil, an error was encountered when working on one or more
// containers. The container ID is mapped to the error encountered. The error is
// set to ErrPodPartialFail.
// If both error and the map are nil, all containers were cleaned up without error.
func (p *Pod) Cleanup(ctx context.Context) (map[string]error, error) {
p.lock.Lock()
defer p.lock.Unlock()
if !p.valid {
return nil, define.ErrPodRemoved
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
ctrErrChan := make(map[string]<-chan error)
// Enqueue a function for each container with the parallel executor.
for _, ctr := range allCtrs {
c := ctr
logrus.Debugf("Adding parallel job to clean up container %s", c.ID())
retChan := parallel.Enqueue(ctx, func() error {
return c.Cleanup(ctx)
})
ctrErrChan[c.ID()] = retChan
}
ctrErrors := make(map[string]error)
// Get returned error for every container we worked on
for id, channel := range ctrErrChan {
if err := <-channel; err != nil {
if errors.Is(err, define.ErrCtrStateInvalid) || errors.Is(err, define.ErrCtrStopped) {
continue
}
ctrErrors[id] = err
}
}
if len(ctrErrors) > 0 {
return ctrErrors, fmt.Errorf("cleaning up some containers: %w", define.ErrPodPartialFail)
}
if err := p.maybeStopServiceContainer(); err != nil {
return nil, err
}
return nil, nil
}
// Pause pauses all containers within a pod that are running.
// Only running containers will be paused. Paused, stopped, or created
// containers will be ignored.
// All containers are paused independently. An error pausing one container
// will not prevent other containers being paused.
// An error and a map[string]error are returned.
// If the error is not nil and the map is nil, an error was encountered before
// any containers were paused.
// If map is not nil, an error was encountered when pausing one or more
// containers. The container ID is mapped to the error encountered. The error is
// set to ErrPodPartialFail.
// If both error and the map are nil, all containers were paused without error.
func (p *Pod) Pause(ctx context.Context) (map[string]error, error) {
p.lock.Lock()
defer p.lock.Unlock()
if !p.valid {
return nil, define.ErrPodRemoved
}
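// Pausing relies on the cgroup freezer; rootless users can only manage
// their own cgroups on cgroup v2, so pausing rootless pods is rejected on
// cgroup v1.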
if rootless.IsRootless() {
cgroupv2, err := cgroups.IsCgroup2UnifiedMode()
if err != nil {
return nil, fmt.Errorf("failed to determine cgroup version: %w", err)
}
if !cgroupv2 {
return nil, fmt.Errorf("cannot pause pods containing rootless containers with cgroup V1: %w", define.ErrNoCgroups)
}
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
ctrErrChan := make(map[string]<-chan error)
// Enqueue a function for each container with the parallel executor.
for _, ctr := range allCtrs {
c := ctr
logrus.Debugf("Adding parallel job to pause container %s", c.ID())
retChan := parallel.Enqueue(ctx, c.Pause)
ctrErrChan[c.ID()] = retChan
}
p.newPodEvent(events.Pause)
ctrErrors := make(map[string]error)
// Get returned error for every container we worked on
for id, channel := range ctrErrChan {
if err := <-channel; err != nil {
if errors.Is(err, define.ErrCtrStateInvalid) || errors.Is(err, define.ErrCtrStopped) {
continue
}
ctrErrors[id] = err
}
}
if len(ctrErrors) > 0 {
return ctrErrors, fmt.Errorf("pausing some containers: %w", define.ErrPodPartialFail)
}
return nil, nil
}
// Unpause unpauses all containers within a pod that are paused.
// Only paused containers will be unpaused. Running, stopped, or created
// containers will be ignored.
// All containers are unpaused independently. An error unpausing one container
// will not prevent other containers being unpaused.
// An error and a map[string]error are returned.
// If the error is not nil and the map is nil, an error was encountered before
// any containers were unpaused.
// If map is not nil, an error was encountered when unpausing one or more
// containers. The container ID is mapped to the error encountered. The error is
// set to ErrPodPartialFail.
// If both error and the map are nil, all containers were unpaused without error.
func (p *Pod) Unpause(ctx context.Context) (map[string]error, error) {
p.lock.Lock()
defer p.lock.Unlock()
if !p.valid {
return nil, define.ErrPodRemoved
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
ctrErrChan := make(map[string]<-chan error)
// Enqueue a function for each container with the parallel executor.
for _, ctr := range allCtrs {
c := ctr
logrus.Debugf("Adding parallel job to unpause container %s", c.ID())
retChan := parallel.Enqueue(ctx, c.Unpause)
ctrErrChan[c.ID()] = retChan
}
p.newPodEvent(events.Unpause)
ctrErrors := make(map[string]error)
// Get returned error for every container we worked on
for id, channel := range ctrErrChan {
if err := <-channel; err != nil {
if errors.Is(err, define.ErrCtrStateInvalid) || errors.Is(err, define.ErrCtrStopped) {
continue
}
ctrErrors[id] = err
}
}
if len(ctrErrors) > 0 {
return ctrErrors, fmt.Errorf("unpausing some containers: %w", define.ErrPodPartialFail)
}
return nil, nil
}
// Restart restarts all containers within a pod that are not paused or in an error state.
// It combines the effects of Stop() and Start() on a container.
// Each container will use its own stop timeout.
// All containers are started independently, in order dictated by their
// dependencies. An error restarting one container
// will not prevent other containers being restarted.
// An error and a map[string]error are returned.
// If the error is not nil and the map is nil, an error was encountered before
// any containers were restarted.
// If map is not nil, an error was encountered when restarting one or more
// containers. The container ID is mapped to the error encountered. The error is
// set to ErrPodPartialFail.
// If both error and the map are nil, all containers were restarted without error.
func (p *Pod) Restart(ctx context.Context) (map[string]error, error) {
p.lock.Lock()
defer p.lock.Unlock()
if !p.valid {
return nil, define.ErrPodRemoved
}
if err := p.maybeStartServiceContainer(ctx); err != nil {
return nil, err
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
// Build a dependency graph of containers in the pod
graph, err := BuildContainerGraph(allCtrs)
if err != nil {
return nil, fmt.Errorf("generating dependency graph for pod %s: %w", p.ID(), err)
}
ctrErrors := make(map[string]error)
ctrsVisited := make(map[string]bool)
// If there are no containers without dependencies, we can't restart; error out.
if len(graph.noDepNodes) == 0 {
return nil, fmt.Errorf("no containers in pod %s have no dependencies, cannot start pod: %w", p.ID(), define.ErrNoSuchCtr)
}
// Traverse the graph beginning at nodes with no dependencies
for _, node := range graph.noDepNodes {
startNode(ctx, node, false, ctrErrors, ctrsVisited, true)
}
if len(ctrErrors) > 0 {
return ctrErrors, fmt.Errorf("restarting some containers: %w", define.ErrPodPartialFail)
}
p.newPodEvent(events.Stop)
p.newPodEvent(events.Start)
return nil, nil
}
// Kill sends a signal to all running containers within a pod.
// Signals will only be sent to running containers. Containers that are not
// running will be ignored. All signals are sent independently, and sending will
// continue even if some containers encounter errors.
// An error and a map[string]error are returned.
// If the error is not nil and the map is nil, an error was encountered before
// any containers were signalled.
// If map is not nil, an error was encountered when signalling one or more
// containers. The container ID is mapped to the error encountered. The error is
// set to ErrPodPartialFail.
// If both error and the map are nil, all containers were signalled successfully.
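//
// Usage sketch (hedged; assumes the caller imports syscall; the signal is
// shown only for illustration):
//
//	// send SIGTERM to every running container in the pod
//	ctrErrs, err := pod.Kill(ctx, uint(syscall.SIGTERM))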
func (p *Pod) Kill(ctx context.Context, signal uint) (map[string]error, error) {
p.lock.Lock()
defer p.lock.Unlock()
if !p.valid {
return nil, define.ErrPodRemoved
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
ctrErrChan := make(map[string]<-chan error)
// Enqueue a function for each container with the parallel executor.
for _, ctr := range allCtrs {
c := ctr
logrus.Debugf("Adding parallel job to kill container %s", c.ID())
retChan := parallel.Enqueue(ctx, func() error {
return c.Kill(signal)
})
ctrErrChan[c.ID()] = retChan
}
p.newPodEvent(events.Kill)
ctrErrors := make(map[string]error)
// Get returned error for every container we worked on
for id, channel := range ctrErrChan {
if err := <-channel; err != nil {
if errors.Is(err, define.ErrCtrStateInvalid) || errors.Is(err, define.ErrCtrStopped) {
continue
}
ctrErrors[id] = err
}
}
if len(ctrErrors) > 0 {
return ctrErrors, fmt.Errorf("killing some containers: %w", define.ErrPodPartialFail)
}
if err := p.maybeStopServiceContainer(); err != nil {
return nil, err
}
return nil, nil
}
// Status gets the status of all containers in the pod.
// Returns a map of Container ID to Container Status.
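//
// Usage sketch (hedged):
//
//	statuses, err := pod.Status()
//	if err == nil {
//		for id, state := range statuses {
//			fmt.Printf("%s: %s\n", id, state.String())
//		}
//	}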
func (p *Pod) Status() (map[string]define.ContainerStatus, error) {
p.lock.Lock()
defer p.lock.Unlock()
if !p.valid {
return nil, define.ErrPodRemoved
}
allCtrs, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
noInitCtrs := make([]*Container, 0)
// Do not add init containers into status
for _, ctr := range allCtrs {
if ctrType := ctr.config.InitContainerType; len(ctrType) < 1 {
noInitCtrs = append(noInitCtrs, ctr)
}
}
return containerStatusFromContainers(noInitCtrs)
}
func containerStatusFromContainers(allCtrs []*Container) (map[string]define.ContainerStatus, error) {
status := make(map[string]define.ContainerStatus, len(allCtrs))
for _, ctr := range allCtrs {
state, err := ctr.State()
if err != nil {
return nil, err
}
status[ctr.ID()] = state
}
return status, nil
}
// Inspect returns an InspectPodData struct describing the pod.
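//
// Usage sketch (hedged; fields shown are a small subset of InspectPodData):
//
//	data, err := pod.Inspect()
//	if err == nil {
//		fmt.Printf("%s (%s): %d containers\n", data.Name, data.State, data.NumContainers)
//	}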
func (p *Pod) Inspect() (*define.InspectPodData, error) {
p.lock.Lock()
defer p.lock.Unlock()
if err := p.updatePod(); err != nil {
return nil, err
}
containers, err := p.runtime.state.PodContainers(p)
if err != nil {
return nil, err
}
ctrs := make([]define.InspectPodContainerInfo, 0, len(containers))
ctrStatuses := make(map[string]define.ContainerStatus, len(containers))
for _, c := range containers {
containerStatus := "unknown"
// Ignore possible errors here; a failure to read one container's state
// should not fail the whole inspect.
containerState, err := c.State()
if err == nil {
containerStatus = containerState.String()
}
ctrs = append(ctrs, define.InspectPodContainerInfo{
ID: c.ID(),
Name: c.Name(),
State: containerStatus,
})
// Do not add init containers for status
if len(c.config.InitContainerType) < 1 {
ctrStatuses[c.ID()] = c.state.State
}
}
podState, err := createPodStatusResults(ctrStatuses)
if err != nil {
return nil, err
}
namespaces := map[string]bool{
"pid": p.config.UsePodPID,
"ipc": p.config.UsePodIPC,
"net": p.config.UsePodNet,
"mount": p.config.UsePodMount,
"user": p.config.UsePodUser,
"uts": p.config.UsePodUTS,
"cgroup": p.config.UsePodCgroupNS,
}
sharesNS := []string{}
for nsStr, include := range namespaces {
if include {
sharesNS = append(sharesNS, nsStr)
}
}
// Infra config contains detailed information on the pod's infra
// container.
var infraConfig *define.InspectPodInfraConfig
var inspectMounts []define.InspectMount
var devices []define.InspectDevice
var infraSecurity []string
if p.state.InfraContainerID != "" {
infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
if err != nil {
return nil, err
}
infraConfig = new(define.InspectPodInfraConfig)
infraConfig.HostNetwork = p.NetworkMode() == "host"
infraConfig.StaticIP = infra.config.ContainerNetworkConfig.StaticIP
infraConfig.NoManageResolvConf = infra.config.UseImageResolvConf
infraConfig.NoManageHosts = infra.config.UseImageHosts
infraConfig.CPUPeriod = p.CPUPeriod()
infraConfig.CPUQuota = p.CPUQuota()
infraConfig.CPUSetCPUs = p.ResourceLim().CPU.Cpus
infraConfig.PidNS = p.NamespaceMode(specs.PIDNamespace)
infraConfig.UserNS = p.NamespaceMode(specs.UserNamespace)
infraConfig.UtsNS = p.NamespaceMode(specs.UTSNamespace)
namedVolumes, mounts := infra.SortUserVolumes(infra.config.Spec)
inspectMounts, err = infra.GetMounts(namedVolumes, infra.config.ImageVolumes, mounts)
infraSecurity = infra.GetSecurityOptions()
if err != nil {
return nil, err
}
if len(infra.config.ContainerNetworkConfig.DNSServer) > 0 {
infraConfig.DNSServer = make([]string, 0, len(infra.config.ContainerNetworkConfig.DNSServer))
for _, entry := range infra.config.ContainerNetworkConfig.DNSServer {
infraConfig.DNSServer = append(infraConfig.DNSServer, entry.String())
}
}
if len(infra.config.ContainerNetworkConfig.DNSSearch) > 0 {
infraConfig.DNSSearch = make([]string, 0, len(infra.config.ContainerNetworkConfig.DNSSearch))
infraConfig.DNSSearch = append(infraConfig.DNSSearch, infra.config.ContainerNetworkConfig.DNSSearch...)
}
if len(infra.config.ContainerNetworkConfig.DNSOption) > 0 {
infraConfig.DNSOption = make([]string, 0, len(infra.config.ContainerNetworkConfig.DNSOption))
infraConfig.DNSOption = append(infraConfig.DNSOption, infra.config.ContainerNetworkConfig.DNSOption...)
}
if len(infra.config.HostAdd) > 0 {
infraConfig.HostAdd = make([]string, 0, len(infra.config.HostAdd))
infraConfig.HostAdd = append(infraConfig.HostAdd, infra.config.HostAdd...)
}
networks, err := infra.networks()
if err != nil {
return nil, err
}
netNames := make([]string, 0, len(networks))
for name := range networks {
netNames = append(netNames, name)
}
if len(netNames) > 0 {
infraConfig.Networks = netNames
}
infraConfig.NetworkOptions = infra.config.ContainerNetworkConfig.NetworkOptions
infraConfig.PortBindings = makeInspectPortBindings(infra.config.ContainerNetworkConfig.PortMappings)
}
inspectData := define.InspectPodData{
ID: p.ID(),
Name: p.Name(),
Namespace: p.Namespace(),
Created: p.CreatedTime(),
CreateCommand: p.config.CreateCommand,
ExitPolicy: string(p.config.ExitPolicy),
State: podState,
Hostname: p.config.Hostname,
Labels: p.Labels(),
CreateCgroup: p.config.UsePodCgroup,
CgroupParent: p.CgroupParent(),
CgroupPath: p.state.CgroupPath,
CreateInfra: infraConfig != nil,
InfraContainerID: p.state.InfraContainerID,
InfraConfig: infraConfig,
SharedNamespaces: sharesNS,
NumContainers: uint(len(containers)),
Containers: ctrs,
CPUSetCPUs: p.ResourceLim().CPU.Cpus,
CPUPeriod: p.CPUPeriod(),
CPUQuota: p.CPUQuota(),
MemoryLimit: p.MemoryLimit(),
Mounts: inspectMounts,
Devices: devices,
BlkioDeviceReadBps: p.BlkiThrottleReadBps(),
VolumesFrom: p.VolumesFrom(),
SecurityOpts: infraSecurity,
MemorySwap: p.MemorySwap(),
BlkioWeight: p.BlkioWeight(),
CPUSetMems: p.CPUSetMems(),
BlkioDeviceWriteBps: p.BlkiThrottleWriteBps(),
CPUShares: p.CPUShares(),
RestartPolicy: p.config.RestartPolicy,
}
return &inspectData, nil
}