mirror of
				https://github.com/containers/podman.git
				synced 2025-10-26 02:35:43 +08:00 
			
		
		
		
	 c00ea686fe
			
		
	
	c00ea686fe
	
	
	
		
			
			Added the following flags and handling for podman pod create: --memory-swap, --cpuset-mems, --device-read-bps, --device-write-bps, --blkio-weight, --blkio-weight-device, --cpu-shares. Given the new backend for systemd in c/common, all of these can now be exposed to pod create. Most of the heavy lifting (nearly all) is done within c/common; however, some rewiring needed to be done here as well. Signed-off-by: Charlie Doern <cdoern@redhat.com>
		
			
				
	
	
		
			513 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			513 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package libpod
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"sort"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/containers/common/pkg/config"
 | |
| 	"github.com/containers/podman/v4/libpod/define"
 | |
| 	"github.com/containers/podman/v4/libpod/lock"
 | |
| 	"github.com/opencontainers/runtime-spec/specs-go"
 | |
| )
 | |
| 
 | |
| // Pod represents a group of containers that are managed together.
 | |
| // Any operations on a Pod that access state must begin with a call to
 | |
| // updatePod().
 | |
| // There is no guarantee that state exists in a readable state before this call,
 | |
| // and even if it does its contents will be out of date and must be refreshed
 | |
| // from the database.
 | |
| // Generally, this requirement applies only to top-level functions; helpers can
 | |
| // assume their callers handled this requirement. Generally speaking, if a
 | |
| // function takes the pod lock and accesses any part of state, it should
 | |
| // updatePod() immediately after locking.
 | |
| // Pod represents a group of containers that may share namespaces
 | |
| type Pod struct {
 | |
| 	config *PodConfig
 | |
| 	state  *podState
 | |
| 
 | |
| 	valid   bool
 | |
| 	runtime *Runtime
 | |
| 	lock    lock.Locker
 | |
| }
 | |
| 
 | |
| // PodConfig represents a pod's static configuration
 | |
| type PodConfig struct {
 | |
| 	ID   string `json:"id"`
 | |
| 	Name string `json:"name"`
 | |
| 	// Namespace the pod is in
 | |
| 	Namespace string `json:"namespace,omitempty"`
 | |
| 
 | |
| 	Hostname string `json:"hostname,omitempty"`
 | |
| 
 | |
| 	// Labels contains labels applied to the pod
 | |
| 	Labels map[string]string `json:"labels"`
 | |
| 	// CgroupParent contains the pod's Cgroup parent
 | |
| 	CgroupParent string `json:"cgroupParent"`
 | |
| 	// UsePodCgroup indicates whether the pod will create its own Cgroup and
 | |
| 	// join containers to it.
 | |
| 	// If true, all containers joined to the pod will use the pod cgroup as
 | |
| 	// their cgroup parent, and cannot set a different cgroup parent
 | |
| 	UsePodCgroup bool `json:"sharesCgroup,omitempty"`
 | |
| 
 | |
| 	// The following UsePod{kernelNamespace} indicate whether the containers
 | |
| 	// in the pod will inherit the namespace from the first container in the pod.
 | |
| 	UsePodPID      bool `json:"sharesPid,omitempty"`
 | |
| 	UsePodIPC      bool `json:"sharesIpc,omitempty"`
 | |
| 	UsePodNet      bool `json:"sharesNet,omitempty"`
 | |
| 	UsePodMount    bool `json:"sharesMnt,omitempty"`
 | |
| 	UsePodUser     bool `json:"sharesUser,omitempty"`
 | |
| 	UsePodUTS      bool `json:"sharesUts,omitempty"`
 | |
| 	UsePodCgroupNS bool `json:"sharesCgroupNS,omitempty"`
 | |
| 
 | |
| 	HasInfra bool `json:"hasInfra,omitempty"`
 | |
| 
 | |
| 	// ServiceContainerID is the main container of a service.  A service
 | |
| 	// consists of one or more pods.  The service container is started
 | |
| 	// before all pods and is stopped when the last pod stops.
 | |
| 	// The service container allows for tracking and managing the entire
 | |
| 	// life cycle of service which may be started via `podman-play-kube`.
 | |
| 	ServiceContainerID string `json:"serviceContainerID,omitempty"`
 | |
| 
 | |
| 	// Time pod was created
 | |
| 	CreatedTime time.Time `json:"created"`
 | |
| 
 | |
| 	// CreateCommand is the full command plus arguments of the process the
 | |
| 	// container has been created with.
 | |
| 	CreateCommand []string `json:"CreateCommand,omitempty"`
 | |
| 
 | |
| 	// The pod's exit policy.
 | |
| 	ExitPolicy config.PodExitPolicy `json:"ExitPolicy,omitempty"`
 | |
| 
 | |
| 	// ID of the pod's lock
 | |
| 	LockID uint32 `json:"lockID"`
 | |
| 
 | |
| 	// ResourceLimits hold the pod level resource limits
 | |
| 	ResourceLimits specs.LinuxResources
 | |
| }
 | |
| 
 | |
// podState holds the mutable state of a pod.
type podState struct {
	// CgroupPath is the path to the pod's Cgroup.
	CgroupPath string `json:"cgroupPath"`
	// InfraContainerID identifies the container that holds the pod's
	// namespace information, most often an infra container. It is empty
	// when the pod has no such container.
	InfraContainerID string
}
 | |
| 
 | |
| // ID retrieves the pod's ID
 | |
| func (p *Pod) ID() string {
 | |
| 	return p.config.ID
 | |
| }
 | |
| 
 | |
| // Name retrieves the pod's name
 | |
| func (p *Pod) Name() string {
 | |
| 	return p.config.Name
 | |
| }
 | |
| 
 | |
| // Namespace returns the pod's libpod namespace.
 | |
| // Namespaces are used to logically separate containers and pods in the state.
 | |
| func (p *Pod) Namespace() string {
 | |
| 	return p.config.Namespace
 | |
| }
 | |
| 
 | |
| // ResourceLim returns the cpuset resource limits for the pod
 | |
| func (p *Pod) ResourceLim() *specs.LinuxResources {
 | |
| 	resCopy := &specs.LinuxResources{}
 | |
| 	empty := &specs.LinuxResources{
 | |
| 		CPU: &specs.LinuxCPU{},
 | |
| 	}
 | |
| 	if err := JSONDeepCopy(p.config.ResourceLimits, resCopy); err != nil {
 | |
| 		return nil
 | |
| 	}
 | |
| 	if resCopy.CPU != nil {
 | |
| 		return resCopy
 | |
| 	}
 | |
| 
 | |
| 	return empty
 | |
| }
 | |
| 
 | |
| // CPUPeriod returns the pod CPU period
 | |
| func (p *Pod) CPUPeriod() uint64 {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.CPU == nil || resLim.CPU.Period == nil {
 | |
| 		return 0
 | |
| 	}
 | |
| 	return *resLim.CPU.Period
 | |
| }
 | |
| 
 | |
| // CPUQuota returns the pod CPU quota
 | |
| func (p *Pod) CPUQuota() int64 {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.CPU == nil || resLim.CPU.Quota == nil {
 | |
| 		return 0
 | |
| 	}
 | |
| 	return *resLim.CPU.Quota
 | |
| }
 | |
| 
 | |
| // MemoryLimit returns the pod Memory Limit
 | |
| func (p *Pod) MemoryLimit() uint64 {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.Memory == nil || resLim.Memory.Limit == nil {
 | |
| 		return 0
 | |
| 	}
 | |
| 	return uint64(*resLim.Memory.Limit)
 | |
| }
 | |
| 
 | |
| // MemorySwap returns the pod Memory swap limit
 | |
| func (p *Pod) MemorySwap() uint64 {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.Memory == nil || resLim.Memory.Swap == nil {
 | |
| 		return 0
 | |
| 	}
 | |
| 	return uint64(*resLim.Memory.Swap)
 | |
| }
 | |
| 
 | |
| // BlkioWeight returns the pod blkio weight
 | |
| func (p *Pod) BlkioWeight() uint64 {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.BlockIO == nil || resLim.BlockIO.Weight == nil {
 | |
| 		return 0
 | |
| 	}
 | |
| 	return uint64(*resLim.BlockIO.Weight)
 | |
| }
 | |
| 
 | |
| // CPUSetMems returns the pod CPUSet memory nodes
 | |
| func (p *Pod) CPUSetMems() string {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.CPU == nil {
 | |
| 		return ""
 | |
| 	}
 | |
| 	return resLim.CPU.Mems
 | |
| }
 | |
| 
 | |
| // CPUShares returns the pod cpu shares
 | |
| func (p *Pod) CPUShares() uint64 {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.CPU == nil || resLim.CPU.Shares == nil {
 | |
| 		return 0
 | |
| 	}
 | |
| 	return *resLim.CPU.Shares
 | |
| }
 | |
| 
 | |
| // BlkiThrottleReadBps returns the pod  throttle devices
 | |
| func (p *Pod) BlkiThrottleReadBps() []define.InspectBlkioThrottleDevice {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.BlockIO == nil || resLim.BlockIO.ThrottleReadBpsDevice == nil {
 | |
| 		return []define.InspectBlkioThrottleDevice{}
 | |
| 	}
 | |
| 	devs, err := blkioDeviceThrottle(nil, resLim.BlockIO.ThrottleReadBpsDevice)
 | |
| 	if err != nil {
 | |
| 		return []define.InspectBlkioThrottleDevice{}
 | |
| 	}
 | |
| 	return devs
 | |
| }
 | |
| 
 | |
| // BlkiThrottleWriteBps returns the pod  throttle devices
 | |
| func (p *Pod) BlkiThrottleWriteBps() []define.InspectBlkioThrottleDevice {
 | |
| 	resLim := p.ResourceLim()
 | |
| 	if resLim.BlockIO == nil || resLim.BlockIO.ThrottleWriteBpsDevice == nil {
 | |
| 		return []define.InspectBlkioThrottleDevice{}
 | |
| 	}
 | |
| 	devs, err := blkioDeviceThrottle(nil, resLim.BlockIO.ThrottleWriteBpsDevice)
 | |
| 	if err != nil {
 | |
| 		return []define.InspectBlkioThrottleDevice{}
 | |
| 	}
 | |
| 	return devs
 | |
| }
 | |
| 
 | |
| // NetworkMode returns the Network mode given by the user ex: pod, private...
 | |
| func (p *Pod) NetworkMode() string {
 | |
| 	infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
 | |
| 	if err != nil {
 | |
| 		return ""
 | |
| 	}
 | |
| 	return infra.NetworkMode()
 | |
| }
 | |
| 
 | |
| // Namespace Mode returns the given NS mode provided by the user ex: host, private...
 | |
| func (p *Pod) NamespaceMode(kind specs.LinuxNamespaceType) string {
 | |
| 	infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
 | |
| 	if err != nil {
 | |
| 		return ""
 | |
| 	}
 | |
| 	ctrSpec := infra.config.Spec
 | |
| 	if ctrSpec != nil && ctrSpec.Linux != nil {
 | |
| 		for _, ns := range ctrSpec.Linux.Namespaces {
 | |
| 			if ns.Type == kind {
 | |
| 				if ns.Path != "" {
 | |
| 					return fmt.Sprintf("ns:%s", ns.Path)
 | |
| 				}
 | |
| 				return "private"
 | |
| 			}
 | |
| 		}
 | |
| 		return "host"
 | |
| 	}
 | |
| 	return ""
 | |
| }
 | |
| 
 | |
| // CPUQuota returns the pod CPU quota
 | |
| func (p *Pod) VolumesFrom() []string {
 | |
| 	if p.state.InfraContainerID == "" {
 | |
| 		return nil
 | |
| 	}
 | |
| 	infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
 | |
| 	if err != nil {
 | |
| 		return nil
 | |
| 	}
 | |
| 	if ctrs, ok := infra.config.Spec.Annotations[define.InspectAnnotationVolumesFrom]; ok {
 | |
| 		return strings.Split(ctrs, ",")
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Labels returns the pod's labels
 | |
| func (p *Pod) Labels() map[string]string {
 | |
| 	labels := make(map[string]string)
 | |
| 	for key, value := range p.config.Labels {
 | |
| 		labels[key] = value
 | |
| 	}
 | |
| 
 | |
| 	return labels
 | |
| }
 | |
| 
 | |
| // CreatedTime gets the time when the pod was created
 | |
| func (p *Pod) CreatedTime() time.Time {
 | |
| 	return p.config.CreatedTime
 | |
| }
 | |
| 
 | |
| // CreateCommand returns the os.Args of the process with which the pod has been
 | |
| // created.
 | |
| func (p *Pod) CreateCommand() []string {
 | |
| 	return p.config.CreateCommand
 | |
| }
 | |
| 
 | |
| // CgroupParent returns the pod's Cgroup parent
 | |
| func (p *Pod) CgroupParent() string {
 | |
| 	return p.config.CgroupParent
 | |
| }
 | |
| 
 | |
| // SharesPID returns whether containers in pod
 | |
| // default to use PID namespace of first container in pod
 | |
| func (p *Pod) SharesPID() bool {
 | |
| 	return p.config.UsePodPID
 | |
| }
 | |
| 
 | |
| // SharesIPC returns whether containers in pod
 | |
| // default to use IPC namespace of first container in pod
 | |
| func (p *Pod) SharesIPC() bool {
 | |
| 	return p.config.UsePodIPC
 | |
| }
 | |
| 
 | |
| // SharesNet returns whether containers in pod
 | |
| // default to use network namespace of first container in pod
 | |
| func (p *Pod) SharesNet() bool {
 | |
| 	return p.config.UsePodNet
 | |
| }
 | |
| 
 | |
| // SharesMount returns whether containers in pod
 | |
| // default to use PID namespace of first container in pod
 | |
| func (p *Pod) SharesMount() bool {
 | |
| 	return p.config.UsePodMount
 | |
| }
 | |
| 
 | |
| // SharesUser returns whether containers in pod
 | |
| // default to use user namespace of first container in pod
 | |
| func (p *Pod) SharesUser() bool {
 | |
| 	return p.config.UsePodUser
 | |
| }
 | |
| 
 | |
| // SharesUTS returns whether containers in pod
 | |
| // default to use UTS namespace of first container in pod
 | |
| func (p *Pod) SharesUTS() bool {
 | |
| 	return p.config.UsePodUTS
 | |
| }
 | |
| 
 | |
| // SharesCgroup returns whether containers in the pod will default to this pod's
 | |
| // cgroup instead of the default libpod parent
 | |
| func (p *Pod) SharesCgroup() bool {
 | |
| 	return p.config.UsePodCgroupNS
 | |
| }
 | |
| 
 | |
| // Hostname returns the hostname of the pod.
 | |
| func (p *Pod) Hostname() string {
 | |
| 	return p.config.Hostname
 | |
| }
 | |
| 
 | |
| // CgroupPath returns the path to the pod's Cgroup
 | |
| func (p *Pod) CgroupPath() (string, error) {
 | |
| 	p.lock.Lock()
 | |
| 	defer p.lock.Unlock()
 | |
| 	if err := p.updatePod(); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	if p.state.InfraContainerID == "" {
 | |
| 		return "", fmt.Errorf("pod has no infra container: %w", define.ErrNoSuchCtr)
 | |
| 	}
 | |
| 	return p.state.CgroupPath, nil
 | |
| }
 | |
| 
 | |
| // HasContainer checks if a container is present in the pod
 | |
| func (p *Pod) HasContainer(id string) (bool, error) {
 | |
| 	if !p.valid {
 | |
| 		return false, define.ErrPodRemoved
 | |
| 	}
 | |
| 
 | |
| 	return p.runtime.state.PodHasContainer(p, id)
 | |
| }
 | |
| 
 | |
| // AllContainersByID returns the container IDs of all the containers in the pod
 | |
| func (p *Pod) AllContainersByID() ([]string, error) {
 | |
| 	p.lock.Lock()
 | |
| 	defer p.lock.Unlock()
 | |
| 
 | |
| 	if !p.valid {
 | |
| 		return nil, define.ErrPodRemoved
 | |
| 	}
 | |
| 
 | |
| 	return p.runtime.state.PodContainersByID(p)
 | |
| }
 | |
| 
 | |
| // AllContainers retrieves the containers in the pod
 | |
| func (p *Pod) AllContainers() ([]*Container, error) {
 | |
| 	if !p.valid {
 | |
| 		return nil, define.ErrPodRemoved
 | |
| 	}
 | |
| 	p.lock.Lock()
 | |
| 	defer p.lock.Unlock()
 | |
| 	return p.allContainers()
 | |
| }
 | |
| 
 | |
| func (p *Pod) allContainers() ([]*Container, error) {
 | |
| 	return p.runtime.state.PodContainers(p)
 | |
| }
 | |
| 
 | |
| // HasInfraContainer returns whether the pod will create an infra container
 | |
| func (p *Pod) HasInfraContainer() bool {
 | |
| 	return p.config.HasInfra
 | |
| }
 | |
| 
 | |
| // SharesNamespaces checks if the pod has any kernel namespaces set as shared. An infra container will not be
 | |
| // created if no kernel namespaces are shared.
 | |
| func (p *Pod) SharesNamespaces() bool {
 | |
| 	return p.SharesPID() || p.SharesIPC() || p.SharesNet() || p.SharesMount() || p.SharesUser() || p.SharesUTS()
 | |
| }
 | |
| 
 | |
| // infraContainerID returns the infra ID without a lock
 | |
| func (p *Pod) infraContainerID() (string, error) {
 | |
| 	if err := p.updatePod(); err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	return p.state.InfraContainerID, nil
 | |
| }
 | |
| 
 | |
| // InfraContainerID returns the infra container ID for a pod.
 | |
| // If the container returned is "", the pod has no infra container.
 | |
| func (p *Pod) InfraContainerID() (string, error) {
 | |
| 	p.lock.Lock()
 | |
| 	defer p.lock.Unlock()
 | |
| 	return p.infraContainerID()
 | |
| }
 | |
| 
 | |
| // infraContainer is the unlocked version of InfraContainer which returns the infra container
 | |
| func (p *Pod) infraContainer() (*Container, error) {
 | |
| 	id, err := p.infraContainerID()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	if id == "" {
 | |
| 		return nil, fmt.Errorf("pod has no infra container: %w", define.ErrNoSuchCtr)
 | |
| 	}
 | |
| 
 | |
| 	return p.runtime.state.Container(id)
 | |
| }
 | |
| 
 | |
| // InfraContainer returns the infra container.
 | |
| func (p *Pod) InfraContainer() (*Container, error) {
 | |
| 	p.lock.Lock()
 | |
| 	defer p.lock.Unlock()
 | |
| 	return p.infraContainer()
 | |
| }
 | |
| 
 | |
| // TODO add pod batching
 | |
| // Lock pod to avoid lock contention
 | |
| // Store and lock all containers (no RemoveContainer in batch guarantees cache will not become stale)
 | |
| 
 | |
| // PodContainerStats is an organization struct for pods and their containers
 | |
| type PodContainerStats struct {
 | |
| 	Pod            *Pod
 | |
| 	ContainerStats map[string]*define.ContainerStats
 | |
| }
 | |
| 
 | |
| // GetPodStats returns the stats for each of its containers
 | |
| func (p *Pod) GetPodStats(previousContainerStats map[string]*define.ContainerStats) (map[string]*define.ContainerStats, error) {
 | |
| 	p.lock.Lock()
 | |
| 	defer p.lock.Unlock()
 | |
| 
 | |
| 	if err := p.updatePod(); err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	containers, err := p.runtime.state.PodContainers(p)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	newContainerStats := make(map[string]*define.ContainerStats)
 | |
| 	for _, c := range containers {
 | |
| 		newStats, err := c.GetContainerStats(previousContainerStats[c.ID()])
 | |
| 		// If the container wasn't running, don't include it
 | |
| 		// but also suppress the error
 | |
| 		if err != nil && !errors.Is(err, define.ErrCtrStateInvalid) {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		if err == nil {
 | |
| 			newContainerStats[c.ID()] = newStats
 | |
| 		}
 | |
| 	}
 | |
| 	return newContainerStats, nil
 | |
| }
 | |
| 
 | |
| // ProcessLabel returns the SELinux label associated with the pod
 | |
| func (p *Pod) ProcessLabel() (string, error) {
 | |
| 	if !p.HasInfraContainer() {
 | |
| 		return "", nil
 | |
| 	}
 | |
| 	ctr, err := p.infraContainer()
 | |
| 	if err != nil {
 | |
| 		return "", err
 | |
| 	}
 | |
| 	return ctr.ProcessLabel(), nil
 | |
| }
 | |
| 
 | |
| // initContainers returns the list of initcontainers
 | |
| // in a pod sorted by create time
 | |
| func (p *Pod) initContainers() ([]*Container, error) {
 | |
| 	initCons := make([]*Container, 0)
 | |
| 	// the pod is already locked when this is called
 | |
| 	cons, err := p.allContainers()
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	// Sort the pod containers by created time
 | |
| 	sort.Slice(cons, func(i, j int) bool { return cons[i].CreatedTime().Before(cons[j].CreatedTime()) })
 | |
| 	// Iterate sorted containers and add ids for any init containers
 | |
| 	for _, c := range cons {
 | |
| 		if len(c.config.InitContainerType) > 0 {
 | |
| 			initCons = append(initCons, c)
 | |
| 		}
 | |
| 	}
 | |
| 	return initCons, nil
 | |
| }
 | |
| 
 | |
| func (p *Pod) Config() (*PodConfig, error) {
 | |
| 	p.lock.Lock()
 | |
| 	defer p.lock.Unlock()
 | |
| 
 | |
| 	conf := &PodConfig{}
 | |
| 
 | |
| 	err := JSONDeepCopy(p.config, conf)
 | |
| 
 | |
| 	return conf, err
 | |
| }
 |