resource limits for pods

added the following flags and their handling to `podman pod create`:

--memory-swap
--cpuset-mems
--device-read-bps
--device-write-bps
--blkio-weight
--blkio-weight-device
--cpu-shares

given the new systemd backend in c/common, all of these can now be exposed to pod create.
nearly all of the heavy lifting is done within c/common, but some rewiring needed to be done here
as well!
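A rough usage sketch of the new flags (the pod name, device path, and values here are illustrative, not part of this change):

```
podman pod create --name limited \
    --cpu-shares=512 --cpuset-mems=0 \
    --memory=5m --memory-swap=1g \
    --blkio-weight=50 --blkio-weight-device=/dev/sda:123 \
    --device-read-bps=/dev/sda:1mb --device-write-bps=/dev/sda:1mb
podman pod inspect --format '{{.CgroupPath}}' limited
```

The limits are written to the pod's cgroup, which containers joining the pod inherit unless they set tighter limits of their own.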

Signed-off-by: Charlie Doern <cdoern@redhat.com>
Charlie Doern
2022-07-07 14:44:10 -04:00
parent 5f53a67742
commit c00ea686fe
15 changed files with 530 additions and 227 deletions

View File

@ -56,22 +56,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(authfileFlagName, completion.AutocompleteDefault)
blkioWeightFlagName := "blkio-weight"
createFlags.StringVar(
&cf.BlkIOWeight,
blkioWeightFlagName, "",
"Block IO weight (relative weight) accepts a weight value between 10 and 1000.",
)
_ = cmd.RegisterFlagCompletionFunc(blkioWeightFlagName, completion.AutocompleteNone)
blkioWeightDeviceFlagName := "blkio-weight-device"
createFlags.StringSliceVar(
&cf.BlkIOWeightDevice,
blkioWeightDeviceFlagName, []string{},
"Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`)",
)
_ = cmd.RegisterFlagCompletionFunc(blkioWeightDeviceFlagName, completion.AutocompleteDefault)
capAddFlagName := "cap-add"
createFlags.StringSliceVar(
&cf.CapAdd,
@ -127,14 +111,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(deviceReadIopsFlagName, completion.AutocompleteDefault)
deviceWriteBpsFlagName := "device-write-bps"
createFlags.StringSliceVar(
&cf.DeviceWriteBPs,
deviceWriteBpsFlagName, []string{},
"Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb)",
)
_ = cmd.RegisterFlagCompletionFunc(deviceWriteBpsFlagName, completion.AutocompleteDefault)
deviceWriteIopsFlagName := "device-write-iops"
createFlags.StringSliceVar(
&cf.DeviceWriteIOPs,
@ -783,14 +759,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(deviceFlagName, completion.AutocompleteDefault)
deviceReadBpsFlagName := "device-read-bps"
createFlags.StringSliceVar(
&cf.DeviceReadBPs,
deviceReadBpsFlagName, []string{},
"Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb)",
)
_ = cmd.RegisterFlagCompletionFunc(deviceReadBpsFlagName, completion.AutocompleteDefault)
volumesFromFlagName := "volumes-from"
createFlags.StringArrayVar(
&cf.VolumesFrom,
@ -848,22 +816,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(cpuRtRuntimeFlagName, completion.AutocompleteNone)
cpuSharesFlagName := "cpu-shares"
createFlags.Uint64VarP(
&cf.CPUShares,
cpuSharesFlagName, "c", 0,
"CPU shares (relative weight)",
)
_ = cmd.RegisterFlagCompletionFunc(cpuSharesFlagName, completion.AutocompleteNone)
cpusetMemsFlagName := "cpuset-mems"
createFlags.StringVar(
&cf.CPUSetMems,
cpusetMemsFlagName, "",
"Memory nodes (MEMs) in which to allow execution (0-3, 0,1). Only effective on NUMA systems.",
)
_ = cmd.RegisterFlagCompletionFunc(cpusetMemsFlagName, completion.AutocompleteNone)
memoryReservationFlagName := "memory-reservation"
createFlags.StringVar(
&cf.MemoryReservation,
@ -872,14 +824,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(memoryReservationFlagName, completion.AutocompleteNone)
memorySwapFlagName := "memory-swap"
createFlags.StringVar(
&cf.MemorySwap,
memorySwapFlagName, "",
"Swap limit equal to memory plus swap: '-1' to enable unlimited swap",
)
_ = cmd.RegisterFlagCompletionFunc(memorySwapFlagName, completion.AutocompleteNone)
memorySwappinessFlagName := "memory-swappiness"
createFlags.Int64Var(
&cf.MemorySwappiness,
@ -913,4 +857,60 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
"Memory limit "+sizeWithUnitFormat,
)
_ = cmd.RegisterFlagCompletionFunc(memoryFlagName, completion.AutocompleteNone)
cpuSharesFlagName := "cpu-shares"
createFlags.Uint64VarP(
&cf.CPUShares,
cpuSharesFlagName, "c", 0,
"CPU shares (relative weight)",
)
_ = cmd.RegisterFlagCompletionFunc(cpuSharesFlagName, completion.AutocompleteNone)
cpusetMemsFlagName := "cpuset-mems"
createFlags.StringVar(
&cf.CPUSetMems,
cpusetMemsFlagName, "",
"Memory nodes (MEMs) in which to allow execution (0-3, 0,1). Only effective on NUMA systems.",
)
_ = cmd.RegisterFlagCompletionFunc(cpusetMemsFlagName, completion.AutocompleteNone)
memorySwapFlagName := "memory-swap"
createFlags.StringVar(
&cf.MemorySwap,
memorySwapFlagName, "",
"Swap limit equal to memory plus swap: '-1' to enable unlimited swap",
)
_ = cmd.RegisterFlagCompletionFunc(memorySwapFlagName, completion.AutocompleteNone)
deviceReadBpsFlagName := "device-read-bps"
createFlags.StringSliceVar(
&cf.DeviceReadBPs,
deviceReadBpsFlagName, []string{},
"Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb)",
)
_ = cmd.RegisterFlagCompletionFunc(deviceReadBpsFlagName, completion.AutocompleteDefault)
deviceWriteBpsFlagName := "device-write-bps"
createFlags.StringSliceVar(
&cf.DeviceWriteBPs,
deviceWriteBpsFlagName, []string{},
"Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb)",
)
_ = cmd.RegisterFlagCompletionFunc(deviceWriteBpsFlagName, completion.AutocompleteDefault)
blkioWeightFlagName := "blkio-weight"
createFlags.StringVar(
&cf.BlkIOWeight,
blkioWeightFlagName, "",
"Block IO weight (relative weight) accepts a weight value between 10 and 1000.",
)
_ = cmd.RegisterFlagCompletionFunc(blkioWeightFlagName, completion.AutocompleteNone)
blkioWeightDeviceFlagName := "blkio-weight-device"
createFlags.StringSliceVar(
&cf.BlkIOWeightDevice,
blkioWeightDeviceFlagName, []string{},
"Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`)",
)
_ = cmd.RegisterFlagCompletionFunc(blkioWeightDeviceFlagName, completion.AutocompleteDefault)
}

View File

@ -11,6 +11,14 @@ podman\-container\-clone - Creates a copy of an existing container
## OPTIONS
#### **--blkio-weight**=*weight*
Block IO weight (relative weight) accepts a weight value between 10 and 1000.
#### **--blkio-weight-device**=*weight*
Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`).
#### **--cpu-period**=*limit*
Set the CPU period for the Completely Fair Scheduler (CFS), which is a
@ -126,6 +134,14 @@ If none are specified, the original container's CPU memory nodes are used.
Remove the original container that we are cloning once used to mimic the configuration.
#### **--device-read-bps**=*path*
Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb).
#### **--device-write-bps**=*path*
Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb)
#### **--force**, **-f**
Force removal of the original container that we are cloning. Can only be used in conjunction with **--destroy**.

View File

@ -11,10 +11,55 @@ podman\-pod\-clone - Creates a copy of an existing pod
## OPTIONS
#### **--blkio-weight**=*weight*
Block IO weight (relative weight) accepts a weight value between 10 and 1000.
#### **--blkio-weight-device**=*weight*
Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`).
#### **--cgroup-parent**=*path*
Path to cgroups under which the cgroup for the pod will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist.
#### **--cpu-shares**, **-c**=*shares*
CPU shares (relative weight)
By default, all containers get the same proportion of CPU cycles. This proportion
can be modified by changing the container's CPU share weighting relative
to the weighting of all other running containers.
To modify the proportion from the default of 1024, use the **--cpu-shares**
flag to set the weighting to 2 or higher.
The proportion will only apply when CPU-intensive processes are running.
When tasks in one container are idle, other containers can use the
left-over CPU time. The actual amount of CPU time will vary depending on
the number of containers running on the system.
For example, consider three containers, one has a cpu-share of 1024 and
two others have a cpu-share setting of 512. When processes in all three
containers attempt to use 100% of CPU, the first container would receive
50% of the total CPU time. If you add a fourth container with a cpu-share
of 1024, the first container only gets 33% of the CPU. The remaining containers
receive 16.5%, 16.5% and 33% of the CPU.
On a multi-core system, the shares of CPU time are distributed over all CPU
cores. Even if a container is limited to less than 100% of CPU time, it can
use 100% of each individual CPU core.
For example, consider a system with more than three cores. If you start one
container **{C0}** with **-c=512** running one process, and another container
**{C1}** with **-c=1024** running two processes, this can result in the following
division of CPU shares:
PID container CPU CPU share
100 {C0} 0 100% of CPU0
101 {C1} 1 100% of CPU1
102 {C1} 2 100% of CPU2
#### **--cpus**
Set a number of CPUs for the pod that overrides the original pod's CPU limits. If none are specified, the original pod's Nano CPUs are used.
@ -23,6 +68,15 @@ Set a number of CPUs for the pod that overrides the original pods CPU limits. If
CPUs in which to allow execution (0-3, 0,1). If none are specified, the original pod's CPUset is used.
#### **--cpuset-mems**=*nodes*
Memory nodes (MEMs) in which to allow execution (0-3, 0,1). Only effective on NUMA systems.
If there are four memory nodes on the system (0-3), use `--cpuset-mems=0,1`
then processes in the container will only use memory from the first
two memory nodes.
#### **--destroy**
Remove the original pod that we are cloning once used to mimic the configuration.
@ -48,6 +102,10 @@ device. The devices that Podman will load modules for when necessary are:
Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb).
#### **--device-write-bps**=*path*
Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb)
#### **--gidmap**=*pod_gid:host_gid:amount*
GID map for the user namespace. Using this flag will run all containers in the pod with user namespace enabled. It conflicts with the `--userns` and `--subgidname` flags.
@ -90,6 +148,17 @@ RAM. If a limit of 0 is specified (not using **-m**), the container's memory is
not limited. The actual limit may be rounded up to a multiple of the operating
system's page size (the value would be very large, that's millions of trillions).
#### **--memory-swap**=*limit*
A limit value equal to memory plus swap. Must be used with the **-m**
(**--memory**) flag. The swap `LIMIT` should always be larger than **-m**
(**--memory**) value. By default, the swap `LIMIT` will be set to double
the value of --memory.
The format of `LIMIT` is `<number>[<unit>]`. Unit can be `b` (bytes),
`k` (kibibytes), `m` (mebibytes), or `g` (gibibytes). If you don't specify a
unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
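A minimal illustration (the pod name and values are placeholders, assuming the original pod exists):

```
podman pod clone --memory=512m --memory-swap=1g original-pod
```

Passing `--memory-swap=-1` instead would allow the cloned pod unlimited swap.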
#### **--name**, **-n**
Set a custom name for the cloned pod. The default if not specified is of the syntax: **<ORIGINAL_NAME>-clone**

View File

@ -23,6 +23,9 @@ podman generates a UUID for each pod, and if a name is not assigned
to the container with **--name** then a random string name will be generated
for it. The name is useful any place you need to identify a pod.
Note: resource limit related flags work by setting the limits explicitly in the pod's cgroup,
which by default is the cgroup parent for all containers joining the pod. Containers are still free to set their own, tighter resource limits when joining a pod: if you run **podman pod create --cpus=5**, you can also run **podman container create --pod=`<pod_id|pod_name>` --cpus=4**, and that container will only see the smaller limit (see the example below). Containers do NOT get the pod-level cgroup resources if they specify their own cgroup when joining a pod, for example with **--cgroupns=host**.
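For instance (pod and image names are placeholders):

```
podman pod create --name mypod --cpus=5
podman container create --pod=mypod --cpus=4 alpine top
```

The container is constrained to 4 CPUs even though the pod cgroup allows 5; without its own **--cpus**, it would simply run under the pod's limit of 5.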
## OPTIONS
#### **--add-host**=*host:ip*
@ -33,10 +36,55 @@ Add a line to /etc/hosts. The format is hostname:ip. The **--add-host**
option can be set multiple times.
The /etc/hosts file is shared between all containers in the pod.
#### **--blkio-weight**=*weight*
Block IO weight (relative weight) accepts a weight value between 10 and 1000.
#### **--blkio-weight-device**=*weight*
Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`).
#### **--cgroup-parent**=*path*
Path to cgroups under which the cgroup for the pod will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist.
#### **--cpu-shares**, **-c**=*shares*
CPU shares (relative weight)
By default, all containers get the same proportion of CPU cycles. This proportion
can be modified by changing the container's CPU share weighting relative
to the weighting of all other running containers.
To modify the proportion from the default of 1024, use the **--cpu-shares**
flag to set the weighting to 2 or higher.
The proportion will only apply when CPU-intensive processes are running.
When tasks in one container are idle, other containers can use the
left-over CPU time. The actual amount of CPU time will vary depending on
the number of containers running on the system.
For example, consider three containers, one has a cpu-share of 1024 and
two others have a cpu-share setting of 512. When processes in all three
containers attempt to use 100% of CPU, the first container would receive
50% of the total CPU time. If you add a fourth container with a cpu-share
of 1024, the first container only gets 33% of the CPU. The remaining containers
receive 16.5%, 16.5% and 33% of the CPU.
On a multi-core system, the shares of CPU time are distributed over all CPU
cores. Even if a container is limited to less than 100% of CPU time, it can
use 100% of each individual CPU core.
For example, consider a system with more than three cores. If you start one
container **{C0}** with **-c=512** running one process, and another container
**{C1}** with **-c=1024** running two processes, this can result in the following
division of CPU shares:
PID container CPU CPU share
100 {C0} 0 100% of CPU0
101 {C1} 1 100% of CPU1
102 {C1} 2 100% of CPU2
#### **--cpus**=*amount*
Set the total number of CPUs delegated to the pod. Default is 0.000 which indicates that there is no limit on computation power.
@ -52,7 +100,15 @@ Examples of the List Format:
0-4,9 # bits 0, 1, 2, 3, 4, and 9 set
0-2,7,12-14 # bits 0, 1, 2, 7, 12, 13, and 14 set
- #### **--device**=*host-device[:container-device][:permissions]*
+ #### **--cpuset-mems**=*nodes*
Memory nodes (MEMs) in which to allow execution (0-3, 0,1). Only effective on NUMA systems.
If there are four memory nodes on the system (0-3), use `--cpuset-mems=0,1`
then processes in the container will only use memory from the first
two memory nodes.
#### **--device**=_host-device_[**:**_container-device_][**:**_permissions_]
Add a host device to the pod. Optional *permissions* parameter
can be used to specify device permissions. It is a combination of
@ -73,6 +129,10 @@ device. The devices that Podman will load modules for when necessary are:
Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb)
#### **--device-write-bps**=*path*
Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb)
#### **--dns**=*ipaddr*
Set custom DNS servers in the /etc/resolv.conf file that will be shared between all containers in the pod. A special option, "none" is allowed which disables creation of /etc/resolv.conf for the pod.
@ -174,6 +234,16 @@ RAM. If a limit of 0 is specified (not using **-m**), the container's memory is
not limited. The actual limit may be rounded up to a multiple of the operating
system's page size (the value would be very large, that's millions of trillions).
#### **--memory-swap**=*limit*
A limit value equal to memory plus swap. Must be used with the **-m**
(**--memory**) flag. The swap `LIMIT` should always be larger than **-m**
(**--memory**) value. By default, the swap `LIMIT` will be set to double
the value of --memory.
The format of `LIMIT` is `<number>[<unit>]`. Unit can be `b` (bytes),
`k` (kibibytes), `m` (mebibytes), or `g` (gibibytes). If you don't specify a
unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap.
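For example, the following (illustrative) invocation caps pod memory at 5 MiB and memory plus swap at 1 GiB, the same values exercised by the system test in this change:

```
podman pod create --name swaplimited --memory=5m --memory-swap=1g
```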
#### **--name**, **-n**=*name*
@ -603,7 +673,7 @@ $ podman pod create --network net1:ip=10.89.1.5 --network net2:ip=10.89.10.10
```
## SEE ALSO
- **[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **[podman-kube-play(1)](podman-kube-play.1.md)**, **containers.conf(1)**
+ **[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **[podman-kube-play(1)](podman-kube-play.1.md)**, **containers.conf(1)**, **[cgroups(7)](https://man7.org/linux/man-pages/man7/cgroups.7.html)**
## HISTORY

View File

@ -57,20 +57,32 @@ type InspectPodData struct {
CPUPeriod uint64 `json:"cpu_period,omitempty"`
// CPUQuota contains the CPU quota of the pod
CPUQuota int64 `json:"cpu_quota,omitempty"`
// CPUShares contains the cpu shares for the pod
CPUShares uint64 `json:"cpu_shares,omitempty"`
// CPUSetCPUs contains linux specific CPU data for the pod
CPUSetCPUs string `json:"cpuset_cpus,omitempty"`
// CPUSetMems contains linux specific CPU data for the pod
CPUSetMems string `json:"cpuset_mems,omitempty"`
// Mounts contains volume related information for the pod
Mounts []InspectMount `json:"mounts,omitempty"`
// Devices contains the specified host devices
Devices []InspectDevice `json:"devices,omitempty"`
// BlkioDeviceReadBps contains the Read/Access limit for the pod's devices
BlkioDeviceReadBps []InspectBlkioThrottleDevice `json:"device_read_bps,omitempty"`
// BlkioDeviceWriteBps contains the Write/Access limit for the pod's devices
BlkioDeviceWriteBps []InspectBlkioThrottleDevice `json:"device_write_bps,omitempty"`
// VolumesFrom contains the containers that the pod inherits mounts from
VolumesFrom []string `json:"volumes_from,omitempty"`
// SecurityOpt contains the specified security labels and related SELinux information
SecurityOpts []string `json:"security_opt,omitempty"`
// MemoryLimit contains the specified cgroup memory limit for the pod
MemoryLimit uint64 `json:"memory_limit,omitempty"`
// MemorySwap contains the specified memory swap limit for the pod
MemorySwap uint64 `json:"memory_swap,omitempty"`
// BlkioWeight contains the blkio weight limit for the pod
BlkioWeight uint64 `json:"blkio_weight,omitempty"`
// BlkioWeightDevice contains the blkio weight device limits for the pod
BlkioWeightDevice []InspectBlkioWeightDevice `json:"blkio_weight_device,omitempty"`
}
// InspectPodInfraConfig contains the configuration of the pod's infra
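Assuming a pod created with the new flags, the fields added above can be read back through a pod inspect Go template; the pod name below is a placeholder, and unset limits are simply omitted (`omitempty`):

```
podman pod inspect --format '{{.CPUShares}} {{.CPUSetMems}} {{.MemorySwap}} {{.BlkioWeight}}' mypod
```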

View File

@ -2145,6 +2145,18 @@ func WithServiceContainer(id string) PodCreateOption {
}
}
// WithPodResources sets resource limits to be applied to the pod's cgroup
// these will be inherited by all containers unless overridden.
func WithPodResources(resources specs.LinuxResources) PodCreateOption {
return func(pod *Pod) error {
if pod.valid {
return define.ErrPodFinalized
}
pod.config.ResourceLimits = resources
return nil
}
}
// WithVolatile sets the volatile flag for the container storage.
// The option can potentially cause data loss when used on a container that must survive a machine reboot.
func WithVolatile() CtrCreateOption {

View File

@ -83,6 +83,9 @@ type PodConfig struct {
// ID of the pod's lock
LockID uint32 `json:"lockID"`
// ResourceLimits hold the pod level resource limits
ResourceLimits specs.LinuxResources
}
// podState represents a pod's state
@ -116,18 +119,7 @@ func (p *Pod) ResourceLim() *specs.LinuxResources {
	empty := &specs.LinuxResources{
		CPU: &specs.LinuxCPU{},
	}
-	infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
-	if err != nil {
-		return empty
-	}
-	conf := infra.config.Spec
-	if err != nil {
-		return empty
-	}
-	if conf.Linux == nil || conf.Linux.Resources == nil {
-		return empty
-	}
-	if err = JSONDeepCopy(conf.Linux.Resources, resCopy); err != nil {
+	if err := JSONDeepCopy(p.config.ResourceLimits, resCopy); err != nil {
		return nil
	}
	if resCopy.CPU != nil {
@ -139,51 +131,91 @@ func (p *Pod) ResourceLim() *specs.LinuxResources {
// CPUPeriod returns the pod CPU period
func (p *Pod) CPUPeriod() uint64 {
-	if p.state.InfraContainerID == "" {
-		return 0
-	}
-	infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
-	if err != nil {
-		return 0
-	}
-	conf := infra.config.Spec
-	if conf != nil && conf.Linux != nil && conf.Linux.Resources != nil && conf.Linux.Resources.CPU != nil && conf.Linux.Resources.CPU.Period != nil {
-		return *conf.Linux.Resources.CPU.Period
-	}
-	return 0
+	resLim := p.ResourceLim()
+	if resLim.CPU == nil || resLim.CPU.Period == nil {
+		return 0
+	}
+	return *resLim.CPU.Period
}

// CPUQuota returns the pod CPU quota
func (p *Pod) CPUQuota() int64 {
-	if p.state.InfraContainerID == "" {
-		return 0
-	}
-	infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
-	if err != nil {
-		return 0
-	}
-	conf := infra.config.Spec
-	if conf != nil && conf.Linux != nil && conf.Linux.Resources != nil && conf.Linux.Resources.CPU != nil && conf.Linux.Resources.CPU.Quota != nil {
-		return *conf.Linux.Resources.CPU.Quota
-	}
-	return 0
+	resLim := p.ResourceLim()
+	if resLim.CPU == nil || resLim.CPU.Quota == nil {
+		return 0
+	}
+	return *resLim.CPU.Quota
}

// MemoryLimit returns the pod Memory Limit
func (p *Pod) MemoryLimit() uint64 {
-	if p.state.InfraContainerID == "" {
-		return 0
-	}
-	infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
-	if err != nil {
-		return 0
-	}
-	conf := infra.config.Spec
-	if conf != nil && conf.Linux != nil && conf.Linux.Resources != nil && conf.Linux.Resources.Memory != nil && conf.Linux.Resources.Memory.Limit != nil {
-		val := *conf.Linux.Resources.Memory.Limit
-		return uint64(val)
-	}
-	return 0
+	resLim := p.ResourceLim()
+	if resLim.Memory == nil || resLim.Memory.Limit == nil {
+		return 0
+	}
+	return uint64(*resLim.Memory.Limit)
+}
+
+// MemorySwap returns the pod Memory swap limit
+func (p *Pod) MemorySwap() uint64 {
+	resLim := p.ResourceLim()
+	if resLim.Memory == nil || resLim.Memory.Swap == nil {
+		return 0
+	}
+	return uint64(*resLim.Memory.Swap)
+}
+
+// BlkioWeight returns the pod blkio weight
+func (p *Pod) BlkioWeight() uint64 {
+	resLim := p.ResourceLim()
+	if resLim.BlockIO == nil || resLim.BlockIO.Weight == nil {
+		return 0
+	}
+	return uint64(*resLim.BlockIO.Weight)
+}
+
+// CPUSetMems returns the pod CPUSet memory nodes
+func (p *Pod) CPUSetMems() string {
+	resLim := p.ResourceLim()
+	if resLim.CPU == nil {
+		return ""
+	}
+	return resLim.CPU.Mems
+}
+
+// CPUShares returns the pod cpu shares
+func (p *Pod) CPUShares() uint64 {
+	resLim := p.ResourceLim()
+	if resLim.CPU == nil || resLim.CPU.Shares == nil {
+		return 0
+	}
+	return *resLim.CPU.Shares
+}
+
+// BlkiThrottleReadBps returns the pod throttle devices
+func (p *Pod) BlkiThrottleReadBps() []define.InspectBlkioThrottleDevice {
+	resLim := p.ResourceLim()
+	if resLim.BlockIO == nil || resLim.BlockIO.ThrottleReadBpsDevice == nil {
+		return []define.InspectBlkioThrottleDevice{}
+	}
+	devs, err := blkioDeviceThrottle(nil, resLim.BlockIO.ThrottleReadBpsDevice)
+	if err != nil {
+		return []define.InspectBlkioThrottleDevice{}
+	}
+	return devs
+}
+
+// BlkiThrottleWriteBps returns the pod throttle devices
+func (p *Pod) BlkiThrottleWriteBps() []define.InspectBlkioThrottleDevice {
+	resLim := p.ResourceLim()
+	if resLim.BlockIO == nil || resLim.BlockIO.ThrottleWriteBpsDevice == nil {
+		return []define.InspectBlkioThrottleDevice{}
+	}
+	devs, err := blkioDeviceThrottle(nil, resLim.BlockIO.ThrottleWriteBpsDevice)
+	if err != nil {
+		return []define.InspectBlkioThrottleDevice{}
+	}
+	return devs
}

// NetworkMode returns the Network mode given by the user ex: pod, private...

View File

@ -659,7 +659,6 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
var infraConfig *define.InspectPodInfraConfig
var inspectMounts []define.InspectMount
var devices []define.InspectDevice
var deviceLimits []define.InspectBlkioThrottleDevice
var infraSecurity []string
if p.state.InfraContainerID != "" {
infra, err := p.runtime.GetContainer(p.state.InfraContainerID)
@ -683,18 +682,6 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
if err != nil {
return nil, err
}
var nodes map[string]string
devices, err = infra.GetDevices(false, *infra.config.Spec, nodes)
if err != nil {
return nil, err
}
spec := infra.config.Spec
if spec.Linux != nil && spec.Linux.Resources != nil && spec.Linux.Resources.BlockIO != nil {
deviceLimits, err = blkioDeviceThrottle(nodes, spec.Linux.Resources.BlockIO.ThrottleReadBpsDevice)
if err != nil {
return nil, err
}
}
if len(infra.config.ContainerNetworkConfig.DNSServer) > 0 {
infraConfig.DNSServer = make([]string, 0, len(infra.config.ContainerNetworkConfig.DNSServer))
@ -731,33 +718,38 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) {
	}

	inspectData := define.InspectPodData{
		ID:               p.ID(),
		Name:             p.Name(),
		Namespace:        p.Namespace(),
		Created:          p.CreatedTime(),
		CreateCommand:    p.config.CreateCommand,
		ExitPolicy:       string(p.config.ExitPolicy),
		State:            podState,
		Hostname:         p.config.Hostname,
		Labels:           p.Labels(),
		CreateCgroup:     p.config.UsePodCgroup,
		CgroupParent:     p.CgroupParent(),
		CgroupPath:       p.state.CgroupPath,
		CreateInfra:      infraConfig != nil,
		InfraContainerID: p.state.InfraContainerID,
		InfraConfig:      infraConfig,
		SharedNamespaces: sharesNS,
		NumContainers:    uint(len(containers)),
		Containers:       ctrs,
		CPUSetCPUs:       p.ResourceLim().CPU.Cpus,
		CPUPeriod:        p.CPUPeriod(),
		CPUQuota:         p.CPUQuota(),
		MemoryLimit:      p.MemoryLimit(),
		Mounts:           inspectMounts,
		Devices:          devices,
-		BlkioDeviceReadBps: deviceLimits,
+		BlkioDeviceReadBps: p.BlkiThrottleReadBps(),
		VolumesFrom:      p.VolumesFrom(),
		SecurityOpts:     infraSecurity,
+		MemorySwap:          p.MemorySwap(),
+		BlkioWeight:         p.BlkioWeight(),
+		CPUSetMems:          p.CPUSetMems(),
+		BlkioDeviceWriteBps: p.BlkiThrottleWriteBps(),
+		CPUShares:           p.CPUShares(),
	}

	return &inspectData, nil

View File

@ -80,7 +80,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath
// cgroupfs + rootless = permission denied when creating the cgroup.
if !rootless.IsRootless() {
- res, err := GetLimits(p.InfraContainerSpec.ResourceLimits)
+ res, err := GetLimits(p.ResourceLimits)
if err != nil {
return nil, err
}
@ -113,7 +113,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option
// If we are set to use pod cgroups, set the cgroup parent that
// all containers in the pod will share
if pod.config.UsePodCgroup {
- cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.InfraContainerSpec.ResourceLimits)
+ cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.ResourceLimits)
if err != nil {
return nil, fmt.Errorf("unable to create pod cgroup for pod %s: %w", pod.ID(), err)
}

View File

@ -302,60 +302,6 @@ func CompleteSpec(ctx context.Context, r *libpod.Runtime, s *specgen.SpecGenerat
return warnings, nil
}
// FinishThrottleDevices takes the temporary representation of the throttle
// devices in the specgen and looks up the major and major minors. it then
// sets the throttle devices proper in the specgen
func FinishThrottleDevices(s *specgen.SpecGenerator) error {
if bps := s.ThrottleReadBpsDevice; len(bps) > 0 {
for k, v := range bps {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return err
}
v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
if s.ResourceLimits.BlockIO == nil {
s.ResourceLimits.BlockIO = new(spec.LinuxBlockIO)
}
s.ResourceLimits.BlockIO.ThrottleReadBpsDevice = append(s.ResourceLimits.BlockIO.ThrottleReadBpsDevice, v)
}
}
if bps := s.ThrottleWriteBpsDevice; len(bps) > 0 {
for k, v := range bps {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return err
}
v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
s.ResourceLimits.BlockIO.ThrottleWriteBpsDevice = append(s.ResourceLimits.BlockIO.ThrottleWriteBpsDevice, v)
}
}
if iops := s.ThrottleReadIOPSDevice; len(iops) > 0 {
for k, v := range iops {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return err
}
v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
s.ResourceLimits.BlockIO.ThrottleReadIOPSDevice = append(s.ResourceLimits.BlockIO.ThrottleReadIOPSDevice, v)
}
}
if iops := s.ThrottleWriteIOPSDevice; len(iops) > 0 {
for k, v := range iops {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return err
}
v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
s.ResourceLimits.BlockIO.ThrottleWriteIOPSDevice = append(s.ResourceLimits.BlockIO.ThrottleWriteIOPSDevice, v)
}
}
return nil
}
// ConfigToSpec takes a completed container config and converts it back into a specgenerator for purposes of cloning an existing container
func ConfigToSpec(rt *libpod.Runtime, specg *specgen.SpecGenerator, contaierID string) (*libpod.Container, *libpod.InfraInherit, error) {
c, err := rt.LookupContainer(contaierID)
@ -540,3 +486,63 @@ func mapSecurityConfig(c *libpod.ContainerConfig, s *specgen.SpecGenerator) {
s.Groups = c.Groups
s.HostUsers = c.HostUsers
}
// FinishThrottleDevices takes the temporary representation of the throttle
// devices in the specgen and looks up the major and major minors. it then
// sets the throttle devices proper in the specgen
func FinishThrottleDevices(s *specgen.SpecGenerator) error {
if s.ResourceLimits == nil {
s.ResourceLimits = &spec.LinuxResources{}
}
if s.ResourceLimits.BlockIO == nil {
s.ResourceLimits.BlockIO = &spec.LinuxBlockIO{}
}
if bps := s.ThrottleReadBpsDevice; len(bps) > 0 {
for k, v := range bps {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return fmt.Errorf("could not parse throttle device at %s: %w", k, err)
}
v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
if s.ResourceLimits.BlockIO == nil {
s.ResourceLimits.BlockIO = new(spec.LinuxBlockIO)
}
s.ResourceLimits.BlockIO.ThrottleReadBpsDevice = append(s.ResourceLimits.BlockIO.ThrottleReadBpsDevice, v)
}
}
if bps := s.ThrottleWriteBpsDevice; len(bps) > 0 {
for k, v := range bps {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return fmt.Errorf("could not parse throttle device at %s: %w", k, err)
}
v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
s.ResourceLimits.BlockIO.ThrottleWriteBpsDevice = append(s.ResourceLimits.BlockIO.ThrottleWriteBpsDevice, v)
}
}
if iops := s.ThrottleReadIOPSDevice; len(iops) > 0 {
for k, v := range iops {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return fmt.Errorf("could not parse throttle device at %s: %w", k, err)
}
v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
s.ResourceLimits.BlockIO.ThrottleReadIOPSDevice = append(s.ResourceLimits.BlockIO.ThrottleReadIOPSDevice, v)
}
}
if iops := s.ThrottleWriteIOPSDevice; len(iops) > 0 {
for k, v := range iops {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return fmt.Errorf("could not parse throttle device at %s: %w", k, err)
}
v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
s.ResourceLimits.BlockIO.ThrottleWriteIOPSDevice = append(s.ResourceLimits.BlockIO.ThrottleWriteIOPSDevice, v)
}
}
return nil
}
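The major:minor pair resolved above is what ultimately appears in the pod cgroup's io.max file on cgroup v2. A rough way to check it by hand (device path, cgroup path, and values are illustrative):

```
stat -c 'major=%t minor=%T' /dev/sda          # hex major/minor of the block device node
cat /sys/fs/cgroup/<pod-cgroup-path>/io.max
# e.g. 8:0 rbps=1048576 wbps=1048576 riops=max wiops=max
```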

View File

@ -55,9 +55,6 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener
}
}
if err := FinishThrottleDevices(s); err != nil {
return nil, nil, nil, err
}
// Set defaults for unset namespaces
if s.PidNS.IsDefault() {
defaultNS, err := GetDefaultNamespaceMode("pid", rtc, pod)

View File

@ -309,6 +309,17 @@ func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runt
}
g.Config.Linux.Resources = s.ResourceLimits
}
weightDevices, err := WeightDevices(s.WeightDevice)
if err != nil {
return nil, err
}
if len(weightDevices) > 0 {
for _, dev := range weightDevices {
g.AddLinuxResourcesBlockIOWeightDevice(dev.Major, dev.Minor, *dev.Weight)
}
}
// Devices
// set the default rule at the beginning of device configuration
if !inUserNS && !s.Privileged {
@ -345,14 +356,6 @@ func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runt
}
}
for k, v := range s.WeightDevice {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return nil, fmt.Errorf("failed to inspect '%s' in --blkio-weight-device: %w", k, err)
}
g.AddLinuxResourcesBlockIOWeightDevice((int64(unix.Major(uint64(statT.Rdev)))), (int64(unix.Minor(uint64(statT.Rdev)))), *v.Weight) //nolint: unconvert
}
BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g)
g.ClearProcessEnv()
@ -413,3 +416,19 @@ func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runt
return configSpec, nil
}
func WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) {
devs := []spec.LinuxWeightDevice{}
for k, v := range wtDevices {
statT := unix.Stat_t{}
if err := unix.Stat(k, &statT); err != nil {
return nil, fmt.Errorf("failed to inspect '%s' in --blkio-weight-device: %w", k, err)
}
dev := new(spec.LinuxWeightDevice)
dev.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert
dev.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert
dev.Weight = v.Weight
devs = append(devs, *dev)
}
return devs, nil
}
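Relative block IO weights only take effect when the device's I/O scheduler supports them — the system test below switches its loop device to bfq for exactly this reason. Under cgroup v2 with BFQ, per-device weights set via --blkio-weight-device typically land in io.bfq.weight (path and values are illustrative):

```
cat /sys/fs/cgroup/<pod-cgroup-path>/io.bfq.weight
# default 50
# 8:0 123
```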

View File

@ -13,6 +13,7 @@ import (
"github.com/containers/podman/v4/pkg/domain/entities"
"github.com/containers/podman/v4/pkg/specgen"
"github.com/containers/podman/v4/pkg/specgenutil"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
@ -21,6 +22,10 @@ func MakePod(p *entities.PodSpec, rt *libpod.Runtime) (*libpod.Pod, error) {
return nil, err
}
if p.PodSpecGen.ResourceLimits == nil {
p.PodSpecGen.ResourceLimits = &specs.LinuxResources{}
}
if !p.PodSpecGen.NoInfra {
imageName, err := PullOrBuildInfraImage(rt, p.PodSpecGen.InfraImage)
if err != nil {
@ -38,10 +43,33 @@ func MakePod(p *entities.PodSpec, rt *libpod.Runtime) (*libpod.Pod, error) {
}
}
if !p.PodSpecGen.NoInfra {
err := FinishThrottleDevices(p.PodSpecGen.InfraContainerSpec)
if err != nil {
return nil, err
}
if p.PodSpecGen.InfraContainerSpec.ResourceLimits.BlockIO != nil {
p.PodSpecGen.ResourceLimits.BlockIO = p.PodSpecGen.InfraContainerSpec.ResourceLimits.BlockIO
}
weightDevices, err := WeightDevices(p.PodSpecGen.InfraContainerSpec.WeightDevice)
if err != nil {
return nil, err
}
if p.PodSpecGen.ResourceLimits != nil && len(weightDevices) > 0 {
if p.PodSpecGen.ResourceLimits.BlockIO == nil {
p.PodSpecGen.ResourceLimits.BlockIO = &specs.LinuxBlockIO{}
}
p.PodSpecGen.ResourceLimits.BlockIO.WeightDevice = weightDevices
}
}
options, err := createPodOptions(&p.PodSpecGen)
if err != nil {
return nil, err
}
pod, err := rt.NewPod(context.Background(), p.PodSpecGen, options...)
if err != nil {
return nil, err
@ -55,6 +83,11 @@ func MakePod(p *entities.PodSpec, rt *libpod.Runtime) (*libpod.Pod, error) {
return nil, err
}
p.PodSpecGen.InfraContainerSpec.User = "" // infraSpec user will get incorrectly assigned via the container creation process, overwrite here
// infra's resource limits are used as a parsing tool,
// we do not want infra to get these resources in its cgroup
// make sure of that here.
p.PodSpecGen.InfraContainerSpec.ResourceLimits = nil
p.PodSpecGen.InfraContainerSpec.WeightDevice = nil
rtSpec, spec, opts, err := MakeContainer(context.Background(), rt, p.PodSpecGen.InfraContainerSpec, false, nil)
if err != nil {
return nil, err
@ -122,6 +155,10 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er
options = append(options, libpod.WithPodHostname(p.Hostname))
}
if p.ResourceLimits != nil {
options = append(options, libpod.WithPodResources(*p.ResourceLimits))
}
options = append(options, libpod.WithPodExitPolicy(p.ExitPolicy))
return options, nil

View File

@ -74,6 +74,12 @@ func getCPULimits(c *entities.ContainerCreateOptions) *specs.LinuxCPU {
func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (*specs.LinuxBlockIO, error) {
var err error
io := &specs.LinuxBlockIO{}
if s.ResourceLimits == nil {
s.ResourceLimits = &specs.LinuxResources{}
}
if s.ResourceLimits.BlockIO == nil {
s.ResourceLimits.BlockIO = &specs.LinuxBlockIO{}
}
hasLimits := false
if b := c.BlkIOWeight; len(b) > 0 {
u, err := strconv.ParseUint(b, 10, 16)
@ -82,6 +88,7 @@ func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (
}
nu := uint16(u)
io.Weight = &nu
s.ResourceLimits.BlockIO.Weight = &nu
hasLimits = true
}
@ -96,6 +103,7 @@ func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (
if s.ThrottleReadBpsDevice, err = parseThrottleBPSDevices(bps); err != nil {
return nil, err
} }
hasLimits = true
}
@ -123,6 +131,8 @@ func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (
if !hasLimits {
return nil, nil
}
io = s.ResourceLimits.BlockIO
return io, nil
}
@ -509,7 +519,7 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
return err
}
}
- if s.ResourceLimits.BlockIO == nil || (len(c.BlkIOWeight) != 0 || len(c.BlkIOWeightDevice) != 0) {
+ if s.ResourceLimits.BlockIO == nil || (len(c.BlkIOWeight) != 0 || len(c.BlkIOWeightDevice) != 0 || len(c.DeviceReadBPs) != 0 || len(c.DeviceWriteBPs) != 0) {
s.ResourceLimits.BlockIO, err = getIOLimits(s, c)
if err != nil {
return err

View File

@ -2,12 +2,17 @@
load helpers load helpers
LOOPDEVICE=
# This is a long ugly way to clean up pods and remove the pause image
function teardown() {
run_podman pod rm -f -t 0 -a
run_podman rm -f -t 0 -a
run_podman rmi --ignore $(pause_image)
basic_teardown
if [[ -n "$LOOPDEVICE" ]]; then
losetup -d $LOOPDEVICE
fi
}
@ -474,31 +479,57 @@ spec:
@test "pod resource limits" {
    skip_if_remote "resource limits only implemented on non-remote"
-    if is_rootless; then
+    if is_rootless || ! is_cgroupsv2; then
        skip "only meaningful for rootful"
    fi

-    local name1="resources1"
-    run_podman --cgroup-manager=systemd pod create --name=$name1 --cpus=5 --memory=10m
-    run_podman --cgroup-manager=systemd pod start $name1
-    run_podman pod inspect --format '{{.CgroupPath}}' $name1
-    local path1="$output"
-    local actual1=$(< /sys/fs/cgroup/$path1/cpu.max)
-    is "$actual1" "500000 100000" "resource limits set properly"
-    local actual2=$(< /sys/fs/cgroup/$path1/memory.max)
-    is "$actual2" "10485760" "resource limits set properly"
-    run_podman pod --cgroup-manager=systemd rm -f $name1
-    local name2="resources2"
-    run_podman --cgroup-manager=cgroupfs pod create --cpus=5 --memory=10m --name=$name2
-    run_podman --cgroup-manager=cgroupfs pod start $name2
-    run_podman pod inspect --format '{{.CgroupPath}}' $name2
-    local path2="$output"
-    local actual2=$(< /sys/fs/cgroup/$path2/cpu.max)
-    is "$actual2" "500000 100000" "resource limits set properly"
-    local actual2=$(< /sys/fs/cgroup/$path2/memory.max)
-    is "$actual2" "10485760" "resource limits set properly"
-    run_podman --cgroup-manager=cgroupfs pod rm $name2
+    # create loopback device
+    lofile=${PODMAN_TMPDIR}/disk.img
+    fallocate -l 1k ${lofile}
+    LOOPDEVICE=$(losetup --show -f $lofile)
+    # tr needed because losetup seems to use %2d
+    lomajmin=$(losetup -l --noheadings --output MAJ:MIN $LOOPDEVICE | tr -d ' ')
+    run grep -w bfq /sys/block/$(basename ${LOOPDEVICE})/queue/scheduler
+    if [ $status -ne 0 ]; then
+        skip "BFQ scheduler is not supported on the system"
+        if [ -f ${lofile} ]; then
+            run_podman '?' rm -t 0 --all --force --ignore
+
+            while read path dev; do
+                if [[ "$path" == "$lofile" ]]; then
+                    losetup -d $dev
+                fi
+            done < <(losetup -l --noheadings --output BACK-FILE,NAME)
+            rm ${lofile}
+        fi
+    fi
+    echo bfq > /sys/block/$(basename ${LOOPDEVICE})/queue/scheduler
+
+    expected_limits="
+cpu.max | 500000 100000
+memory.max | 5242880
+memory.swap.max | 1068498944
+io.max | $lomajmin rbps=1048576 wbps=1048576 riops=max wiops=max
+"
+
+    for cgm in systemd cgroupfs; do
+        local name=resources-$cgm
+        run_podman --cgroup-manager=$cgm pod create --name=$name --cpus=5 --memory=5m --memory-swap=1g --cpu-shares=1000 --cpuset-cpus=0 --cpuset-mems=0 --device-read-bps=${LOOPDEVICE}:1mb --device-write-bps=${LOOPDEVICE}:1mb --blkio-weight-device=${LOOPDEVICE}:123 --blkio-weight=50
+        run_podman --cgroup-manager=$cgm pod start $name
+        run_podman pod inspect --format '{{.CgroupPath}}' $name
+        local cgroup_path="$output"
+
+        while read unit expect; do
+            local actual=$(< /sys/fs/cgroup/$cgroup_path/$unit)
+            is "$actual" "$expect" "resource limit under $cgm: $unit"
+        done < <(parse_table "$expected_limits")
+        run_podman --cgroup-manager=$cgm pod rm -f $name
+    done
+
+    # Clean up, and prevent duplicate cleanup in teardown
+    losetup -d $LOOPDEVICE
+    LOOPDEVICE=
}
@test "podman pod ps doesn't race with pod rm" {