Support --cpuset-<cpus/mems> in podman kube play

This commit adds two new annotations named
io.podman.annotations.cpuset/$ctrname and
io.podman.annotations.memory-nodes/$ctrname.

The first one restricts a container's execution to specific CPU
cores, while the second restricts its memory allocations to specific
NUMA memory nodes. Both annotations are also set automatically on
containers created with the --cpuset-cpus and --cpuset-mems options.

Fixes: containers#26172

Signed-off-by: François Poirotte <clicky@erebot.net>
This commit is contained in:
François Poirotte
2025-05-21 14:39:15 +02:00
parent 27fdd7fc6d
commit 5bfdb25b26
5 changed files with 67 additions and 0 deletions

View File

@ -57,6 +57,10 @@ Note: To customize the name of the infra container created during `podman kube p
Note: Use the **io.podman.annotations.pids-limit/$ctrname** annotation to configure the pod's pids limit.
Note: Use the **io.podman.annotations.cpuset/$ctrname** annotation to restrict a container's execution to a specific set of CPU cores. This is equivalent to the `--cpuset-cpus=number` option in podman-run(1) and accepts the same value format (for example `0-3` or `0,1`).
Note: Use the **io.podman.annotations.memory-nodes/$ctrname** annotation to restrict a container's memory allocations to a specific set of memory nodes on NUMA systems. This is equivalent to the `--cpuset-mems=nodes` option in podman-run(1) and accepts the same value format (for example `0-3` or `0,1`).
`Kubernetes PersistentVolumeClaims`
A Kubernetes PersistentVolumeClaim represents a Podman named volume. Only the PersistentVolumeClaim name is required by Podman to create a volume. Kubernetes annotations can be used to make use of the available options for Podman volumes.

View File

@ -172,6 +172,12 @@ const (
// PIDsLimitAnnotation is used to limit the number of PIDs
PIDsLimitAnnotation = "io.podman.annotations.pids-limit"
// CpusetAnnotation is used to restrict a container's execution to specific CPU cores.
// In kube YAML it is looked up per container, under the key "<annotation>/<container name>".
CpusetAnnotation = "io.podman.annotations.cpuset"
// MemoryNodesAnnotation is used to restrict a container's memory allocations to specific
// memory nodes on NUMA systems.
// In kube YAML it is looked up per container, under the key "<annotation>/<container name>".
MemoryNodesAnnotation = "io.podman.annotations.memory-nodes"
// TotalAnnotationSizeLimitB is the max length of annotations allowed by Kubernetes.
TotalAnnotationSizeLimitB int = 256 * (1 << 10) // 256 kB
)

View File

@ -393,6 +393,28 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
}
}
// Honor the per-container cpuset annotation, keyed as "<CpusetAnnotation>/<container name>".
// The value is recorded on the generated spec under the bare annotation key and
// applied as the container's CPU set.
// NOTE(review): `annotations` is defined outside this hunk — presumably the pod's
// annotations; confirm against the enclosing function.
if cpuset, ok := annotations[define.CpusetAnnotation+"/"+opts.Container.Name]; ok {
s.Annotations[define.CpusetAnnotation] = cpuset
// Allocate the nested resource structs on demand so the assignment below
// never dereferences a nil pointer.
if s.ResourceLimits == nil {
s.ResourceLimits = &spec.LinuxResources{}
}
if s.ResourceLimits.CPU == nil {
s.ResourceLimits.CPU = &spec.LinuxCPU{}
}
// The annotation value is passed through unchanged; no validation happens here.
s.ResourceLimits.CPU.Cpus = cpuset
}
// Honor the per-container memory-nodes annotation, keyed as
// "<MemoryNodesAnnotation>/<container name>". The value is recorded on the generated
// spec under the bare annotation key and applied as the container's memory node set.
// NOTE(review): `annotations` is defined outside this hunk — presumably the pod's
// annotations; confirm against the enclosing function.
if memNodes, ok := annotations[define.MemoryNodesAnnotation+"/"+opts.Container.Name]; ok {
s.Annotations[define.MemoryNodesAnnotation] = memNodes
// Allocate the nested resource structs on demand so the assignment below
// never dereferences a nil pointer.
if s.ResourceLimits == nil {
s.ResourceLimits = &spec.LinuxResources{}
}
// Memory nodes live on the CPU resource struct (LinuxCPU.Mems), alongside Cpus.
if s.ResourceLimits.CPU == nil {
s.ResourceLimits.CPU = &spec.LinuxCPU{}
}
// The annotation value is passed through unchanged; no validation happens here.
s.ResourceLimits.CPU.Mems = memNodes
}
if label, ok := opts.Annotations[define.InspectAnnotationLabel+"/"+opts.Container.Name]; ok {
if label == "nested" {
s.ContainerSecurityConfig.LabelNested = &localTrue

View File

@ -531,6 +531,14 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
s.Annotations[define.PIDsLimitAnnotation] = strconv.FormatInt(*c.PIDsLimit, 10)
}
// Mirror the cpuset create options into annotations on the spec.
// Only non-empty values are recorded, so no annotation is written when the
// corresponding option string is empty.
if c.CPUSetCPUs != "" {
s.Annotations[define.CpusetAnnotation] = c.CPUSetCPUs
}
if c.CPUSetMems != "" {
s.Annotations[define.MemoryNodesAnnotation] = c.CPUSetMems
}
if len(c.StorageOpts) > 0 {
opts := make(map[string]string, len(c.StorageOpts))
for _, opt := range c.StorageOpts {

View File

@ -6194,4 +6194,31 @@ spec:
Expect(exec.OutputToString()).To(Equal("10"))
})
// Verifies that the cpuset annotation on a pod is applied to the named container:
// the pod is played from generated YAML and the container's effective CPU set is
// read back from inside the container.
It("test cpuset annotation", func() {
// The annotation key is suffixed with the container name it targets.
ctrAnnotation := "io.podman.annotations.cpuset/" + defaultCtrName
// Pod with the annotation set to CPU "0", one init container and one long-running ("top") container.
pod := getPod(withAnnotation(ctrAnnotation, "0"), withPodInitCtr(getCtr(withImage(CITEST_IMAGE), withCmd([]string{"printenv", "container"}), withInitCtr(), withName("init-test"))), withCtr(getCtr(withImage(CITEST_IMAGE), withCmd([]string{"top"}))))
err := generateKubeYaml("pod", pod, kubeYaml)
Expect(err).ToNot(HaveOccurred())
kube := podmanTest.Podman([]string{"kube", "play", kubeYaml})
kube.WaitWithDefaultTimeout()
Expect(kube).Should(ExitCleanly())
// NOTE(review): this path presumably assumes a cgroup v2 unified hierarchy with the
// cpuset controller available to the container — confirm the test is skipped otherwise.
exec := podmanTest.PodmanExitCleanly("exec", "testPod-"+defaultCtrName, "cat", "/sys/fs/cgroup/cpuset.cpus.effective")
// The effective CPU set must match the annotation value exactly.
Expect(exec.OutputToString()).To(Equal("0"))
})
// Verifies that the memory-nodes annotation on a pod is applied to the named container:
// the pod is played from generated YAML and the container's effective memory node set
// is read back from inside the container.
It("test memory-nodes annotation", func() {
// The annotation key is suffixed with the container name it targets.
ctrAnnotation := "io.podman.annotations.memory-nodes/" + defaultCtrName
// Pod with the annotation set to node "0", one init container and one long-running ("top") container.
pod := getPod(withAnnotation(ctrAnnotation, "0"), withPodInitCtr(getCtr(withImage(CITEST_IMAGE), withCmd([]string{"printenv", "container"}), withInitCtr(), withName("init-test"))), withCtr(getCtr(withImage(CITEST_IMAGE), withCmd([]string{"top"}))))
err := generateKubeYaml("pod", pod, kubeYaml)
Expect(err).ToNot(HaveOccurred())
kube := podmanTest.Podman([]string{"kube", "play", kubeYaml})
kube.WaitWithDefaultTimeout()
Expect(kube).Should(ExitCleanly())
// NOTE(review): this path presumably assumes a cgroup v2 unified hierarchy with the
// cpuset controller available to the container — confirm the test is skipped otherwise.
exec := podmanTest.PodmanExitCleanly("exec", "testPod-"+defaultCtrName, "cat", "/sys/fs/cgroup/cpuset.mems.effective")
// The effective memory node set must match the annotation value exactly.
Expect(exec.OutputToString()).To(Equal("0"))
})
})