From b25b330306782019d7aaf7618cd4598a9ae87250 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Wed, 24 Nov 2021 13:41:02 +0100
Subject: [PATCH 1/3] stats: get the memory limit from the spec

OCI runtimes may set the memory limits in different ways, e.g., crun
creates a sub-cgroup where the limits are applied, while runc applies
them directly on the created cgroup.  Since there is no standardization
on the cgroup path to use, just use the limit specified in the spec
file.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
 libpod/stats.go | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/libpod/stats.go b/libpod/stats.go
index 9751525354..cc1250e837 100644
--- a/libpod/stats.go
+++ b/libpod/stats.go
@@ -3,6 +3,7 @@
 package libpod
 
 import (
+	"math"
 	"strings"
 	"syscall"
 	"time"
@@ -68,7 +69,7 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de
 	stats.AvgCPU = calculateAvgCPU(stats.CPU, previousStats.AvgCPU, previousStats.DataPoints)
 	stats.DataPoints = previousStats.DataPoints + 1
 	stats.MemUsage = cgroupStats.Memory.Usage.Usage
-	stats.MemLimit = getMemLimit(cgroupStats.Memory.Usage.Limit)
+	stats.MemLimit = c.getMemLimit()
 	stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100
 	stats.PIDs = 0
 	if conState == define.ContainerStateRunning || conState == define.ContainerStatePaused {
@@ -91,22 +92,29 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de
 	return stats, nil
 }
 
-// getMemory limit returns the memory limit for a given cgroup
-// If the configured memory limit is larger than the total memory on the sys, the
-// physical system memory size is returned
-func getMemLimit(cgroupLimit uint64) uint64 {
+// getMemLimit returns the memory limit from the container's spec, capped at the physical memory size when it can be queried
+func (c *Container) getMemLimit() uint64 {
+	memLimit := uint64(math.MaxUint64)
+
+	if c.config.Spec.Linux != nil && c.config.Spec.Linux.Resources != nil &&
+		c.config.Spec.Linux.Resources.Memory != nil && c.config.Spec.Linux.Resources.Memory.Limit != nil {
+		memLimit = uint64(*c.config.Spec.Linux.Resources.Memory.Limit)
+	}
+
 	si := &syscall.Sysinfo_t{}
 	err := syscall.Sysinfo(si)
 	if err != nil {
-		return cgroupLimit
+		return memLimit
 	}
 
 	//nolint:unconvert
 	physicalLimit := uint64(si.Totalram)
-	if cgroupLimit > physicalLimit {
+	// A zero limit, or one above the installed RAM (including the MaxUint64 default used when the spec sets none), reports the physical size.
+	if memLimit == 0 || memLimit > physicalLimit {
 		return physicalLimit
 	}
-	return cgroupLimit
+
+	return memLimit
 }
 
 // calculateCPUPercent calculates the cpu usage using the latest measurement in stats.

From a66f40b4df039e94572fa38c070207a435cfa466 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Wed, 24 Nov 2021 10:34:47 +0100
Subject: [PATCH 2/3] libpod, inspect: export cgroup path

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
 libpod/container_inspect.go        | 11 +++++++++++
 libpod/define/container_inspect.go |  1 +
 2 files changed, 12 insertions(+)

diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go
index 76a08ce300..83b6432665 100644
--- a/libpod/container_inspect.go
+++ b/libpod/container_inspect.go
@@ -97,6 +97,16 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
 		return nil, err
 	}
 
+	cgroupPath, err := c.cGroupPath()
+	if err != nil {
+		// Handle the case where the container is not running or has no cgroup.
+		if errors.Is(err, define.ErrNoCgroups) || errors.Is(err, define.ErrCtrStopped) {
+			cgroupPath = ""
+		} else {
+			return nil, err
+		}
+	}
+
 	data := &define.InspectContainerData{
 		ID:      config.ID,
 		Created: config.CreatedTime,
@@ -116,6 +126,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
 			StartedAt:    runtimeInfo.StartedTime,
 			FinishedAt:   runtimeInfo.FinishedTime,
 			Checkpointed: runtimeInfo.Checkpointed,
+			CgroupPath:   cgroupPath,
 		},
 		Image:           config.RootfsImageID,
 		ImageName:       config.RootfsImageName,
diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go
index 8e07cff816..677b392186 100644
--- a/libpod/define/container_inspect.go
+++ b/libpod/define/container_inspect.go
@@ -204,6 +204,7 @@ type InspectContainerState struct {
 	FinishedAt   time.Time          `json:"FinishedAt"`
 	Health       HealthCheckResults `json:"Health,omitempty"`
 	Checkpointed bool               `json:"Checkpointed,omitempty"`
+	CgroupPath   string             `json:"CgroupPath,omitempty"`
 }
 
 // Healthcheck returns the HealthCheckResults. This is used for old podman compat

From e648122b2986ea3bdcee33ebaef8731e574e8f54 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivan@redhat.com>
Date: Wed, 24 Nov 2021 10:35:07 +0100
Subject: [PATCH 3/3] libpod: improve heuristic to detect cgroup

Improve the heuristic to detect the scope that was created for the container.
This is necessary with systemd running as PID 1, since it moves itself
to a different sub-cgroup, thus stats would not account for other
processes in the same container.

Closes: https://github.com/containers/podman/issues/12400

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
---
 libpod/container.go                | 25 +++++++++++++++++++++++++
 libpod/container_internal_linux.go |  2 +-
 test/e2e/systemd_test.go           |  5 +++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/libpod/container.go b/libpod/container.go
index c38acb5135..482af43f39 100644
--- a/libpod/container.go
+++ b/libpod/container.go
@@ -6,9 +6,11 @@ import (
 	"io/ioutil"
 	"net"
 	"os"
+	"strings"
 	"time"
 
 	types040 "github.com/containernetworking/cni/pkg/types/040"
+	"github.com/containers/common/pkg/config"
 	"github.com/containers/common/pkg/secrets"
 	"github.com/containers/image/v5/manifest"
 	"github.com/containers/podman/v3/libpod/define"
@@ -963,6 +965,29 @@ func (c *Container) cGroupPath() (string, error) {
 		return "", errors.Errorf("could not find any cgroup in %q", procPath)
 	}
 
+	cgroupManager := c.CgroupManager()
+	switch {
+	case c.config.CgroupsMode == cgroupSplit:
+		name := fmt.Sprintf("/libpod-payload-%s/", c.ID())
+		if index := strings.LastIndex(cgroupPath, name); index >= 0 {
+			return cgroupPath[:index+len(name)-1], nil
+		}
+	case cgroupManager == config.CgroupfsCgroupsManager:
+		name := fmt.Sprintf("/libpod-%s/", c.ID())
+		if index := strings.LastIndex(cgroupPath, name); index >= 0 {
+			return cgroupPath[:index+len(name)-1], nil
+		}
+	case cgroupManager == config.SystemdCgroupsManager:
+		// When running under systemd, try to detect the scope that was requested
+		// to be created.  It improves the heuristic since we report the first
+		// cgroup that was created instead of the cgroup where PID 1 might have
+		// moved to.
+		name := fmt.Sprintf("/libpod-%s.scope/", c.ID())
+		if index := strings.LastIndex(cgroupPath, name); index >= 0 {
+			return cgroupPath[:index+len(name)-1], nil
+		}
+	}
+
 	return cgroupPath, nil
 }
 
diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go
index 364b77f29a..956460c325 100644
--- a/libpod/container_internal_linux.go
+++ b/libpod/container_internal_linux.go
@@ -2618,7 +2618,7 @@ func (c *Container) getOCICgroupPath() (string, error) {
 		if err != nil {
 			return "", err
 		}
-		return filepath.Join(selfCgroup, "container"), nil
+		return filepath.Join(selfCgroup, fmt.Sprintf("libpod-payload-%s", c.ID())), nil
 	case cgroupManager == config.SystemdCgroupsManager:
 		// When the OCI runtime is set to use Systemd as a cgroup manager, it
 		// expects cgroups to be passed as follows:
diff --git a/test/e2e/systemd_test.go b/test/e2e/systemd_test.go
index 98def3d8f2..32c2cd1b87 100644
--- a/test/e2e/systemd_test.go
+++ b/test/e2e/systemd_test.go
@@ -109,6 +109,11 @@ WantedBy=multi-user.target
 		stats := podmanTest.Podman([]string{"stats", "--no-stream", ctrName})
 		stats.WaitWithDefaultTimeout()
 		Expect(stats).Should(Exit(0))
+
+		cgroupPath := podmanTest.Podman([]string{"inspect", "--format='{{.State.CgroupPath}}'", ctrName})
+		cgroupPath.WaitWithDefaultTimeout()
+		Expect(cgroupPath).Should(Exit(0))
+		Expect(cgroupPath.OutputToString()).To(Not(ContainSubstring("init.scope")))
 	})
 
 	It("podman create container with systemd entrypoint triggers systemd mode", func() {