podman stats: calc CPU percentage correctly

When you run podman stats, the first interval always shows the wrong CPU
usage. To calculate the CPU percentage we get the CPU time from the cgroup
and compare it against the system time elapsed between two stats. Since
there are no previous stats on the first run, an empty struct is used
instead. Thus we do not use the actual running time of the container but
the current Unix timestamp (time since Jan 1 1970).

To fix this, we make sure that the previous stats time is set to the
container start time when no previous stats are available.

[NO NEW TESTS NEEDED] No idea how I could create a test which would have
a predictable cpu usage.

See the linked bugzilla for a reproducer.

Fixes https://bugzilla.redhat.com/show_bug.cgi?id=2066145

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger
2022-03-22 17:10:43 +01:00
parent 02aae4a658
commit 0edb3ddd39
4 changed files with 14 additions and 16 deletions

View File

@ -422,10 +422,6 @@ type PodContainerStats struct {
// GetPodStats returns the stats for each of its containers
func (p *Pod) GetPodStats(previousContainerStats map[string]*define.ContainerStats) (map[string]*define.ContainerStats, error) {
var (
ok bool
prevStat *define.ContainerStats
)
p.lock.Lock()
defer p.lock.Unlock()
@ -438,10 +434,7 @@ func (p *Pod) GetPodStats(previousContainerStats map[string]*define.ContainerSta
}
newContainerStats := make(map[string]*define.ContainerStats)
for _, c := range containers {
if prevStat, ok = previousContainerStats[c.ID()]; !ok {
prevStat = &define.ContainerStats{}
}
newStats, err := c.GetContainerStats(prevStat)
newStats, err := c.GetContainerStats(previousContainerStats[c.ID()])
// If the container wasn't running, don't include it
// but also suppress the error
if err != nil && errors.Cause(err) != define.ErrCtrStateInvalid {

View File

@ -14,7 +14,9 @@ import (
"github.com/pkg/errors"
)
// GetContainerStats gets the running stats for a given container
// GetContainerStats gets the running stats for a given container.
// The previousStats is used to correctly calculate cpu percentages. You
// should pass nil if there is no previous stat for this container.
func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*define.ContainerStats, error) {
stats := new(define.ContainerStats)
stats.ContainerID = c.ID()
@ -36,6 +38,14 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de
return stats, define.ErrCtrStateInvalid
}
if previousStats == nil {
previousStats = &define.ContainerStats{
// if we have no prev stats use the container start time as prev time
// otherwise we cannot correctly calculate the CPU percentage
SystemNano: uint64(c.state.StartedTime.UnixNano()),
}
}
cgroupPath, err := c.cGroupPath()
if err != nil {
return nil, err

View File

@ -56,7 +56,7 @@ func StatsContainer(w http.ResponseWriter, r *http.Request) {
return
}
stats, err := ctnr.GetContainerStats(&define.ContainerStats{})
stats, err := ctnr.GetContainerStats(nil)
if err != nil {
utils.InternalServerError(w, errors.Wrapf(err, "failed to obtain Container %s stats", name))
return

View File

@ -1431,12 +1431,7 @@ func (ic *ContainerEngine) ContainerStats(ctx context.Context, namesOrIds []stri
reportStats := []define.ContainerStats{}
for _, ctr := range containers {
prev, ok := containerStats[ctr.ID()]
if !ok {
prev = &define.ContainerStats{}
}
stats, err := ctr.GetContainerStats(prev)
stats, err := ctr.GetContainerStats(containerStats[ctr.ID()])
if err != nil {
cause := errors.Cause(err)
if queryAll && (cause == define.ErrCtrRemoved || cause == define.ErrNoSuchCtr || cause == define.ErrCtrStateInvalid) {