Files
podman/libpod/container_log.go
Paul Holzinger (f6d18ed41c)
fix goroutine leaks in events and logs backend
When running a single `podman logs` this is not really important since we
will exit when we finish reading the logs. However, for the system
service this is very important: leaking goroutines will cause increased
memory and CPU usage over time.

Both the event and log backends have goroutine leaks with both the
file and journald drivers.

The journald backend has the problem that journal.Wait(IndefiniteWait)
blocks until we get a new journald event. So when a client closes the
connection, the goroutine would still wait until there is a new journal
entry. To fix this we now wait for a maximum of 5 seconds; after that we
can check whether the client connection was closed and exit correctly in
that case.
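
A minimal sketch of that bounded wait, assuming the
github.com/coreos/go-systemd/v22/sdjournal bindings; the package and helper
names below are illustrative, not the exact code in podman's journald backend:

```go
package logsketch

import (
	"context"
	"time"

	"github.com/coreos/go-systemd/v22/sdjournal"
)

// waitForEntry waits until a new journal entry is available or the client
// context is cancelled. Instead of blocking forever with
// journal.Wait(sdjournal.IndefiniteWait), it waits at most five seconds per
// iteration so the goroutine can notice a closed connection and return.
func waitForEntry(ctx context.Context, journal *sdjournal.Journal) (bool, error) {
	for {
		n, err := journal.Next()
		if err != nil {
			return false, err
		}
		if n > 0 {
			// The cursor advanced to a new entry; the caller can read it now.
			return true, nil
		}
		// No new entry yet: wait a bounded amount of time, then check
		// whether the client went away before blocking again.
		journal.Wait(5 * time.Second)
		if ctx.Err() != nil {
			return false, ctx.Err()
		}
	}
}
```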

For the file backend we can fix this by waiting for either a new log line
or the context cancellation at the same time. Previously it would block
waiting for new log lines and only check afterwards whether the client had
closed the connection, and would thus hang forever if no new log lines
arrived.
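
The same idea as a small self-contained sketch against the nxadm/tail API;
the followLines helper and the handle callback are illustrative stand-ins,
the real change is the select loop in readFromLogFile further down:

```go
package logsketch

import (
	"context"
	"errors"

	"github.com/nxadm/tail"
)

// followLines waits on the tail's line channel and the request context at
// the same time, so a closed client connection unblocks the goroutine
// instead of leaking it.
func followLines(ctx context.Context, t *tail.Tail, handle func(string)) {
	for {
		select {
		case <-ctx.Done():
			// The consumer has cancelled; stop tailing and exit.
			t.Kill(errors.New("hangup by client"))
			return
		case line, ok := <-t.Lines:
			if !ok {
				// The tail channel was closed, nothing more to read.
				return
			}
			handle(line.Text)
		}
	}
}
```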

[NO NEW TESTS NEEDED] I am open to ideas on how we can test for memory
leaks in CI.
To test manually run a container like this:
`podman run --log-driver $driver  --name test -d alpine sh -c 'i=1; while [ "$i" -ne 1000 ]; do echo "line $i"; i=$((i + 1)); done; sleep inf'`
where `$driver` can be either `journald` or `k8s-file`.
Then start the podman system service and use:
`curl -m 1 --output -  --unix-socket $XDG_RUNTIME_DIR/podman/podman.sock -v 'http://d/containers/test/logs?follow=1&since=0&stderr=1&stdout=1' &>/dev/null`
to fetch the logs from the API; curl closes the connection after 1 second.
Now run the curl command several times and check the memory usage of the service.

Fixes #14879

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
2022-07-26 13:54:28 -04:00

package libpod

import (
	"context"
	"errors"
	"fmt"
	"os"
	"time"

	"github.com/containers/podman/v4/libpod/define"
	"github.com/containers/podman/v4/libpod/events"
	"github.com/containers/podman/v4/libpod/logs"
	"github.com/nxadm/tail"
	"github.com/nxadm/tail/watch"
	"github.com/sirupsen/logrus"
)

// logDrivers stores the currently available log drivers, do not modify
var logDrivers []string

func init() {
	logDrivers = append(logDrivers, define.KubernetesLogging, define.NoLogging, define.PassthroughLogging)
}

// Log is a runtime function that can read one or more container logs.
func (r *Runtime) Log(ctx context.Context, containers []*Container, options *logs.LogOptions, logChannel chan *logs.LogLine) error {
	for c, ctr := range containers {
		if err := ctr.ReadLog(ctx, options, logChannel, int64(c)); err != nil {
			return err
		}
	}
	return nil
}

// ReadLog reads a container's log based on the input options and returns log lines over a channel.
func (c *Container) ReadLog(ctx context.Context, options *logs.LogOptions, logChannel chan *logs.LogLine, colorID int64) error {
	switch c.LogDriver() {
	case define.PassthroughLogging:
		return fmt.Errorf("this container is using the 'passthrough' log driver, cannot read logs: %w", define.ErrNoLogs)
	case define.NoLogging:
		return fmt.Errorf("this container is using the 'none' log driver, cannot read logs: %w", define.ErrNoLogs)
	case define.JournaldLogging:
		return c.readFromJournal(ctx, options, logChannel, colorID)
	case define.JSONLogging:
		// TODO provide a separate implementation of this when Conmon
		// has support.
		fallthrough
	case define.KubernetesLogging, "":
		return c.readFromLogFile(ctx, options, logChannel, colorID)
	default:
		return fmt.Errorf("unrecognized log driver %q, cannot read logs: %w", c.LogDriver(), define.ErrInternal)
	}
}
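
// readFromLogFile reads this container's k8s-file/json-file log and sends
// matching lines over logChannel; with follow it keeps tailing until the
// client context is cancelled or the container exits.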
func (c *Container) readFromLogFile(ctx context.Context, options *logs.LogOptions, logChannel chan *logs.LogLine, colorID int64) error {
	t, tailLog, err := logs.GetLogFile(c.LogPath(), options)
	if err != nil {
		// If the log file does not exist, this is not fatal.
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}
		return fmt.Errorf("unable to read log file %s for %s: %w", c.LogPath(), c.ID(), err)
	}
	options.WaitGroup.Add(1)
	if len(tailLog) > 0 {
		for _, nll := range tailLog {
			nll.CID = c.ID()
			nll.CName = c.Name()
			nll.ColorID = colorID
			if nll.Since(options.Since) && nll.Until(options.Until) {
				logChannel <- nll
			}
		}
	}

	go func() {
		defer options.WaitGroup.Done()
		var line *tail.Line
		var ok bool
		for {
			select {
			case <-ctx.Done():
				// the consumer has cancelled
				t.Kill(errors.New("hangup by client"))
				return
			case line, ok = <-t.Lines:
				if !ok {
					// channel was closed
					return
				}
			}
			nll, err := logs.NewLogLine(line.Text)
			if err != nil {
				logrus.Errorf("Getting new log line: %v", err)
				continue
			}
			nll.CID = c.ID()
			nll.CName = c.Name()
			nll.ColorID = colorID
			if nll.Since(options.Since) && nll.Until(options.Until) {
				logChannel <- nll
			}
		}
	}()
	// Check if container is still running or paused
	if options.Follow {
		// If the container isn't running or if we encountered an error
		// getting its state, instruct the logger to read the file
		// until EOF.
		state, err := c.State()
		if err != nil || state != define.ContainerStateRunning {
			if err != nil && !errors.Is(err, define.ErrNoSuchCtr) {
				logrus.Errorf("Getting container state: %v", err)
			}
			go func() {
				// Make sure to wait at least for the poll duration
				// before stopping the file logger (see #10675).
				time.Sleep(watch.POLL_DURATION)
				tailError := t.StopAtEOF()
				if tailError != nil && tailError.Error() != "tail: stop at eof" {
					logrus.Errorf("Stopping logger: %v", tailError)
				}
			}()
			return nil
		}

		// The container is running, so we need to wait until it has exited.
		go func() {
			eventChannel := make(chan *events.Event)
			eventOptions := events.ReadOptions{
				EventChannel: eventChannel,
				Filters:      []string{"event=died", "container=" + c.ID()},
				Stream:       true,
			}
			go func() {
				if err := c.runtime.Events(ctx, eventOptions); err != nil {
					logrus.Errorf("Waiting for container to exit: %v", err)
				}
			}()
			// Now wait for the died event and signal to finish
			// reading the log until EOF.
			<-eventChannel
			// Make sure to wait at least for the poll duration
			// before stopping the file logger (see #10675).
			time.Sleep(watch.POLL_DURATION)
			tailError := t.StopAtEOF()
			if tailError != nil && fmt.Sprintf("%v", tailError) != "tail: stop at eof" {
				logrus.Errorf("Stopping logger: %v", tailError)
			}
		}()
	}
	return nil
}