
There is a problem where our tail code does not correctly handle partial log lines. This makes podman logs --tail output possibly incorrect lines when k8s-file is used. It manifests as a flake in CI because partial lines are only written sometimes, basically whenever the output is flushed before a newline is written.

Our code must not count partial lines, which was already the case, but the important part is that we must keep reading backwards until the next full (F) line, because all partial (P) lines still have to be added to their full line. See the added tests for details on what the log file looks like.

While fixing this, I reworked the tail logic a bit. There is absolutely no reason to read the lines in a separate goroutine just to pass them back via a channel; we can do this in the same routine. The logic is very simple: read the lines backwards, append each line to the result, and at the end invert the result slice, since tail must return the lines in the correct order. This is more efficient than allocating two different slices or prepending each line, which would require a new allocation per line.

Lastly, the readFromLogFile() function wrote the tail lines back to the log line channel in the same routine that reads the logs, which was bad and caused a deadlock when the returned lines exceeded the channel size. There is no reason to allocate a big channel; we can just write the log lines from a different goroutine, keeping them off the main routine where the logs are read anyway.

A new system test and unit tests have been added to check corner cases.

Fixes #19545

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
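To illustrate the reworked tail logic described above, here is a minimal, self-contained sketch. It is not the actual podman implementation: getTailLog and its in-memory lines slice are hypothetical stand-ins for the real reverse file reader, and the "TIMESTAMP STREAM P|F MSG" line layout reflects the k8s-file format referenced above. Only full (F) lines count toward the tail value, partial (P) lines keep being collected until the next full line, and the result is inverted once at the end.

package main

import (
	"fmt"
	"strings"
)

// getTailLog returns the physical log lines that make up the last `tail`
// logical lines, where each physical line looks like
// "TIMESTAMP STREAM P|F MSG" (P = partial, F = full).
func getTailLog(lines []string, tail int) []string {
	var result []string
	full := 0
	// Walk the physical lines backwards, newest first.
	for i := len(lines) - 1; i >= 0; i-- {
		fields := strings.SplitN(lines[i], " ", 4)
		if len(fields) != 4 {
			continue // skip malformed lines
		}
		if fields[2] == "F" {
			// Only full lines count toward the tail value. Once we
			// have enough, this F line terminates an older logical
			// line, so stop reading.
			if full == tail {
				break
			}
			full++
		}
		// Partial lines never count, but they are always kept: they
		// belong to the full line that follows them.
		result = append(result, lines[i])
	}
	// Invert the slice so the lines come back in file order. This is
	// cheaper than prepending, which would allocate on every line.
	for i, j := 0, len(result)-1; i < j; i, j = i+1, j-1 {
		result[i], result[j] = result[j], result[i]
	}
	return result
}

func main() {
	lines := []string{
		"2023-08-07T19:56:34Z stdout P par",
		"2023-08-07T19:56:34Z stdout F tial",
		"2023-08-07T19:56:35Z stdout F full",
	}
	// tail=2 must return all three physical lines: the partial line
	// "par" belongs to the full line that completes it.
	fmt.Println(getTailLog(lines, 2))
}

Reading backwards and inverting once keeps the work to a single amortized append per line, which is the efficiency point made above.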
package libpod

import (
	"context"
	"errors"
	"fmt"
	"os"
	"time"

	"github.com/containers/podman/v4/libpod/define"
	"github.com/containers/podman/v4/libpod/events"
	"github.com/containers/podman/v4/libpod/logs"
	systemdDefine "github.com/containers/podman/v4/pkg/systemd/define"
	"github.com/nxadm/tail"
	"github.com/nxadm/tail/watch"
	"github.com/sirupsen/logrus"
)

// logDrivers stores the currently available log drivers, do not modify
var logDrivers []string

func init() {
	logDrivers = append(logDrivers, define.KubernetesLogging, define.NoLogging, define.PassthroughLogging)
}

// Log is a runtime function that can read one or more container logs.
func (r *Runtime) Log(ctx context.Context, containers []*Container, options *logs.LogOptions, logChannel chan *logs.LogLine) error {
	for c, ctr := range containers {
		if err := ctr.ReadLog(ctx, options, logChannel, int64(c)); err != nil {
			return err
		}
	}
	return nil
}

// ReadLog reads a container's log based on the input options and returns log lines over a channel.
func (c *Container) ReadLog(ctx context.Context, options *logs.LogOptions, logChannel chan *logs.LogLine, colorID int64) error {
	switch c.LogDriver() {
	case define.PassthroughLogging:
		// If running under systemd, fall back to the more native journald reading.
		if unitName, ok := c.config.Labels[systemdDefine.EnvVariable]; ok {
			return c.readFromJournal(ctx, options, logChannel, colorID, unitName)
		}
		return fmt.Errorf("this container is using the 'passthrough' log driver, cannot read logs: %w", define.ErrNoLogs)
	case define.NoLogging:
		return fmt.Errorf("this container is using the 'none' log driver, cannot read logs: %w", define.ErrNoLogs)
	case define.JournaldLogging:
		return c.readFromJournal(ctx, options, logChannel, colorID, "")
	case define.JSONLogging:
		// TODO provide a separate implementation of this when Conmon
		// has support.
		fallthrough
	case define.KubernetesLogging, "":
		return c.readFromLogFile(ctx, options, logChannel, colorID)
	default:
		return fmt.Errorf("unrecognized log driver %q, cannot read logs: %w", c.LogDriver(), define.ErrInternal)
	}
}

func (c *Container) readFromLogFile(ctx context.Context, options *logs.LogOptions, logChannel chan *logs.LogLine, colorID int64) error {
	t, tailLog, err := logs.GetLogFile(c.LogPath(), options)
	if err != nil {
		// If the log file does not exist, this is not fatal.
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}
return fmt.Errorf("unable to read log file %s for %s : %w", c.ID(), c.LogPath(), err)
|
|
	}
	options.WaitGroup.Add(1)
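	// Stop tailing once the requested --until time has been reached.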
	go func() {
		if options.Until.After(time.Now()) {
			time.Sleep(time.Until(options.Until))
			if err := t.Stop(); err != nil {
				logrus.Errorf("Stopping logger: %v", err)
			}
		}
	}()

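	// Send the tail lines from this goroutine: the caller reads from
	// logChannel, so sending the tail result synchronously would
	// deadlock once it exceeds the channel's buffer size.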
	go func() {
		for _, nll := range tailLog {
			nll.CID = c.ID()
			nll.CName = c.Name()
			nll.ColorID = colorID
			if nll.Since(options.Since) && nll.Until(options.Until) {
				logChannel <- nll
			}
		}
		defer options.WaitGroup.Done()
		var line *tail.Line
		var ok bool
		for {
			select {
			case <-ctx.Done():
				// the consumer has cancelled
				t.Kill(errors.New("hangup by client"))
				return
			case line, ok = <-t.Lines:
				if !ok {
					// channel was closed
					return
				}
			}
			nll, err := logs.NewLogLine(line.Text)
			if err != nil {
				logrus.Errorf("Getting new log line: %v", err)
				continue
			}
			nll.CID = c.ID()
			nll.CName = c.Name()
			nll.ColorID = colorID
			if nll.Since(options.Since) && nll.Until(options.Until) {
				logChannel <- nll
			}
		}
	}()
	// Check if container is still running or paused
	if options.Follow {
		// If the container isn't running or if we encountered an error
		// getting its state, instruct the logger to read the file
		// until EOF.
		state, err := c.State()
		if err != nil || state != define.ContainerStateRunning {
			if err != nil && !errors.Is(err, define.ErrNoSuchCtr) {
				logrus.Errorf("Getting container state: %v", err)
			}
			go func() {
				// Make sure to wait at least for the poll duration
				// before stopping the file logger (see #10675).
				time.Sleep(watch.POLL_DURATION)
				tailError := t.StopAtEOF()
				if tailError != nil && tailError.Error() != "tail: stop at eof" {
					logrus.Errorf("Stopping logger: %v", tailError)
				}
			}()
			return nil
		}

		// The container is running, so we need to wait for it to exit.
		go func() {
			eventChannel := make(chan *events.Event)
			eventOptions := events.ReadOptions{
				EventChannel: eventChannel,
				Filters:      []string{"event=died", "container=" + c.ID()},
				Stream:       true,
			}
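			// Events() streams until the context is cancelled, so run
			// it in its own goroutine and wait for the "died" event below.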
			go func() {
				if err := c.runtime.Events(ctx, eventOptions); err != nil {
					logrus.Errorf("Waiting for container to exit: %v", err)
				}
			}()
			// Now wait for the died event and signal to finish
			// reading the log until EOF.
			<-eventChannel
			// Make sure to wait at least for the poll duration
			// before stopping the file logger (see #10675).
			time.Sleep(watch.POLL_DURATION)
			tailError := t.StopAtEOF()
if tailError != nil && fmt.Sprintf("%v", tailError) != "tail: stop at eof" {
|
|
logrus.Errorf("Stopping logger: %v", tailError)
|
|
}
|
|
}()
|
|
}
|
|
return nil
|
|
}
|