mirror of
https://github.com/containers/podman.git
synced 2025-07-02 00:30:00 +08:00
Use persist dir for oom file
Conmon writes the exit file and oom file (if container was oom killed) to the persist directory. This directory is retained across reboots as well. Update podman to create a persist-dir/ctr-id for the exit and oom files for each container to be written to. The oom state of container is set after reading the files from the persist-dir/ctr-id directory. The exit code still continues to read the exit file from the exits directory. Signed-off-by: Urvashi Mohnani <umohnani@redhat.com>
This commit is contained in:
@ -890,6 +890,13 @@ func (c *Container) execExitFileDir(sessionID string) string {
|
||||
return filepath.Join(c.execBundlePath(sessionID), "exit")
|
||||
}
|
||||
|
||||
// execPersistDir gets the path to the container's persist directory
|
||||
// The persist directory container the exit file and oom file (if oomkilled)
|
||||
// of a container
|
||||
func (c *Container) execPersistDir(sessionID string) string {
|
||||
return filepath.Join(c.execBundlePath(sessionID), "persist", c.ID())
|
||||
}
|
||||
|
||||
// execOCILog returns the file path for the exec sessions oci log
|
||||
func (c *Container) execOCILog(sessionID string) string {
|
||||
if !c.ociRuntime.SupportsJSONErrors() {
|
||||
@ -917,6 +924,9 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) {
|
||||
return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err)
|
||||
}
|
||||
}
|
||||
if err := os.MkdirAll(c.execPersistDir(sessionID), execDirPermission); err != nil {
|
||||
return fmt.Errorf("creating OCI runtime persist directory path %s: %w", c.execPersistDir(sessionID), err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -8,6 +8,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@ -145,6 +146,10 @@ func (c *Container) exitFilePath() (string, error) {
|
||||
return c.ociRuntime.ExitFilePath(c)
|
||||
}
|
||||
|
||||
func (c *Container) oomFilePath() (string, error) {
|
||||
return c.ociRuntime.OOMFilePath(c)
|
||||
}
|
||||
|
||||
// Wait for the container's exit file to appear.
|
||||
// When it does, update our state based on it.
|
||||
func (c *Container) waitForExitFileAndSync() error {
|
||||
@ -181,6 +186,7 @@ func (c *Container) waitForExitFileAndSync() error {
|
||||
// Handle the container exit file.
|
||||
// The exit file is used to supply container exit time and exit code.
|
||||
// This assumes the exit file already exists.
|
||||
// Also check for an oom file to determine if the container was oom killed or not.
|
||||
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
|
||||
c.state.FinishedTime = ctime.Created(fi)
|
||||
statusCodeStr, err := os.ReadFile(exitFile)
|
||||
@ -194,7 +200,10 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
|
||||
}
|
||||
c.state.ExitCode = int32(statusCode)
|
||||
|
||||
oomFilePath := filepath.Join(c.bundlePath(), "oom")
|
||||
oomFilePath, err := c.oomFilePath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err = os.Stat(oomFilePath); err == nil {
|
||||
c.state.OOMKilled = true
|
||||
}
|
||||
@ -758,11 +767,6 @@ func (c *Container) removeConmonFiles() error {
|
||||
return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
|
||||
}
|
||||
|
||||
oomFile := filepath.Join(c.bundlePath(), "oom")
|
||||
if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
|
||||
return fmt.Errorf("removing container %s OOM file: %w", c.ID(), err)
|
||||
}
|
||||
|
||||
// Remove the exit file so we don't leak memory in tmpfs
|
||||
exitFile, err := c.exitFilePath()
|
||||
if err != nil {
|
||||
@ -772,6 +776,15 @@ func (c *Container) removeConmonFiles() error {
|
||||
return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
|
||||
}
|
||||
|
||||
// Remove the oom file
|
||||
oomFile, err := c.oomFilePath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) {
|
||||
return fmt.Errorf("removing container %s oom file: %w", c.ID(), err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -149,6 +149,12 @@ type OCIRuntime interface { //nolint:interfacebloat
|
||||
// This is the path to that file for a given container.
|
||||
ExitFilePath(ctr *Container) (string, error)
|
||||
|
||||
// OOMFilePath is the path to a container's oom file if it was oom killed.
|
||||
// An oom file is only created when the container is oom killed. The existence
|
||||
// of this file means that the container was oom killed.
|
||||
// This is the path to that file for a given container.
|
||||
OOMFilePath(ctr *Container) (string, error)
|
||||
|
||||
// RuntimeInfo returns verbose information about the runtime.
|
||||
RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error)
|
||||
|
||||
|
@ -64,6 +64,7 @@ type ConmonOCIRuntime struct {
|
||||
supportsKVM bool
|
||||
supportsNoCgroups bool
|
||||
enableKeyring bool
|
||||
persistDir string
|
||||
}
|
||||
|
||||
// Make a new Conmon-based OCI runtime with the given options.
|
||||
@ -143,13 +144,15 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
|
||||
}
|
||||
|
||||
runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
|
||||
// The persist-dir is where conmon writes the exit file and oom file (if oom killed), we join the container ID to this path later on
|
||||
runtime.persistDir = filepath.Join(runtime.tmpDir, "persist")
|
||||
|
||||
// Create the exit files and attach sockets directories
|
||||
if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
|
||||
// The directory is allowed to exist
|
||||
if !os.IsExist(err) {
|
||||
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
|
||||
}
|
||||
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
|
||||
}
|
||||
if err := os.MkdirAll(runtime.persistDir, 0750); err != nil {
|
||||
return nil, fmt.Errorf("creating OCI runtime persist directory: %w", err)
|
||||
}
|
||||
return runtime, nil
|
||||
}
|
||||
@ -940,6 +943,12 @@ func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
|
||||
return filepath.Join(r.exitsDir, ctr.ID()), nil
|
||||
}
|
||||
|
||||
// OOMFilePath is the path to a container's oom file.
|
||||
// The oom file will only exist if the container was oom killed.
|
||||
func (r *ConmonOCIRuntime) OOMFilePath(ctr *Container) (string, error) {
|
||||
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
|
||||
}
|
||||
|
||||
// RuntimeInfo provides information on the runtime.
|
||||
func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
|
||||
runtimePackage := version.Package(r.path)
|
||||
@ -1107,7 +1116,11 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
|
||||
pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
|
||||
}
|
||||
|
||||
args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
|
||||
persistDir := filepath.Join(r.persistDir, ctr.ID())
|
||||
args, err := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, persistDir, ociLog, ctr.LogDriver(), logTag)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
|
||||
args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
|
||||
@ -1363,7 +1376,16 @@ func (r *ConmonOCIRuntime) configureConmonEnv() ([]string, error) {
|
||||
}
|
||||
|
||||
// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
|
||||
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
|
||||
// func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
|
||||
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
|
||||
// Make the persists directory for the container after the ctr ID is appended to it in the caller
|
||||
// This is needed as conmon writes the exit and oom file in the given persist directory path as just "exit" and "oom"
|
||||
// So creating a directory with the container ID under the persist dir will help keep track of which container the
|
||||
// exit and oom files belong to.
|
||||
if err := os.MkdirAll(persistDir, 0750); err != nil {
|
||||
return nil, fmt.Errorf("creating OCI runtime oom files directory for ctr %q: %w", ctr.ID(), err)
|
||||
}
|
||||
|
||||
// set the conmon API version to be able to use the correct sync struct keys
|
||||
args := []string{
|
||||
"--api-version", "1",
|
||||
@ -1374,6 +1396,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
|
||||
"-p", pidPath,
|
||||
"-n", ctr.Name(),
|
||||
"--exit-dir", exitDir,
|
||||
"--persist-dir", persistDir,
|
||||
"--full-attach",
|
||||
}
|
||||
if len(r.runtimeFlags) > 0 {
|
||||
@ -1436,7 +1459,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
|
||||
logrus.Debugf("Running with no Cgroups")
|
||||
args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
|
||||
}
|
||||
return args
|
||||
return args, nil
|
||||
}
|
||||
|
||||
// newPipe creates a unix socket pair for communication.
|
||||
|
@ -387,7 +387,10 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
|
||||
}
|
||||
defer processFile.Close()
|
||||
|
||||
args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
|
||||
args, err := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), c.execPersistDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
preserveFDs, filesToClose, extraFiles, err := getPreserveFdExtraFiles(options.PreserveFD, options.PreserveFDs)
|
||||
if err != nil {
|
||||
|
@ -29,6 +29,8 @@ type MissingRuntime struct {
|
||||
name string
|
||||
// exitsDir is the directory for exit files.
|
||||
exitsDir string
|
||||
// persistDir is the directory for exit and oom files.
|
||||
persistDir string
|
||||
}
|
||||
|
||||
// Get a new MissingRuntime for the given name.
|
||||
@ -52,6 +54,7 @@ func getMissingRuntime(name string, r *Runtime) OCIRuntime {
|
||||
newRuntime := new(MissingRuntime)
|
||||
newRuntime.name = name
|
||||
newRuntime.exitsDir = filepath.Join(r.config.Engine.TmpDir, "exits")
|
||||
newRuntime.persistDir = filepath.Join(r.config.Engine.TmpDir, "persist")
|
||||
|
||||
missingRuntimes[name] = newRuntime
|
||||
|
||||
@ -222,6 +225,12 @@ func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) {
|
||||
return filepath.Join(r.exitsDir, ctr.ID()), nil
|
||||
}
|
||||
|
||||
// OOMFilePath returns the oom file path for a container.
|
||||
// The oom file will only exist if the container was oom killed.
|
||||
func (r *MissingRuntime) OOMFilePath(ctr *Container) (string, error) {
|
||||
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
|
||||
}
|
||||
|
||||
// RuntimeInfo returns information on the missing runtime
|
||||
func (r *MissingRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
|
||||
ocirt := define.OCIRuntimeInfo{
|
||||
|
@ -70,4 +70,39 @@ var _ = Describe("Podman run memory", func() {
|
||||
Expect(session.OutputToString()).To(Equal(limit))
|
||||
})
|
||||
}
|
||||
|
||||
It("podman run memory test on oomkilled container", func() {
|
||||
mem := SystemExec("cat", []string{"/proc/sys/vm/overcommit_memory"})
|
||||
mem.WaitWithDefaultTimeout()
|
||||
if mem.OutputToString() != "0" {
|
||||
Skip("overcommit memory is not set to 0")
|
||||
}
|
||||
|
||||
ctrName := "oomkilled-ctr"
|
||||
// create a container that gets oomkilled
|
||||
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--read-only", "--memory-swap=20m", "--memory=20m", "--oom-score-adj=1000", ALPINE, "sort", "/dev/urandom"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session).Should(ExitWithError())
|
||||
|
||||
inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
|
||||
inspect.WaitWithDefaultTimeout()
|
||||
Expect(inspect).Should(ExitCleanly())
|
||||
// Check oomkilled and exit code values
|
||||
Expect(inspect.OutputToString()).Should(ContainSubstring("true"))
|
||||
Expect(inspect.OutputToString()).Should(ContainSubstring("137"))
|
||||
})
|
||||
|
||||
It("podman run memory test on successfully exited container", func() {
|
||||
ctrName := "success-ctr"
|
||||
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--memory=40m", ALPINE, "echo", "hello"})
|
||||
session.WaitWithDefaultTimeout()
|
||||
Expect(session).Should(ExitCleanly())
|
||||
|
||||
inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
|
||||
inspect.WaitWithDefaultTimeout()
|
||||
Expect(inspect).Should(ExitCleanly())
|
||||
// Check oomkilled and exit code values
|
||||
Expect(inspect.OutputToString()).Should(ContainSubstring("false"))
|
||||
Expect(inspect.OutputToString()).Should(ContainSubstring("0"))
|
||||
})
|
||||
})
|
||||
|
Reference in New Issue
Block a user