From 61cbc0c3ee57e2fbcf73ab9aa1b2369f7b1e2c1a Mon Sep 17 00:00:00 2001 From: ryanmccann1024 Date: Thu, 31 Jul 2025 11:16:57 -0500 Subject: [PATCH] feat(exec): Add --no-session flag for improved performance Fixes: #26588 For use cases like HPC, where `podman exec` is called in rapid succession, the standard exec process can become a bottleneck due to container locking and database I/O for session tracking. This commit introduces a new `--no-session` flag to `podman exec`. When used, this flag invokes a new, lightweight backend implementation that: - Skips container locking, reducing lock contention - Bypasses the creation, tracking, and removal of exec sessions in the database - Executes the command directly and retrieves the exit code without persisting session state - Maintains consistency with regular exec for container lookup, TTY handling, and environment setup - Shares implementation with health check execution to avoid code duplication The implementation addresses all performance bottlenecks while preserving compatibility with existing exec functionality including --latest flag support and proper exit code handling. Changes include: - Add --no-session flag to cmd/podman/containers/exec.go - Implement lightweight execution path in libpod/container_exec.go - Ensure consistent container validation and environment setup - Add comprehensive exit code testing including signal handling (exit 137) - Optimize configuration to skip unnecessary exit command setup Signed-off-by: Ryan McCann Signed-off-by: ryanmccann1024 --- cmd/podman/containers/exec.go | 35 ++-- docs/source/markdown/options/no-session.md | 7 + docs/source/markdown/podman-exec.1.md.in | 2 + libpod/container_exec.go | 178 +++++++++++---------- pkg/domain/entities/engine_container.go | 1 + pkg/domain/infra/abi/containers.go | 61 +++++-- pkg/domain/infra/tunnel/containers.go | 4 + test/e2e/exec_test.go | 54 +++++++ 8 files changed, 237 insertions(+), 105 deletions(-) create mode 100644 docs/source/markdown/options/no-session.md diff --git a/cmd/podman/containers/exec.go b/cmd/podman/containers/exec.go index 3089969fae..59ba853e67 100644 --- a/cmd/podman/containers/exec.go +++ b/cmd/podman/containers/exec.go @@ -51,6 +51,7 @@ var ( execOpts entities.ExecOptions execDetach bool execCidFile string + execNoSession bool ) func execFlags(cmd *cobra.Command) { @@ -100,6 +101,10 @@ func execFlags(cmd *cobra.Command) { flags.Int32(waitFlagName, 0, "Total seconds to wait for container to start") _ = flags.MarkHidden(waitFlagName) + if !registry.IsRemote() { + flags.BoolVar(&execNoSession, "no-session", false, "Do not create a database session for the exec process") + } + if registry.IsRemote() { _ = flags.MarkHidden("preserve-fds") } @@ -121,6 +126,12 @@ func init() { } func exec(cmd *cobra.Command, args []string) error { + if execNoSession { + if execDetach || cmd.Flags().Changed("detach-keys") { + return errors.New("--no-session cannot be used with --detach or --detach-keys") + } + } + nameOrID, command, err := determineTargetCtrAndCmd(args, execOpts.Latest, execCidFile != "") if err != nil { return err @@ -168,17 +179,21 @@ func exec(cmd *cobra.Command, args []string) error { } } - if !execDetach { - streams := define.AttachStreams{} - streams.OutputStream = os.Stdout - streams.ErrorStream = os.Stderr - if execOpts.Interactive { - streams.InputStream = bufio.NewReader(os.Stdin) - streams.AttachInput = true - } - streams.AttachOutput = true - streams.AttachError = true + streams := define.AttachStreams{} + streams.OutputStream = os.Stdout + streams.ErrorStream = os.Stderr + if execOpts.Interactive { + streams.InputStream = bufio.NewReader(os.Stdin) + streams.AttachInput = true + } + streams.AttachOutput = true + streams.AttachError = true + if execNoSession { + exitCode, err := registry.ContainerEngine().ContainerExecNoSession(registry.Context(), nameOrID, execOpts, streams) + registry.SetExitCode(exitCode) + return err + } else if !execDetach { exitCode, err := registry.ContainerEngine().ContainerExec(registry.Context(), nameOrID, execOpts, streams) registry.SetExitCode(exitCode) return err diff --git a/docs/source/markdown/options/no-session.md b/docs/source/markdown/options/no-session.md new file mode 100644 index 0000000000..411aafdd92 --- /dev/null +++ b/docs/source/markdown/options/no-session.md @@ -0,0 +1,7 @@ +####> This option file is used in: +####> podman exec +####> If file is edited, make sure the changes +####> are applicable to all of those. +#### **--no-session** + +Do not create a database session for the exec process. This can improve performance but the exec session will not be visible to other podman commands. diff --git a/docs/source/markdown/podman-exec.1.md.in b/docs/source/markdown/podman-exec.1.md.in index ae661a8395..2e0215ce11 100644 --- a/docs/source/markdown/podman-exec.1.md.in +++ b/docs/source/markdown/podman-exec.1.md.in @@ -31,6 +31,8 @@ Start the exec session, but do not attach to it. The command runs in the backgro @@option latest +@@option no-session + @@option preserve-fd @@option preserve-fds diff --git a/libpod/container_exec.go b/libpod/container_exec.go index 3aef3060a2..409c40cfc7 100644 --- a/libpod/container_exec.go +++ b/libpod/container_exec.go @@ -822,87 +822,7 @@ func (c *Container) ExecResize(sessionID string, newSize resize.TerminalSize) er } func (c *Container) healthCheckExec(config *ExecConfig, timeout time.Duration, streams *define.AttachStreams) (int, error) { - unlock := true - if !c.batched { - c.lock.Lock() - defer func() { - if unlock { - c.lock.Unlock() - } - }() - - if err := c.syncContainer(); err != nil { - return -1, err - } - } - - if err := c.verifyExecConfig(config); err != nil { - return -1, err - } - - if !c.ensureState(define.ContainerStateRunning) { - return -1, fmt.Errorf("can only create exec sessions on running containers: %w", define.ErrCtrStateInvalid) - } - - session, err := c.createExecSession(config) - if err != nil { - return -1, err - } - - if c.state.ExecSessions == nil { - c.state.ExecSessions = make(map[string]*ExecSession) - } - c.state.ExecSessions[session.ID()] = session - defer delete(c.state.ExecSessions, session.ID()) - - opts, err := prepareForExec(c, session) - if err != nil { - return -1, err - } - defer func() { - // cleanupExecBundle MUST be called with the parent container locked. - if !unlock && !c.batched { - c.lock.Lock() - unlock = true - - if err := c.syncContainer(); err != nil { - logrus.Errorf("Error syncing container %s state: %v", c.ID(), err) - // Normally we'd want to continue here, get rid of the exec directory. - // But the risk of proceeding into a function that can mutate state with a bad state is high. - // Lesser of two evils is to bail and leak a directory. - return - } - } - if err := c.cleanupExecBundle(session.ID()); err != nil { - logrus.Errorf("Container %s light exec session cleanup error: %v", c.ID(), err) - } - }() - - pid, attachErrChan, err := c.ociRuntime.ExecContainer(c, session.ID(), opts, streams, nil) - if err != nil { - return -1, err - } - session.PID = pid - session.PIDData = getPidData(pid) - - if !c.batched { - c.lock.Unlock() - unlock = false - } - - select { - case err = <-attachErrChan: - if err != nil { - return -1, fmt.Errorf("container %s light exec session with pid: %d error: %v", c.ID(), pid, err) - } - case <-time.After(timeout): - if err := c.ociRuntime.ExecStopContainer(c, session.ID(), 0); err != nil { - return -1, err - } - return -1, fmt.Errorf("%v of %s", define.ErrHealthCheckTimeout, c.HealthCheckConfig().Timeout.String()) - } - - return c.readExecExitCode(session.ID()) + return c.execLightweight(config, streams, timeout) } func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resize <-chan resize.TerminalSize) (int, error) { @@ -1321,3 +1241,99 @@ func justWriteExecExitCode(c *Container, sessionID string, exitCode int, emitEve // Finally, save our changes. return c.save() } + +// execLightweight executes a command in a container without creating a persistent exec session. +// It is used by both ExecNoSession and healthCheckExec to avoid code duplication. +func (c *Container) execLightweight(config *ExecConfig, streams *define.AttachStreams, timeout time.Duration) (int, error) { + if err := c.verifyExecConfig(config); err != nil { + return -1, err + } + + unlock := true + if !c.batched { + c.lock.Lock() + defer func() { + if unlock { + c.lock.Unlock() + } + }() + + if err := c.syncContainer(); err != nil { + return -1, err + } + } + + if !c.ensureState(define.ContainerStateRunning) { + return -1, fmt.Errorf("can only create exec sessions on running containers: %w", define.ErrCtrStateInvalid) + } + + session, err := c.createExecSession(config) + if err != nil { + return -1, err + } + + if c.state.ExecSessions == nil { + c.state.ExecSessions = make(map[string]*ExecSession) + } + c.state.ExecSessions[session.ID()] = session + defer delete(c.state.ExecSessions, session.ID()) + + opts, err := prepareForExec(c, session) + if err != nil { + return -1, err + } + defer func() { + if err := c.cleanupExecBundle(session.ID()); err != nil { + logrus.Errorf("Container %s light exec session cleanup error: %v", c.ID(), err) + } + }() + + pid, attachErrChan, err := c.ociRuntime.ExecContainer(c, session.ID(), opts, streams, nil) + if err != nil { + // Check if the error is command not found before returning + if exitCode := define.ExitCode(err); exitCode == define.ExecErrorCodeNotFound { + return exitCode, err + } + return define.ExecErrorCodeGeneric, err + } + session.PID = pid + session.PIDData = getPidData(pid) + + if !c.batched { + c.lock.Unlock() + unlock = false + } + + // Handle timeout for health checks + if timeout > 0 { + select { + case err = <-attachErrChan: + if err != nil { + return -1, fmt.Errorf("container %s light exec session with pid: %d error: %v", c.ID(), pid, err) + } + case <-time.After(timeout): + if err := c.ociRuntime.ExecStopContainer(c, session.ID(), 0); err != nil { + return -1, err + } + return -1, fmt.Errorf("%v of %s", define.ErrHealthCheckTimeout, timeout.String()) + } + } else { + // For no-session exec, wait for completion without timeout + err = <-attachErrChan + if err != nil && !errors.Is(err, define.ErrDetach) { + // Check if the error is command not found + if exitCode := define.ExitCode(err); exitCode == define.ExecErrorCodeNotFound { + return exitCode, err + } + return define.ExecErrorCodeGeneric, err + } + } + + return c.readExecExitCode(session.ID()) +} + +// ExecNoSession executes a command in a container without creating a persistent exec session. +// It skips database operations and minimizes container locking for performance. +func (c *Container) ExecNoSession(config *ExecConfig, streams *define.AttachStreams, _ <-chan resize.TerminalSize) (int, error) { + return c.execLightweight(config, streams, 0) +} diff --git a/pkg/domain/entities/engine_container.go b/pkg/domain/entities/engine_container.go index 004d6112d7..30d645fa11 100644 --- a/pkg/domain/entities/engine_container.go +++ b/pkg/domain/entities/engine_container.go @@ -26,6 +26,7 @@ type ContainerEngine interface { //nolint:interfacebloat ContainerCopyToArchive(ctx context.Context, nameOrID string, path string, writer io.Writer) (ContainerCopyFunc, error) ContainerCreate(ctx context.Context, s *specgen.SpecGenerator) (*ContainerCreateReport, error) ContainerExec(ctx context.Context, nameOrID string, options ExecOptions, streams define.AttachStreams) (int, error) + ContainerExecNoSession(ctx context.Context, nameOrID string, options ExecOptions, streams define.AttachStreams) (int, error) ContainerExecDetached(ctx context.Context, nameOrID string, options ExecOptions) (string, error) ContainerExists(ctx context.Context, nameOrID string, options ContainerExistsOptions) (*BoolReport, error) ContainerExport(ctx context.Context, nameOrID string, options ContainerExportOptions) error diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index 75314e4571..d986247233 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -872,7 +872,7 @@ func (ic *ContainerEngine) ContainerAttach(ctx context.Context, nameOrID string, return nil } -func makeExecConfig(options entities.ExecOptions, rt *libpod.Runtime) (*libpod.ExecConfig, error) { +func makeExecConfig(options entities.ExecOptions, rt *libpod.Runtime, noSession bool) (*libpod.ExecConfig, error) { execConfig := new(libpod.ExecConfig) execConfig.Command = options.Cmd execConfig.Terminal = options.Tty @@ -885,18 +885,21 @@ func makeExecConfig(options entities.ExecOptions, rt *libpod.Runtime) (*libpod.E execConfig.PreserveFD = options.PreserveFD execConfig.AttachStdin = options.Interactive - // Make an exit command - storageConfig := rt.StorageConfig() - runtimeConfig, err := rt.GetConfig() - if err != nil { - return nil, fmt.Errorf("retrieving Libpod configuration to build exec exit command: %w", err) + // Only set up exit command for regular exec sessions, not no-session mode + if !noSession { + // Make an exit command + storageConfig := rt.StorageConfig() + runtimeConfig, err := rt.GetConfig() + if err != nil { + return nil, fmt.Errorf("retrieving Libpod configuration to build exec exit command: %w", err) + } + // TODO: Add some ability to toggle syslog + exitCommandArgs, err := specgenutil.CreateExitCommandArgs(storageConfig, runtimeConfig, logrus.IsLevelEnabled(logrus.DebugLevel), false, false, true) + if err != nil { + return nil, fmt.Errorf("constructing exit command for exec session: %w", err) + } + execConfig.ExitCommand = exitCommandArgs } - // TODO: Add some ability to toggle syslog - exitCommandArgs, err := specgenutil.CreateExitCommandArgs(storageConfig, runtimeConfig, logrus.IsLevelEnabled(logrus.DebugLevel), false, false, true) - if err != nil { - return nil, fmt.Errorf("constructing exit command for exec session: %w", err) - } - execConfig.ExitCommand = exitCommandArgs return execConfig, nil } @@ -946,7 +949,7 @@ func (ic *ContainerEngine) ContainerExec(ctx context.Context, nameOrID string, o util.ExecAddTERM(ctr.Env(), options.Envs) } - execConfig, err := makeExecConfig(options, ic.Libpod) + execConfig, err := makeExecConfig(options, ic.Libpod, false) if err != nil { return ec, err } @@ -955,6 +958,36 @@ func (ic *ContainerEngine) ContainerExec(ctx context.Context, nameOrID string, o return define.TranslateExecErrorToExitCode(ec, err), err } +func (ic *ContainerEngine) ContainerExecNoSession(_ context.Context, nameOrID string, options entities.ExecOptions, streams define.AttachStreams) (int, error) { + ec := define.ExecErrorCodeGeneric + err := checkExecPreserveFDs(options) + if err != nil { + return ec, err + } + + containers, err := getContainers(ic.Libpod, getContainersOptions{latest: options.Latest, names: []string{nameOrID}}) + if err != nil { + return ec, err + } + if len(containers) != 1 { + return ec, fmt.Errorf("%w: expected to find exactly one container but got %d", define.ErrInternal, len(containers)) + } + ctr := containers[0] + + if options.Tty { + util.ExecAddTERM(ctr.Env(), options.Envs) + } + + execConfig, err := makeExecConfig(options, ic.Libpod, true) + if err != nil { + return ec, err + } + + ec, err = ctr.ExecNoSession(execConfig, &streams, nil) + // Translate exit codes for consistency with regular exec + return define.TranslateExecErrorToExitCode(ec, err), err +} + func (ic *ContainerEngine) ContainerExecDetached(_ context.Context, nameOrID string, options entities.ExecOptions) (string, error) { err := checkExecPreserveFDs(options) if err != nil { @@ -970,7 +1003,7 @@ func (ic *ContainerEngine) ContainerExecDetached(_ context.Context, nameOrID str } ctr := containers[0] - execConfig, err := makeExecConfig(options, ic.Libpod) + execConfig, err := makeExecConfig(options, ic.Libpod, false) if err != nil { return "", err } diff --git a/pkg/domain/infra/tunnel/containers.go b/pkg/domain/infra/tunnel/containers.go index 83c6d4d1e3..1be1b6f640 100644 --- a/pkg/domain/infra/tunnel/containers.go +++ b/pkg/domain/infra/tunnel/containers.go @@ -656,6 +656,10 @@ func (ic *ContainerEngine) ContainerExec(_ context.Context, nameOrID string, opt return inspectOut.ExitCode, nil } +func (ic *ContainerEngine) ContainerExecNoSession(_ context.Context, _ string, _ entities.ExecOptions, _ define.AttachStreams) (int, error) { + return 0, errors.New("--no-session is not supported for the remote client") +} + func (ic *ContainerEngine) ContainerExecDetached(_ context.Context, nameOrID string, options entities.ExecOptions) (retSessionID string, retErr error) { createConfig := makeExecConfig(options) diff --git a/test/e2e/exec_test.go b/test/e2e/exec_test.go index d3d43da33b..82f3979d6d 100644 --- a/test/e2e/exec_test.go +++ b/test/e2e/exec_test.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "strings" + "time" . "github.com/containers/podman/v6/test/utils" . "github.com/onsi/ginkgo/v2" @@ -616,4 +617,57 @@ RUN useradd -u 1000 auser`, fedoraMinimal) Expect(session).Should(ExitCleanly()) Expect(session.OutputToString()).To(Equal("root")) }) + + It("podman exec with --no-session flag", func() { + SkipIfRemote("The --no-session flag is not supported for remote clients") + session := podmanTest.RunTopContainer("no_session_test") + session.WaitWithDefaultTimeout() + Expect(session).Should(ExitCleanly()) + + execResult := podmanTest.Podman([]string{"exec", "--no-session", "no_session_test", "echo", "hello"}) + execResult.WaitWithDefaultTimeout() + Expect(execResult).Should(ExitCleanly()) + Expect(execResult.OutputToString()).To(Equal("hello")) + }) + + It("podman stop is not blocked by a long-running --no-session exec", func() { + SkipIfRemote("The --no-session flag is not supported for remote clients") + + ctrName := "no_session_lock_test" + session := podmanTest.RunTopContainer(ctrName) + session.WaitWithDefaultTimeout() + Expect(session).Should(ExitCleanly()) + + execSession := podmanTest.Podman([]string{"exec", "--no-session", ctrName, "sleep", "30"}) + stopSession := podmanTest.Podman([]string{"stop", "-t", "5", ctrName}) + stopSession.WaitWithDefaultTimeout() + Expect(stopSession).Should(ExitCleanly()) + Eventually(execSession, "5s").Should(Not(Exit(0))) + }) + + It("podman exec --no-session exit codes", func() { + SkipIfRemote("The --no-session flag is not supported for remote clients") + + ctrName := "no_session_exit_code_test" + session := podmanTest.RunTopContainer(ctrName) + session.WaitWithDefaultTimeout() + Expect(session).Should(ExitCleanly()) + + execResult := podmanTest.Podman([]string{"exec", "--no-session", ctrName, "sh", "-c", "exit 42"}) + execResult.WaitWithDefaultTimeout() + Expect(execResult).Should(ExitWithError(42, "")) + + execResult = podmanTest.Podman([]string{"exec", "--no-session", ctrName, "nonexistentcommand"}) + execResult.WaitWithDefaultTimeout() + Expect(execResult).Should(ExitWithError(127, "OCI runtime attempted to invoke a command that was not found")) + + execSession := podmanTest.Podman([]string{"exec", "--no-session", ctrName, "sleep", "30"}) + time.Sleep(2 * time.Second) // Give time for the first exec to start (CI is slow) + killSession := podmanTest.Podman([]string{"exec", ctrName, "sh", "-c", "kill -9 $(pgrep sleep)"}) + killSession.WaitWithDefaultTimeout() + Expect(killSession).Should(ExitCleanly()) + + execSession.WaitWithDefaultTimeout() + Expect(execSession).Should(ExitWithError(137, "")) + }) })