feat(exec): Add --no-session flag for improved performance

Fixes: #26588

For use cases like HPC, where `podman exec` is called in rapid succession, the standard exec process can become a bottleneck due to container locking and database I/O for session tracking.

This commit introduces a new `--no-session` flag to `podman exec`. When used, this flag invokes a new, lightweight backend implementation that:

- Skips container locking, reducing lock contention
- Bypasses the creation, tracking, and removal of exec sessions in the database
- Executes the command directly and retrieves the exit code without persisting session state
- Maintains consistency with regular exec for container lookup, TTY handling, and environment setup
- Shares implementation with health check execution to avoid code duplication

The implementation addresses all performance bottlenecks while preserving compatibility with existing exec functionality including --latest flag support and proper exit code handling.

Changes include:
- Add --no-session flag to cmd/podman/containers/exec.go
- Implement lightweight execution path in libpod/container_exec.go
- Ensure consistent container validation and environment setup
- Add comprehensive exit code testing including signal handling (exit 137)
- Optimize configuration to skip unnecessary exit command setup

Signed-off-by: Ryan McCann <ryan_mccann@student.uml.edu>
Signed-off-by: ryanmccann1024 <ryan_mccann@student.uml.edu>
This commit is contained in:
ryanmccann1024
2025-07-31 11:16:57 -05:00
parent 5c48d02fe8
commit 61cbc0c3ee
8 changed files with 237 additions and 105 deletions

View File

@@ -51,6 +51,7 @@ var (
execOpts entities.ExecOptions
execDetach bool
execCidFile string
execNoSession bool
)
func execFlags(cmd *cobra.Command) {
@@ -100,6 +101,10 @@ func execFlags(cmd *cobra.Command) {
flags.Int32(waitFlagName, 0, "Total seconds to wait for container to start")
_ = flags.MarkHidden(waitFlagName)
if !registry.IsRemote() {
flags.BoolVar(&execNoSession, "no-session", false, "Do not create a database session for the exec process")
}
if registry.IsRemote() {
_ = flags.MarkHidden("preserve-fds")
}
@@ -121,6 +126,12 @@ func init() {
}
func exec(cmd *cobra.Command, args []string) error {
if execNoSession {
if execDetach || cmd.Flags().Changed("detach-keys") {
return errors.New("--no-session cannot be used with --detach or --detach-keys")
}
}
nameOrID, command, err := determineTargetCtrAndCmd(args, execOpts.Latest, execCidFile != "")
if err != nil {
return err
@@ -168,17 +179,21 @@ func exec(cmd *cobra.Command, args []string) error {
}
}
if !execDetach {
streams := define.AttachStreams{}
streams.OutputStream = os.Stdout
streams.ErrorStream = os.Stderr
if execOpts.Interactive {
streams.InputStream = bufio.NewReader(os.Stdin)
streams.AttachInput = true
}
streams.AttachOutput = true
streams.AttachError = true
streams := define.AttachStreams{}
streams.OutputStream = os.Stdout
streams.ErrorStream = os.Stderr
if execOpts.Interactive {
streams.InputStream = bufio.NewReader(os.Stdin)
streams.AttachInput = true
}
streams.AttachOutput = true
streams.AttachError = true
if execNoSession {
exitCode, err := registry.ContainerEngine().ContainerExecNoSession(registry.Context(), nameOrID, execOpts, streams)
registry.SetExitCode(exitCode)
return err
} else if !execDetach {
exitCode, err := registry.ContainerEngine().ContainerExec(registry.Context(), nameOrID, execOpts, streams)
registry.SetExitCode(exitCode)
return err

View File

@@ -0,0 +1,7 @@
####> This option file is used in:
####> podman exec
####> If file is edited, make sure the changes
####> are applicable to all of those.
#### **--no-session**
Do not create a database session for the exec process. This can improve performance but the exec session will not be visible to other podman commands.

View File

@@ -31,6 +31,8 @@ Start the exec session, but do not attach to it. The command runs in the backgro
@@option latest
@@option no-session
@@option preserve-fd
@@option preserve-fds

View File

@@ -822,87 +822,7 @@ func (c *Container) ExecResize(sessionID string, newSize resize.TerminalSize) er
}
func (c *Container) healthCheckExec(config *ExecConfig, timeout time.Duration, streams *define.AttachStreams) (int, error) {
unlock := true
if !c.batched {
c.lock.Lock()
defer func() {
if unlock {
c.lock.Unlock()
}
}()
if err := c.syncContainer(); err != nil {
return -1, err
}
}
if err := c.verifyExecConfig(config); err != nil {
return -1, err
}
if !c.ensureState(define.ContainerStateRunning) {
return -1, fmt.Errorf("can only create exec sessions on running containers: %w", define.ErrCtrStateInvalid)
}
session, err := c.createExecSession(config)
if err != nil {
return -1, err
}
if c.state.ExecSessions == nil {
c.state.ExecSessions = make(map[string]*ExecSession)
}
c.state.ExecSessions[session.ID()] = session
defer delete(c.state.ExecSessions, session.ID())
opts, err := prepareForExec(c, session)
if err != nil {
return -1, err
}
defer func() {
// cleanupExecBundle MUST be called with the parent container locked.
if !unlock && !c.batched {
c.lock.Lock()
unlock = true
if err := c.syncContainer(); err != nil {
logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
// Normally we'd want to continue here, get rid of the exec directory.
// But the risk of proceeding into a function that can mutate state with a bad state is high.
// Lesser of two evils is to bail and leak a directory.
return
}
}
if err := c.cleanupExecBundle(session.ID()); err != nil {
logrus.Errorf("Container %s light exec session cleanup error: %v", c.ID(), err)
}
}()
pid, attachErrChan, err := c.ociRuntime.ExecContainer(c, session.ID(), opts, streams, nil)
if err != nil {
return -1, err
}
session.PID = pid
session.PIDData = getPidData(pid)
if !c.batched {
c.lock.Unlock()
unlock = false
}
select {
case err = <-attachErrChan:
if err != nil {
return -1, fmt.Errorf("container %s light exec session with pid: %d error: %v", c.ID(), pid, err)
}
case <-time.After(timeout):
if err := c.ociRuntime.ExecStopContainer(c, session.ID(), 0); err != nil {
return -1, err
}
return -1, fmt.Errorf("%v of %s", define.ErrHealthCheckTimeout, c.HealthCheckConfig().Timeout.String())
}
return c.readExecExitCode(session.ID())
return c.execLightweight(config, streams, timeout)
}
func (c *Container) Exec(config *ExecConfig, streams *define.AttachStreams, resize <-chan resize.TerminalSize) (int, error) {
@@ -1321,3 +1241,99 @@ func justWriteExecExitCode(c *Container, sessionID string, exitCode int, emitEve
// Finally, save our changes.
return c.save()
}
// execLightweight executes a command in a container without creating a persistent exec session.
// It is used by both ExecNoSession and healthCheckExec to avoid code duplication.
func (c *Container) execLightweight(config *ExecConfig, streams *define.AttachStreams, timeout time.Duration) (int, error) {
if err := c.verifyExecConfig(config); err != nil {
return -1, err
}
unlock := true
if !c.batched {
c.lock.Lock()
defer func() {
if unlock {
c.lock.Unlock()
}
}()
if err := c.syncContainer(); err != nil {
return -1, err
}
}
if !c.ensureState(define.ContainerStateRunning) {
return -1, fmt.Errorf("can only create exec sessions on running containers: %w", define.ErrCtrStateInvalid)
}
session, err := c.createExecSession(config)
if err != nil {
return -1, err
}
if c.state.ExecSessions == nil {
c.state.ExecSessions = make(map[string]*ExecSession)
}
c.state.ExecSessions[session.ID()] = session
defer delete(c.state.ExecSessions, session.ID())
opts, err := prepareForExec(c, session)
if err != nil {
return -1, err
}
defer func() {
if err := c.cleanupExecBundle(session.ID()); err != nil {
logrus.Errorf("Container %s light exec session cleanup error: %v", c.ID(), err)
}
}()
pid, attachErrChan, err := c.ociRuntime.ExecContainer(c, session.ID(), opts, streams, nil)
if err != nil {
// Check if the error is command not found before returning
if exitCode := define.ExitCode(err); exitCode == define.ExecErrorCodeNotFound {
return exitCode, err
}
return define.ExecErrorCodeGeneric, err
}
session.PID = pid
session.PIDData = getPidData(pid)
if !c.batched {
c.lock.Unlock()
unlock = false
}
// Handle timeout for health checks
if timeout > 0 {
select {
case err = <-attachErrChan:
if err != nil {
return -1, fmt.Errorf("container %s light exec session with pid: %d error: %v", c.ID(), pid, err)
}
case <-time.After(timeout):
if err := c.ociRuntime.ExecStopContainer(c, session.ID(), 0); err != nil {
return -1, err
}
return -1, fmt.Errorf("%v of %s", define.ErrHealthCheckTimeout, timeout.String())
}
} else {
// For no-session exec, wait for completion without timeout
err = <-attachErrChan
if err != nil && !errors.Is(err, define.ErrDetach) {
// Check if the error is command not found
if exitCode := define.ExitCode(err); exitCode == define.ExecErrorCodeNotFound {
return exitCode, err
}
return define.ExecErrorCodeGeneric, err
}
}
return c.readExecExitCode(session.ID())
}
// ExecNoSession executes a command in a container without creating a persistent exec session.
// It skips database operations and minimizes container locking for performance.
func (c *Container) ExecNoSession(config *ExecConfig, streams *define.AttachStreams, _ <-chan resize.TerminalSize) (int, error) {
return c.execLightweight(config, streams, 0)
}

View File

@@ -26,6 +26,7 @@ type ContainerEngine interface { //nolint:interfacebloat
ContainerCopyToArchive(ctx context.Context, nameOrID string, path string, writer io.Writer) (ContainerCopyFunc, error)
ContainerCreate(ctx context.Context, s *specgen.SpecGenerator) (*ContainerCreateReport, error)
ContainerExec(ctx context.Context, nameOrID string, options ExecOptions, streams define.AttachStreams) (int, error)
ContainerExecNoSession(ctx context.Context, nameOrID string, options ExecOptions, streams define.AttachStreams) (int, error)
ContainerExecDetached(ctx context.Context, nameOrID string, options ExecOptions) (string, error)
ContainerExists(ctx context.Context, nameOrID string, options ContainerExistsOptions) (*BoolReport, error)
ContainerExport(ctx context.Context, nameOrID string, options ContainerExportOptions) error

View File

@@ -872,7 +872,7 @@ func (ic *ContainerEngine) ContainerAttach(ctx context.Context, nameOrID string,
return nil
}
func makeExecConfig(options entities.ExecOptions, rt *libpod.Runtime) (*libpod.ExecConfig, error) {
func makeExecConfig(options entities.ExecOptions, rt *libpod.Runtime, noSession bool) (*libpod.ExecConfig, error) {
execConfig := new(libpod.ExecConfig)
execConfig.Command = options.Cmd
execConfig.Terminal = options.Tty
@@ -885,18 +885,21 @@ func makeExecConfig(options entities.ExecOptions, rt *libpod.Runtime) (*libpod.E
execConfig.PreserveFD = options.PreserveFD
execConfig.AttachStdin = options.Interactive
// Make an exit command
storageConfig := rt.StorageConfig()
runtimeConfig, err := rt.GetConfig()
if err != nil {
return nil, fmt.Errorf("retrieving Libpod configuration to build exec exit command: %w", err)
// Only set up exit command for regular exec sessions, not no-session mode
if !noSession {
// Make an exit command
storageConfig := rt.StorageConfig()
runtimeConfig, err := rt.GetConfig()
if err != nil {
return nil, fmt.Errorf("retrieving Libpod configuration to build exec exit command: %w", err)
}
// TODO: Add some ability to toggle syslog
exitCommandArgs, err := specgenutil.CreateExitCommandArgs(storageConfig, runtimeConfig, logrus.IsLevelEnabled(logrus.DebugLevel), false, false, true)
if err != nil {
return nil, fmt.Errorf("constructing exit command for exec session: %w", err)
}
execConfig.ExitCommand = exitCommandArgs
}
// TODO: Add some ability to toggle syslog
exitCommandArgs, err := specgenutil.CreateExitCommandArgs(storageConfig, runtimeConfig, logrus.IsLevelEnabled(logrus.DebugLevel), false, false, true)
if err != nil {
return nil, fmt.Errorf("constructing exit command for exec session: %w", err)
}
execConfig.ExitCommand = exitCommandArgs
return execConfig, nil
}
@@ -946,7 +949,7 @@ func (ic *ContainerEngine) ContainerExec(ctx context.Context, nameOrID string, o
util.ExecAddTERM(ctr.Env(), options.Envs)
}
execConfig, err := makeExecConfig(options, ic.Libpod)
execConfig, err := makeExecConfig(options, ic.Libpod, false)
if err != nil {
return ec, err
}
@@ -955,6 +958,36 @@ func (ic *ContainerEngine) ContainerExec(ctx context.Context, nameOrID string, o
return define.TranslateExecErrorToExitCode(ec, err), err
}
func (ic *ContainerEngine) ContainerExecNoSession(_ context.Context, nameOrID string, options entities.ExecOptions, streams define.AttachStreams) (int, error) {
ec := define.ExecErrorCodeGeneric
err := checkExecPreserveFDs(options)
if err != nil {
return ec, err
}
containers, err := getContainers(ic.Libpod, getContainersOptions{latest: options.Latest, names: []string{nameOrID}})
if err != nil {
return ec, err
}
if len(containers) != 1 {
return ec, fmt.Errorf("%w: expected to find exactly one container but got %d", define.ErrInternal, len(containers))
}
ctr := containers[0]
if options.Tty {
util.ExecAddTERM(ctr.Env(), options.Envs)
}
execConfig, err := makeExecConfig(options, ic.Libpod, true)
if err != nil {
return ec, err
}
ec, err = ctr.ExecNoSession(execConfig, &streams, nil)
// Translate exit codes for consistency with regular exec
return define.TranslateExecErrorToExitCode(ec, err), err
}
func (ic *ContainerEngine) ContainerExecDetached(_ context.Context, nameOrID string, options entities.ExecOptions) (string, error) {
err := checkExecPreserveFDs(options)
if err != nil {
@@ -970,7 +1003,7 @@ func (ic *ContainerEngine) ContainerExecDetached(_ context.Context, nameOrID str
}
ctr := containers[0]
execConfig, err := makeExecConfig(options, ic.Libpod)
execConfig, err := makeExecConfig(options, ic.Libpod, false)
if err != nil {
return "", err
}

View File

@@ -656,6 +656,10 @@ func (ic *ContainerEngine) ContainerExec(_ context.Context, nameOrID string, opt
return inspectOut.ExitCode, nil
}
func (ic *ContainerEngine) ContainerExecNoSession(_ context.Context, _ string, _ entities.ExecOptions, _ define.AttachStreams) (int, error) {
return 0, errors.New("--no-session is not supported for the remote client")
}
func (ic *ContainerEngine) ContainerExecDetached(_ context.Context, nameOrID string, options entities.ExecOptions) (retSessionID string, retErr error) {
createConfig := makeExecConfig(options)

View File

@@ -7,6 +7,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
. "github.com/containers/podman/v6/test/utils"
. "github.com/onsi/ginkgo/v2"
@@ -616,4 +617,57 @@ RUN useradd -u 1000 auser`, fedoraMinimal)
Expect(session).Should(ExitCleanly())
Expect(session.OutputToString()).To(Equal("root"))
})
It("podman exec with --no-session flag", func() {
SkipIfRemote("The --no-session flag is not supported for remote clients")
session := podmanTest.RunTopContainer("no_session_test")
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())
execResult := podmanTest.Podman([]string{"exec", "--no-session", "no_session_test", "echo", "hello"})
execResult.WaitWithDefaultTimeout()
Expect(execResult).Should(ExitCleanly())
Expect(execResult.OutputToString()).To(Equal("hello"))
})
It("podman stop is not blocked by a long-running --no-session exec", func() {
SkipIfRemote("The --no-session flag is not supported for remote clients")
ctrName := "no_session_lock_test"
session := podmanTest.RunTopContainer(ctrName)
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())
execSession := podmanTest.Podman([]string{"exec", "--no-session", ctrName, "sleep", "30"})
stopSession := podmanTest.Podman([]string{"stop", "-t", "5", ctrName})
stopSession.WaitWithDefaultTimeout()
Expect(stopSession).Should(ExitCleanly())
Eventually(execSession, "5s").Should(Not(Exit(0)))
})
It("podman exec --no-session exit codes", func() {
SkipIfRemote("The --no-session flag is not supported for remote clients")
ctrName := "no_session_exit_code_test"
session := podmanTest.RunTopContainer(ctrName)
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())
execResult := podmanTest.Podman([]string{"exec", "--no-session", ctrName, "sh", "-c", "exit 42"})
execResult.WaitWithDefaultTimeout()
Expect(execResult).Should(ExitWithError(42, ""))
execResult = podmanTest.Podman([]string{"exec", "--no-session", ctrName, "nonexistentcommand"})
execResult.WaitWithDefaultTimeout()
Expect(execResult).Should(ExitWithError(127, "OCI runtime attempted to invoke a command that was not found"))
execSession := podmanTest.Podman([]string{"exec", "--no-session", ctrName, "sleep", "30"})
time.Sleep(2 * time.Second) // Give time for the first exec to start (CI is slow)
killSession := podmanTest.Podman([]string{"exec", ctrName, "sh", "-c", "kill -9 $(pgrep sleep)"})
killSession.WaitWithDefaultTimeout()
Expect(killSession).Should(ExitCleanly())
execSession.WaitWithDefaultTimeout()
Expect(execSession).Should(ExitWithError(137, ""))
})
})