add "healthy" sdnotify policy

Add a new "healthy" sdnotify policy that instructs Podman to send the
READY message once the container has turned healthy.

Fixes: #6160
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg
2023-07-25 09:45:00 +02:00
parent 2a25d1d746
commit 0cfd12786f
9 changed files with 109 additions and 14 deletions

View File

@ -1545,7 +1545,7 @@ func AutocompleteLogLevel(cmd *cobra.Command, args []string, toComplete string)
// AutocompleteSDNotify - Autocomplete sdnotify options. // AutocompleteSDNotify - Autocomplete sdnotify options.
// -> "container", "conmon", "ignore" // -> "conmon", "container", "healthy", "ignore"
func AutocompleteSDNotify(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { func AutocompleteSDNotify(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
types := []string{define.SdNotifyModeContainer, define.SdNotifyModeContainer, define.SdNotifyModeIgnore} types := []string{define.SdNotifyModeConmon, define.SdNotifyModeContainer, define.SdNotifyModeHealthy, define.SdNotifyModeIgnore}
return types, cobra.ShellCompDirectiveNoFileComp return types, cobra.ShellCompDirectiveNoFileComp
} }

View File

@ -2,7 +2,7 @@
####> podman create, run ####> podman create, run
####> If file is edited, make sure the changes ####> If file is edited, make sure the changes
####> are applicable to all of those. ####> are applicable to all of those.
#### **--sdnotify**=**container** | *conmon* | *ignore* #### **--sdnotify**=**container** | *conmon* | *healthy* | *ignore*
Determines how to use the NOTIFY_SOCKET, as passed with systemd and Type=notify. Determines how to use the NOTIFY_SOCKET, as passed with systemd and Type=notify.
@ -10,5 +10,7 @@ Default is **container**, which means allow the OCI runtime to proxy the socket
container to receive ready notification. Podman sets the MAINPID to conmon's pid. container to receive ready notification. Podman sets the MAINPID to conmon's pid.
The **conmon** option sets MAINPID to conmon's pid, and sends READY when the container The **conmon** option sets MAINPID to conmon's pid, and sends READY when the container
has started. The socket is never passed to the runtime or the container. has started. The socket is never passed to the runtime or the container.
The **healthy** option sets MAINPID to conmon's pid, and sends READY when the container
has turned healthy; requires a healthcheck to be set. The socket is never passed to the runtime or the container.
The **ignore** option removes NOTIFY_SOCKET from the environment for itself and child processes, The **ignore** option removes NOTIFY_SOCKET from the environment for itself and child processes,
for the case where some other process above Podman uses NOTIFY_SOCKET and Podman does not use it. for the case where some other process above Podman uses NOTIFY_SOCKET and Podman does not use it.

View File

@ -113,7 +113,7 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
} }
// Start the container // Start the container
return c.start() return c.start(ctx)
} }
// Update updates the given container. // Update updates the given container.

View File

@ -308,7 +308,7 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
return false, err return false, err
} }
} }
if err := c.start(); err != nil { if err := c.start(ctx); err != nil {
return false, err return false, err
} }
return true, nil return true, nil
@ -1198,11 +1198,11 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
} }
// Now start the container // Now start the container
return c.start() return c.start(ctx)
} }
// Internal, non-locking function to start a container // Internal, non-locking function to start a container
func (c *Container) start() error { func (c *Container) start(ctx context.Context) error {
if c.config.Spec.Process != nil { if c.config.Spec.Process != nil {
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args) logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
} }
@ -1214,9 +1214,11 @@ func (c *Container) start() error {
c.state.State = define.ContainerStateRunning c.state.State = define.ContainerStateRunning
// Unless being ignored, set the MAINPID to conmon.
if c.config.SdNotifyMode != define.SdNotifyModeIgnore { if c.config.SdNotifyMode != define.SdNotifyModeIgnore {
payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID) payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID)
if c.config.SdNotifyMode == define.SdNotifyModeConmon { if c.config.SdNotifyMode == define.SdNotifyModeConmon {
// Also send the READY message for the "conmon" policy.
payload += "\n" payload += "\n"
payload += daemon.SdNotifyReady payload += daemon.SdNotifyReady
} }
@ -1241,7 +1243,32 @@ func (c *Container) start() error {
defer c.newContainerEvent(events.Start) defer c.newContainerEvent(events.Start)
return c.save() if err := c.save(); err != nil {
return err
}
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
return nil
}
// Wait for the container to turn healthy before sending the READY
// message. This implies that we need to unlock and re-lock the
// container.
if !c.batched {
c.lock.Unlock()
defer c.lock.Lock()
}
if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
return err
}
if err := notifyproxy.SendMessage(c.config.SdNotifySocket, daemon.SdNotifyReady); err != nil {
logrus.Errorf("Sending READY message after turning healthy: %s", err.Error())
} else {
logrus.Debugf("Notify sent successfully")
}
return nil
} }
// Internal, non-locking function to stop container // Internal, non-locking function to stop container
@ -1487,7 +1514,7 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
return err return err
} }
} }
return c.start() return c.start(ctx)
} }
// mountStorage sets up the container's root filesystem // mountStorage sets up the container's root filesystem

View File

@ -4,17 +4,18 @@ import "fmt"
// Strings used for --sdnotify option to podman // Strings used for --sdnotify option to podman
const ( const (
SdNotifyModeContainer = "container"
SdNotifyModeConmon = "conmon" SdNotifyModeConmon = "conmon"
SdNotifyModeContainer = "container"
SdNotifyModeHealthy = "healthy"
SdNotifyModeIgnore = "ignore" SdNotifyModeIgnore = "ignore"
) )
// ValidateSdNotifyMode validates the specified mode. // ValidateSdNotifyMode validates the specified mode.
func ValidateSdNotifyMode(mode string) error { func ValidateSdNotifyMode(mode string) error {
switch mode { switch mode {
case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore: case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore, SdNotifyModeHealthy:
return nil return nil
default: default:
return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore) return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeConmon, SdNotifyModeContainer, SdNotifyModeHealthy, SdNotifyModeIgnore)
} }
} }

View File

@ -4,6 +4,7 @@
package libpod package libpod
import ( import (
"context"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@ -86,7 +87,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
// If starting was requested, start the container and notify when that's // If starting was requested, start the container and notify when that's
// done. // done.
if params.Start { if params.Start {
if err := c.start(); err != nil { if err := c.start(context.TODO()); err != nil {
return err return err
} }
params.Started <- true params.Started <- true

View File

@ -14,8 +14,6 @@ var (
ErrInvalidSpecConfig = errors.New("invalid configuration") ErrInvalidSpecConfig = errors.New("invalid configuration")
// SystemDValues describes the only values that SystemD can be // SystemDValues describes the only values that SystemD can be
SystemDValues = []string{"true", "false", "always"} SystemDValues = []string{"true", "false", "always"}
// SdNotifyModeValues describes the only values that SdNotifyMode can be
SdNotifyModeValues = []string{define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore}
// ImageVolumeModeValues describes the only values that ImageVolumeMode can be // ImageVolumeModeValues describes the only values that ImageVolumeMode can be
ImageVolumeModeValues = []string{"ignore", define.TypeTmpfs, "anonymous"} ImageVolumeModeValues = []string{"ignore", define.TypeTmpfs, "anonymous"}
) )

View File

@ -601,18 +601,25 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
} }
options = append(options, libpod.WithRestartRetries(retries), libpod.WithRestartPolicy(restartPolicy)) options = append(options, libpod.WithRestartRetries(retries), libpod.WithRestartPolicy(restartPolicy))
healthCheckSet := false
if s.ContainerHealthCheckConfig.HealthConfig != nil { if s.ContainerHealthCheckConfig.HealthConfig != nil {
options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig)) options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig))
logrus.Debugf("New container has a health check") logrus.Debugf("New container has a health check")
healthCheckSet = true
} }
if s.ContainerHealthCheckConfig.StartupHealthConfig != nil { if s.ContainerHealthCheckConfig.StartupHealthConfig != nil {
options = append(options, libpod.WithStartupHealthcheck(s.ContainerHealthCheckConfig.StartupHealthConfig)) options = append(options, libpod.WithStartupHealthcheck(s.ContainerHealthCheckConfig.StartupHealthConfig))
healthCheckSet = true
} }
if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone { if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone {
options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction)) options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction))
} }
if s.SdNotifyMode == define.SdNotifyModeHealthy && !healthCheckSet {
return nil, fmt.Errorf("%w: sdnotify policy %q requires a healthcheck to be set", define.ErrInvalidArg, s.SdNotifyMode)
}
if len(s.Secrets) != 0 { if len(s.Secrets) != 0 {
manager, err := rt.SecretsManager() manager, err := rt.SecretsManager()
if err != nil { if err != nil {

View File

@ -184,6 +184,65 @@ READY=1"
_stop_socat _stop_socat
} }
# These tests can fail in dev. environment because of SELinux.
# quick fix: chcon -t container_runtime_exec_t ./bin/podman
@test "sdnotify : healthy" {
# Exercises the "healthy" sdnotify policy: while the container is running but
# has not passed a healthcheck yet, only the MAINPID message is expected on
# the notify socket; READY=1 is expected once the healthcheck has been run.
export NOTIFY_SOCKET=$PODMAN_TMPDIR/container.sock
_start_socat
# Marker file that the backgrounded `podman start` below touches once it has
# returned successfully.
wait_file="$PODMAN_TMPDIR/$(random_string).wait_for_me"
# Without a healthcheck, creating a container with --sdnotify=healthy must
# fail (exit code 125) with a descriptive error.
run_podman 125 create --sdnotify=healthy $IMAGE
is "$output" "Error: invalid argument: sdnotify policy \"healthy\" requires a healthcheck to be set"
# Create a container with a simple `/bin/true` healthcheck that we need to
# run manually.
ctr=$(random_string)
run_podman create --name $ctr \
--health-cmd=/bin/true \
--health-retries=1 \
--health-interval=disable \
--sdnotify=healthy \
$IMAGE sleep infinity
# Start the container in the background which will block until the
# container turned healthy. After that, create the wait_file which
# indicates that start has returned. `timeout` bounds the start so that a
# hang cannot block the test forever.
(timeout --foreground -v --kill=5 20 $PODMAN start $ctr && touch $wait_file) &
# Block until the container has reached the running state.
run_podman wait --condition=running $ctr
# Make sure that the MAINPID is set but without the READY message.
run_podman container inspect $ctr --format "{{.State.ConmonPid}}"
mainPID="$output"
# The healthcheck has not been run yet, so the socat log is expected to
# contain the MAINPID message and nothing else (in particular no READY=1).
run cat $_SOCAT_LOG
# The 'echo's help us debug failed runs
echo "socat log:"
echo "$output"
is "$output" "MAINPID=$mainPID" "Container is not healthy yet, so we only know the main PID"
# Now run the healthcheck and look for the READY message.
run_podman healthcheck run $ctr
is "$output" "" "output from 'podman healthcheck run'"
# Wait for start to return. At that point the READY message must have been
# sent.
wait_for_file $wait_file
run cat $_SOCAT_LOG
echo "socat log:"
echo "$output"
is "$output" "MAINPID=$mainPID
READY=1"
# Sending READY must not have affected the container itself.
run_podman container inspect --format "{{.State.Status}}" $ctr
is "$output" "running" "make sure container is still running"
# Clean up the container and the socat listener.
run_podman rm -f -t0 $ctr
_stop_socat
}
@test "sdnotify : play kube - no policies" { @test "sdnotify : play kube - no policies" {
# Create the YAML file # Create the YAML file
yaml_source="$PODMAN_TMPDIR/test.yaml" yaml_source="$PODMAN_TMPDIR/test.yaml"