add "healthy" sdnotify policy

Add a new "healthy" sdnotify policy that instructs Podman to send the
READY message once the container has turned healthy.

Fixes: #6160
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg
2023-07-25 09:45:00 +02:00
parent 2a25d1d746
commit 0cfd12786f
9 changed files with 109 additions and 14 deletions

View File

@ -1545,7 +1545,7 @@ func AutocompleteLogLevel(cmd *cobra.Command, args []string, toComplete string)
// AutocompleteSDNotify - Autocomplete sdnotify options.
// -> "container", "conmon", "ignore"
func AutocompleteSDNotify(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
types := []string{define.SdNotifyModeContainer, define.SdNotifyModeContainer, define.SdNotifyModeIgnore}
types := []string{define.SdNotifyModeConmon, define.SdNotifyModeContainer, define.SdNotifyModeHealthy, define.SdNotifyModeIgnore}
return types, cobra.ShellCompDirectiveNoFileComp
}

View File

@ -2,7 +2,7 @@
####> podman create, run
####> If file is edited, make sure the changes
####> are applicable to all of those.
#### **--sdnotify**=**container** | *conmon* | *ignore*
#### **--sdnotify**=**container** | *conmon* | *healthy* | *ignore*
Determines how to use the NOTIFY_SOCKET, as passed with systemd and Type=notify.
@ -10,5 +10,7 @@ Default is **container**, which means allow the OCI runtime to proxy the socket
container to receive ready notification. Podman sets the MAINPID to conmon's pid.
The **conmon** option sets MAINPID to conmon's pid, and sends READY when the container
has started. The socket is never passed to the runtime or the container.
The **healthy** option sets MAINPID to conmon's pid, and sends READY when the container
has turned healthy; requires a healthcheck to be set. The socket is never passed to the runtime or the container.
The **ignore** option removes NOTIFY_SOCKET from the environment for itself and child processes,
for the case where some other process above Podman uses NOTIFY_SOCKET and Podman does not use it.

View File

@ -113,7 +113,7 @@ func (c *Container) Start(ctx context.Context, recursive bool) (finalErr error)
}
// Start the container
return c.start()
return c.start(ctx)
}
// Update updates the given container.

View File

@ -308,7 +308,7 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
return false, err
}
}
if err := c.start(); err != nil {
if err := c.start(ctx); err != nil {
return false, err
}
return true, nil
@ -1198,11 +1198,11 @@ func (c *Container) initAndStart(ctx context.Context) (retErr error) {
}
// Now start the container
return c.start()
return c.start(ctx)
}
// Internal, non-locking function to start a container
func (c *Container) start() error {
func (c *Container) start(ctx context.Context) error {
if c.config.Spec.Process != nil {
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
}
@ -1214,9 +1214,11 @@ func (c *Container) start() error {
c.state.State = define.ContainerStateRunning
// Unless being ignored, set the MAINPID to conmon.
if c.config.SdNotifyMode != define.SdNotifyModeIgnore {
payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID)
if c.config.SdNotifyMode == define.SdNotifyModeConmon {
// Also send the READY message for the "conmon" policy.
payload += "\n"
payload += daemon.SdNotifyReady
}
@ -1241,7 +1243,32 @@ func (c *Container) start() error {
defer c.newContainerEvent(events.Start)
return c.save()
if err := c.save(); err != nil {
return err
}
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
return nil
}
// Wait for the container to turn healthy before sending the READY
// message. This implies that we need to unlock and re-lock the
// container.
if !c.batched {
c.lock.Unlock()
defer c.lock.Lock()
}
if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
return err
}
if err := notifyproxy.SendMessage(c.config.SdNotifySocket, daemon.SdNotifyReady); err != nil {
logrus.Errorf("Sending READY message after turning healthy: %s", err.Error())
} else {
logrus.Debugf("Notify sent successfully")
}
return nil
}
// Internal, non-locking function to stop container
@ -1487,7 +1514,7 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
return err
}
}
return c.start()
return c.start(ctx)
}
// mountStorage sets up the container's root filesystem

View File

@ -4,17 +4,18 @@ import "fmt"
// Strings used for --sdnotify option to podman
const (
SdNotifyModeContainer = "container"
SdNotifyModeConmon = "conmon"
SdNotifyModeContainer = "container"
SdNotifyModeHealthy = "healthy"
SdNotifyModeIgnore = "ignore"
)
// ValidateSdNotifyMode validates the specified mode.
func ValidateSdNotifyMode(mode string) error {
switch mode {
case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore:
case "", SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore, SdNotifyModeHealthy:
return nil
default:
return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeContainer, SdNotifyModeConmon, SdNotifyModeIgnore)
return fmt.Errorf("%w: invalid sdnotify value %q: must be %s, %s, %s or %s", ErrInvalidArg, mode, SdNotifyModeConmon, SdNotifyModeContainer, SdNotifyModeHealthy, SdNotifyModeIgnore)
}
}

View File

@ -4,6 +4,7 @@
package libpod
import (
"context"
"errors"
"fmt"
"io"
@ -86,7 +87,7 @@ func (r *ConmonOCIRuntime) Attach(c *Container, params *AttachOptions) error {
// If starting was requested, start the container and notify when that's
// done.
if params.Start {
if err := c.start(); err != nil {
if err := c.start(context.TODO()); err != nil {
return err
}
params.Started <- true

View File

@ -14,8 +14,6 @@ var (
ErrInvalidSpecConfig = errors.New("invalid configuration")
// SystemDValues describes the only values that SystemD can be
SystemDValues = []string{"true", "false", "always"}
// SdNotifyModeValues describes the only values that SdNotifyMode can be
SdNotifyModeValues = []string{define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore}
// ImageVolumeModeValues describes the only values that ImageVolumeMode can be
ImageVolumeModeValues = []string{"ignore", define.TypeTmpfs, "anonymous"}
)

View File

@ -601,18 +601,25 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
}
options = append(options, libpod.WithRestartRetries(retries), libpod.WithRestartPolicy(restartPolicy))
healthCheckSet := false
if s.ContainerHealthCheckConfig.HealthConfig != nil {
options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig))
logrus.Debugf("New container has a health check")
healthCheckSet = true
}
if s.ContainerHealthCheckConfig.StartupHealthConfig != nil {
options = append(options, libpod.WithStartupHealthcheck(s.ContainerHealthCheckConfig.StartupHealthConfig))
healthCheckSet = true
}
if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone {
options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction))
}
if s.SdNotifyMode == define.SdNotifyModeHealthy && !healthCheckSet {
return nil, fmt.Errorf("%w: sdnotify policy %q requires a healthcheck to be set", define.ErrInvalidArg, s.SdNotifyMode)
}
if len(s.Secrets) != 0 {
manager, err := rt.SecretsManager()
if err != nil {

View File

@ -184,6 +184,65 @@ READY=1"
_stop_socat
}
# These tests can fail in dev. environment because of SELinux.
# quick fix: chcon -t container_runtime_exec_t ./bin/podman
@test "sdnotify : healthy" {
export NOTIFY_SOCKET=$PODMAN_TMPDIR/container.sock
_start_socat
wait_file="$PODMAN_TMPDIR/$(random_string).wait_for_me"
run_podman 125 create --sdnotify=healthy $IMAGE
is "$output" "Error: invalid argument: sdnotify policy \"healthy\" requires a healthcheck to be set"
# Create a container with a simple `/bin/true` healthcheck that we need to
# run manually.
ctr=$(random_string)
run_podman create --name $ctr \
--health-cmd=/bin/true \
--health-retries=1 \
--health-interval=disable \
--sdnotify=healthy \
$IMAGE sleep infinity
# Start the container in the background which will block until the
# container turned healthy. After that, create the wait_file which
# indicates that start has returned.
(timeout --foreground -v --kill=5 20 $PODMAN start $ctr && touch $wait_file) &
run_podman wait --condition=running $ctr
# Make sure that the MAINPID is set but without the READY message.
run_podman container inspect $ctr --format "{{.State.ConmonPid}}"
mainPID="$output"
# With container, READY=1 isn't necessarily the last message received;
# just look for it anywhere in received messages
run cat $_SOCAT_LOG
# The 'echo's help us debug failed runs
echo "socat log:"
echo "$output"
is "$output" "MAINPID=$mainPID" "Container is not healthy yet, so we only know the main PID"
# Now run the healthcheck and look for the READY message.
run_podman healthcheck run $ctr
is "$output" "" "output from 'podman healthcheck run'"
# Wait for start to return. At that point the READY message must have been
# sent.
wait_for_file $wait_file
run cat $_SOCAT_LOG
echo "socat log:"
echo "$output"
is "$output" "MAINPID=$mainPID
READY=1"
run_podman container inspect --format "{{.State.Status}}" $ctr
is "$output" "running" "make sure container is still running"
run_podman rm -f -t0 $ctr
_stop_socat
}
@test "sdnotify : play kube - no policies" {
# Create the YAMl file
yaml_source="$PODMAN_TMPDIR/test.yaml"