mirror of
https://github.com/containers/podman.git
synced 2025-07-15 03:02:52 +08:00
health check: add on-failure actions
For systems that have extreme robustness requirements (edge devices, particularly those in difficult-to-access environments), it is important that applications continue running in all circumstances. When the application fails, Podman must restart it automatically to provide this robustness. Otherwise, someone (e.g., customer IT) may have to gain physical access to the device to restart it, which can be prohibitively difficult.

Add a new `--health-on-failure` flag that supports four actions:

- **none**: Take no action.
- **kill**: Kill the container.
- **restart**: Restart the container. Do not combine the `restart` action with the `--restart` flag. When running inside of a systemd unit, consider using the `kill` or `stop` action instead to make use of systemd's restart policy.
- **stop**: Stop the container.

To remain backwards compatible, **none** is the default action.

Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
@ -1641,3 +1641,8 @@ func AutocompleteSSH(cmd *cobra.Command, args []string, toComplete string) ([]st
|
|||||||
}
|
}
|
||||||
return []string{string(ssh.GolangMode), string(ssh.NativeMode)}, cobra.ShellCompDirectiveNoFileComp
|
return []string{string(ssh.GolangMode), string(ssh.NativeMode)}, cobra.ShellCompDirectiveNoFileComp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AutocompleteHealthOnFailure - action to take once the container turns unhealthy.
|
||||||
|
func AutocompleteHealthOnFailure(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
|
||||||
|
return define.SupportedHealthCheckOnFailureActions, cobra.ShellCompDirectiveNoFileComp
|
||||||
|
}
|
||||||
|
@ -208,6 +208,14 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
|
|||||||
)
|
)
|
||||||
_ = cmd.RegisterFlagCompletionFunc(healthTimeoutFlagName, completion.AutocompleteNone)
|
_ = cmd.RegisterFlagCompletionFunc(healthTimeoutFlagName, completion.AutocompleteNone)
|
||||||
|
|
||||||
|
healthOnFailureFlagName := "health-on-failure"
|
||||||
|
createFlags.StringVar(
|
||||||
|
&cf.HealthOnFailure,
|
||||||
|
healthOnFailureFlagName, "none",
|
||||||
|
"action to take once the container turns unhealthy",
|
||||||
|
)
|
||||||
|
_ = cmd.RegisterFlagCompletionFunc(healthOnFailureFlagName, AutocompleteHealthOnFailure)
|
||||||
|
|
||||||
createFlags.BoolVar(
|
createFlags.BoolVar(
|
||||||
&cf.HTTPProxy,
|
&cf.HTTPProxy,
|
||||||
"http-proxy", containerConfig.Containers.HTTPProxy,
|
"http-proxy", containerConfig.Containers.HTTPProxy,
|
||||||
|
8
docs/source/markdown/options/health-on-failure.md
Normal file
8
docs/source/markdown/options/health-on-failure.md
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
#### **--health-on-failure**=*action*
|
||||||
|
|
||||||
|
Action to take once the container transitions to an unhealthy state. The default is **none**.
|
||||||
|
|
||||||
|
- **none**: Take no action.
|
||||||
|
- **kill**: Kill the container.
|
||||||
|
- **restart**: Restart the container. Do not combine the `restart` action with the `--restart` flag. When running inside of a systemd unit, consider using the `kill` or `stop` action instead to make use of systemd's restart policy.
|
||||||
|
- **stop**: Stop the container.
|
@ -185,6 +185,8 @@ Read in a line delimited file of environment variables. See **Environment** note
|
|||||||
|
|
||||||
@@option health-interval
|
@@option health-interval
|
||||||
|
|
||||||
|
@@option health-on-failure
|
||||||
|
|
||||||
@@option health-retries
|
@@option health-retries
|
||||||
|
|
||||||
@@option health-start-period
|
@@option health-start-period
|
||||||
|
@ -221,6 +221,8 @@ Read in a line delimited file of environment variables. See **Environment** note
|
|||||||
|
|
||||||
@@option health-interval
|
@@option health-interval
|
||||||
|
|
||||||
|
@@option health-on-failure
|
||||||
|
|
||||||
@@option health-retries
|
@@option health-retries
|
||||||
|
|
||||||
@@option health-start-period
|
@@option health-start-period
|
||||||
|
@ -7,6 +7,7 @@ import (
|
|||||||
"github.com/containers/common/libnetwork/types"
|
"github.com/containers/common/libnetwork/types"
|
||||||
"github.com/containers/common/pkg/secrets"
|
"github.com/containers/common/pkg/secrets"
|
||||||
"github.com/containers/image/v5/manifest"
|
"github.com/containers/image/v5/manifest"
|
||||||
|
"github.com/containers/podman/v4/libpod/define"
|
||||||
"github.com/containers/podman/v4/pkg/namespaces"
|
"github.com/containers/podman/v4/pkg/namespaces"
|
||||||
"github.com/containers/podman/v4/pkg/specgen"
|
"github.com/containers/podman/v4/pkg/specgen"
|
||||||
"github.com/containers/storage"
|
"github.com/containers/storage"
|
||||||
@ -392,6 +393,8 @@ type ContainerMiscConfig struct {
|
|||||||
Systemd *bool `json:"systemd,omitempty"`
|
Systemd *bool `json:"systemd,omitempty"`
|
||||||
// HealthCheckConfig has the health check command and related timings
|
// HealthCheckConfig has the health check command and related timings
|
||||||
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
|
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
|
||||||
|
// HealthCheckOnFailureAction defines an action to take once the container turns unhealthy.
|
||||||
|
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"`
|
||||||
// PreserveFDs is a number of additional file descriptors (in addition
|
// PreserveFDs is a number of additional file descriptors (in addition
|
||||||
// to 0, 1, 2) that will be passed to the executed process. The total FDs
|
// to 0, 1, 2) that will be passed to the executed process. The total FDs
|
||||||
// passed will be 3 + PreserveFDs.
|
// passed will be 3 + PreserveFDs.
|
||||||
|
@ -390,6 +390,8 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) *define.Insp
|
|||||||
// leak.
|
// leak.
|
||||||
ctrConfig.Healthcheck = c.config.HealthCheckConfig
|
ctrConfig.Healthcheck = c.config.HealthCheckConfig
|
||||||
|
|
||||||
|
ctrConfig.HealthcheckOnFailureAction = c.config.HealthCheckOnFailureAction.String()
|
||||||
|
|
||||||
ctrConfig.CreateCommand = c.config.CreateCommand
|
ctrConfig.CreateCommand = c.config.CreateCommand
|
||||||
|
|
||||||
ctrConfig.Timezone = c.config.Timezone
|
ctrConfig.Timezone = c.config.Timezone
|
||||||
|
@ -137,5 +137,9 @@ func (c *Container) validate() error {
|
|||||||
if c.config.SdNotifyMode == define.SdNotifyModeIgnore && len(c.config.SdNotifySocket) > 0 {
|
if c.config.SdNotifyMode == define.SdNotifyModeIgnore && len(c.config.SdNotifySocket) > 0 {
|
||||||
return fmt.Errorf("cannot set sd-notify socket %q with sd-notify mode %q", c.config.SdNotifySocket, c.config.SdNotifyMode)
|
return fmt.Errorf("cannot set sd-notify socket %q with sd-notify mode %q", c.config.SdNotifySocket, c.config.SdNotifyMode)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if c.config.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone && c.config.HealthCheckConfig == nil {
|
||||||
|
return fmt.Errorf("cannot set on-failure action to %s without a health check", c.config.HealthCheckOnFailureAction.String())
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -55,6 +55,8 @@ type InspectContainerConfig struct {
|
|||||||
StopSignal uint `json:"StopSignal"`
|
StopSignal uint `json:"StopSignal"`
|
||||||
// Configured healthcheck for the container
|
// Configured healthcheck for the container
|
||||||
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
|
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
|
||||||
|
// HealthcheckOnFailureAction defines an action to take once the container turns unhealthy.
|
||||||
|
HealthcheckOnFailureAction string `json:"HealthcheckOnFailureAction,omitempty"`
|
||||||
// CreateCommand is the full command plus arguments of the process the
|
// CreateCommand is the full command plus arguments of the process the
|
||||||
// container has been created with.
|
// container has been created with.
|
||||||
CreateCommand []string `json:"CreateCommand,omitempty"`
|
CreateCommand []string `json:"CreateCommand,omitempty"`
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
package define
|
package define
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// HealthCheckHealthy describes a healthy container
|
// HealthCheckHealthy describes a healthy container
|
||||||
HealthCheckHealthy string = "healthy"
|
HealthCheckHealthy string = "healthy"
|
||||||
@ -57,3 +62,72 @@ const (
|
|||||||
// HealthConfigTestCmdShell runs commands with the system's default shell
|
// HealthConfigTestCmdShell runs commands with the system's default shell
|
||||||
HealthConfigTestCmdShell = "CMD-SHELL"
|
HealthConfigTestCmdShell = "CMD-SHELL"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// HealthCheckOnFailureAction defines how Podman reacts when a container's health
|
||||||
|
// status turns unhealthy.
|
||||||
|
type HealthCheckOnFailureAction int
|
||||||
|
|
||||||
|
// Healthcheck on-failure actions.
|
||||||
|
const (
|
||||||
|
// HealthCheckOnFailureActionNone instructs Podman to not react on an unhealthy status.
|
||||||
|
HealthCheckOnFailureActionNone = iota // Must be first iota for backwards compatibility
|
||||||
|
// HealthCheckOnFailureActionInvalid denotes an invalid on-failure policy.
|
||||||
|
HealthCheckOnFailureActionInvalid = iota
|
||||||
|
// HealthCheckOnFailureActionKill instructs Podman to kill the container on an unhealthy status.
|
||||||
|
HealthCheckOnFailureActionKill = iota
|
||||||
|
// HealthCheckOnFailureActionRestart instructs Podman to restart the container on an unhealthy status.
|
||||||
|
HealthCheckOnFailureActionRestart = iota
|
||||||
|
// HealthCheckOnFailureActionStop instructs Podman to stop the container on an unhealthy status.
|
||||||
|
HealthCheckOnFailureActionStop = iota
|
||||||
|
)
|
||||||
|
|
||||||
|
// String representations for on-failure actions.
|
||||||
|
const (
|
||||||
|
strHealthCheckOnFailureActionNone = "none"
|
||||||
|
strHealthCheckOnFailureActionInvalid = "invalid"
|
||||||
|
strHealthCheckOnFailureActionKill = "kill"
|
||||||
|
strHealthCheckOnFailureActionRestart = "restart"
|
||||||
|
strHealthCheckOnFailureActionStop = "stop"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SupportedHealthCheckOnFailureActions lists all supported health-check on-failure actions.
|
||||||
|
var SupportedHealthCheckOnFailureActions = []string{
|
||||||
|
strHealthCheckOnFailureActionNone,
|
||||||
|
strHealthCheckOnFailureActionKill,
|
||||||
|
strHealthCheckOnFailureActionRestart,
|
||||||
|
strHealthCheckOnFailureActionStop,
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the string representation of the HealthCheckOnFailureAction.
|
||||||
|
func (h HealthCheckOnFailureAction) String() string {
|
||||||
|
switch h {
|
||||||
|
case HealthCheckOnFailureActionNone:
|
||||||
|
return strHealthCheckOnFailureActionNone
|
||||||
|
case HealthCheckOnFailureActionKill:
|
||||||
|
return strHealthCheckOnFailureActionKill
|
||||||
|
case HealthCheckOnFailureActionRestart:
|
||||||
|
return strHealthCheckOnFailureActionRestart
|
||||||
|
case HealthCheckOnFailureActionStop:
|
||||||
|
return strHealthCheckOnFailureActionStop
|
||||||
|
default:
|
||||||
|
return strHealthCheckOnFailureActionInvalid
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseHealthCheckOnFailureAction parses the specified string into a HealthCheckOnFailureAction.
|
||||||
|
// An error is returned for an invalid input.
|
||||||
|
func ParseHealthCheckOnFailureAction(s string) (HealthCheckOnFailureAction, error) {
|
||||||
|
switch s {
|
||||||
|
case "", strHealthCheckOnFailureActionNone:
|
||||||
|
return HealthCheckOnFailureActionNone, nil
|
||||||
|
case strHealthCheckOnFailureActionKill:
|
||||||
|
return HealthCheckOnFailureActionKill, nil
|
||||||
|
case strHealthCheckOnFailureActionRestart:
|
||||||
|
return HealthCheckOnFailureActionRestart, nil
|
||||||
|
case strHealthCheckOnFailureActionStop:
|
||||||
|
return HealthCheckOnFailureActionStop, nil
|
||||||
|
default:
|
||||||
|
err := fmt.Errorf("invalid on-failure action %q for health check: supported actions are %s", s, strings.Join(SupportedHealthCheckOnFailureActions, ","))
|
||||||
|
return HealthCheckOnFailureActionInvalid, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -2,6 +2,7 @@ package libpod
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
@ -12,6 +13,7 @@ import (
|
|||||||
|
|
||||||
"github.com/containers/podman/v4/libpod/define"
|
"github.com/containers/podman/v4/libpod/define"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -29,9 +31,14 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
|
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
hcStatus, err := checkHealthCheckCanBeRun(container)
|
hcStatus, err := checkHealthCheckCanBeRun(container)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return container.runHealthCheck()
|
hcStatus, err := container.runHealthCheck()
|
||||||
|
if err := container.processHealthCheckStatus(hcStatus); err != nil {
|
||||||
|
return hcStatus, err
|
||||||
|
}
|
||||||
|
return hcStatus, err
|
||||||
}
|
}
|
||||||
return hcStatus, err
|
return hcStatus, err
|
||||||
}
|
}
|
||||||
@ -127,13 +134,45 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) {
|
|||||||
hcResult = define.HealthCheckFailure
|
hcResult = define.HealthCheckFailure
|
||||||
hcErr = fmt.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String())
|
hcErr = fmt.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
|
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
|
||||||
if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
|
if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil {
|
||||||
return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
|
return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return hcResult, hcErr
|
return hcResult, hcErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Container) processHealthCheckStatus(status define.HealthCheckStatus) error {
|
||||||
|
if status == define.HealthCheckSuccess {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
switch c.config.HealthCheckOnFailureAction {
|
||||||
|
case define.HealthCheckOnFailureActionNone: // Nothing to do
|
||||||
|
|
||||||
|
case define.HealthCheckOnFailureActionKill:
|
||||||
|
if err := c.Kill(uint(unix.SIGKILL)); err != nil {
|
||||||
|
return fmt.Errorf("killing container health-check turned unhealthy: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
case define.HealthCheckOnFailureActionRestart:
|
||||||
|
if err := c.RestartWithTimeout(context.Background(), c.config.StopTimeout); err != nil {
|
||||||
|
return fmt.Errorf("restarting container after health-check turned unhealthy: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
case define.HealthCheckOnFailureActionStop:
|
||||||
|
if err := c.Stop(); err != nil {
|
||||||
|
return fmt.Errorf("stopping container after health-check turned unhealthy: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
default: // Should not happen but better be safe than sorry
|
||||||
|
return fmt.Errorf("unsupported on-failure action %d", c.config.HealthCheckOnFailureAction)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
|
func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
|
||||||
cstate, err := c.State()
|
cstate, err := c.State()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -1473,6 +1473,17 @@ func WithHealthCheck(healthCheck *manifest.Schema2HealthConfig) CtrCreateOption
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithHealthCheckOnFailureAction adds an on-failure action to the health-check config.
|
||||||
|
func WithHealthCheckOnFailureAction(action define.HealthCheckOnFailureAction) CtrCreateOption {
|
||||||
|
return func(ctr *Container) error {
|
||||||
|
if ctr.valid {
|
||||||
|
return define.ErrCtrFinalized
|
||||||
|
}
|
||||||
|
ctr.config.HealthCheckOnFailureAction = action
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// WithPreserveFDs forwards from the process running Libpod into the container
|
// WithPreserveFDs forwards from the process running Libpod into the container
|
||||||
// the given number of extra FDs (starting after the standard streams) to the created container
|
// the given number of extra FDs (starting after the standard streams) to the created container
|
||||||
func WithPreserveFDs(fd uint) CtrCreateOption {
|
func WithPreserveFDs(fd uint) CtrCreateOption {
|
||||||
|
@ -212,6 +212,7 @@ type ContainerCreateOptions struct {
|
|||||||
HealthRetries uint
|
HealthRetries uint
|
||||||
HealthStartPeriod string
|
HealthStartPeriod string
|
||||||
HealthTimeout string
|
HealthTimeout string
|
||||||
|
HealthOnFailure string
|
||||||
Hostname string `json:"hostname,omitempty"`
|
Hostname string `json:"hostname,omitempty"`
|
||||||
HTTPProxy bool
|
HTTPProxy bool
|
||||||
HostUsers []string
|
HostUsers []string
|
||||||
|
@ -515,6 +515,10 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
|
|||||||
logrus.Debugf("New container has a health check")
|
logrus.Debugf("New container has a health check")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone {
|
||||||
|
options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction))
|
||||||
|
}
|
||||||
|
|
||||||
if len(s.Secrets) != 0 {
|
if len(s.Secrets) != 0 {
|
||||||
manager, err := rt.SecretsManager()
|
manager, err := rt.SecretsManager()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
"github.com/containers/common/libimage"
|
"github.com/containers/common/libimage"
|
||||||
nettypes "github.com/containers/common/libnetwork/types"
|
nettypes "github.com/containers/common/libnetwork/types"
|
||||||
"github.com/containers/image/v5/manifest"
|
"github.com/containers/image/v5/manifest"
|
||||||
|
"github.com/containers/podman/v4/libpod/define"
|
||||||
"github.com/containers/storage/types"
|
"github.com/containers/storage/types"
|
||||||
spec "github.com/opencontainers/runtime-spec/specs-go"
|
spec "github.com/opencontainers/runtime-spec/specs-go"
|
||||||
)
|
)
|
||||||
@ -533,7 +534,8 @@ type ContainerResourceConfig struct {
|
|||||||
// ContainerHealthCheckConfig describes a container healthcheck with attributes
|
// ContainerHealthCheckConfig describes a container healthcheck with attributes
|
||||||
// like command, retries, interval, start period, and timeout.
|
// like command, retries, interval, start period, and timeout.
|
||||||
type ContainerHealthCheckConfig struct {
|
type ContainerHealthCheckConfig struct {
|
||||||
HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"`
|
HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"`
|
||||||
|
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"health_check_on_failure_action,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SpecGenerator creates an OCI spec and Libpod configuration options to create
|
// SpecGenerator creates an OCI spec and Libpod configuration options to create
|
||||||
|
@ -265,6 +265,13 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
|
|||||||
Test: []string{"NONE"},
|
Test: []string{"NONE"},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
onFailureAction, err := define.ParseHealthCheckOnFailureAction(c.HealthOnFailure)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
s.HealthCheckOnFailureAction = onFailureAction
|
||||||
|
|
||||||
if err := setNamespaces(s, c); err != nil {
|
if err := setNamespaces(s, c); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -20,44 +20,8 @@ function _check_health {
|
|||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@test "podman healthcheck" {
|
@test "podman healthcheck" {
|
||||||
# Create an image with a healthcheck script; said script will
|
_build_health_check_image healthcheck_i
|
||||||
# pass until the file /uh-oh gets created (by us, via exec)
|
|
||||||
cat >${PODMAN_TMPDIR}/healthcheck <<EOF
|
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
if test -e /uh-oh; then
|
|
||||||
echo "Uh-oh on stdout!"
|
|
||||||
echo "Uh-oh on stderr!" >&2
|
|
||||||
exit 1
|
|
||||||
else
|
|
||||||
echo "Life is Good on stdout"
|
|
||||||
echo "Life is Good on stderr" >&2
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
EOF
|
|
||||||
|
|
||||||
cat >${PODMAN_TMPDIR}/entrypoint <<EOF
|
|
||||||
#!/bin/sh
|
|
||||||
|
|
||||||
while :; do
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
EOF
|
|
||||||
|
|
||||||
cat >${PODMAN_TMPDIR}/Containerfile <<EOF
|
|
||||||
FROM $IMAGE
|
|
||||||
|
|
||||||
COPY healthcheck /healthcheck
|
|
||||||
COPY entrypoint /entrypoint
|
|
||||||
|
|
||||||
RUN chmod 755 /healthcheck /entrypoint
|
|
||||||
|
|
||||||
CMD ["/entrypoint"]
|
|
||||||
EOF
|
|
||||||
|
|
||||||
run_podman build -t healthcheck_i ${PODMAN_TMPDIR}
|
|
||||||
|
|
||||||
# Run that healthcheck image.
|
# Run that healthcheck image.
|
||||||
run_podman run -d --name healthcheck_c \
|
run_podman run -d --name healthcheck_c \
|
||||||
@ -66,6 +30,9 @@ EOF
|
|||||||
--health-retries 3 \
|
--health-retries 3 \
|
||||||
healthcheck_i
|
healthcheck_i
|
||||||
|
|
||||||
|
run_podman inspect healthcheck_c --format "{{.Config.HealthcheckOnFailureAction}}"
|
||||||
|
is "$output" "none" "default on-failure action is none"
|
||||||
|
|
||||||
# We can't check for 'starting' because a 1-second interval is too
|
# We can't check for 'starting' because a 1-second interval is too
|
||||||
# short; it could run healthcheck before we get to our first check.
|
# short; it could run healthcheck before we get to our first check.
|
||||||
#
|
#
|
||||||
@ -109,4 +76,59 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
|
|||||||
run_podman rmi healthcheck_i
|
run_podman rmi healthcheck_i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@test "podman healthcheck --health-on-failure" {
|
||||||
|
run_podman 125 create --health-on-failure=kill $IMAGE
|
||||||
|
is "$output" "Error: cannot set on-failure action to kill without a health check"
|
||||||
|
|
||||||
|
ctr="healthcheck_c"
|
||||||
|
img="healthcheck_i"
|
||||||
|
|
||||||
|
for policy in none kill restart stop;do
|
||||||
|
if [[ $policy == "none" ]];then
|
||||||
|
# Do not remove the /uh-oh file for `none` as we want to
|
||||||
|
# demonstrate that no action was taken
|
||||||
|
_build_health_check_image $img
|
||||||
|
else
|
||||||
|
_build_health_check_image $img cleanfile
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Run that healthcheck image.
|
||||||
|
run_podman run -d --name $ctr \
|
||||||
|
--health-cmd /healthcheck \
|
||||||
|
--health-on-failure=$policy \
|
||||||
|
$img
|
||||||
|
|
||||||
|
# healthcheck should succeed
|
||||||
|
run_podman healthcheck run $ctr
|
||||||
|
|
||||||
|
# Now cause the healthcheck to fail
|
||||||
|
run_podman exec $ctr touch /uh-oh
|
||||||
|
|
||||||
|
# healthcheck should now fail, with exit status 1 and 'unhealthy' output
|
||||||
|
run_podman 1 healthcheck run $ctr
|
||||||
|
# FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists
|
||||||
|
is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'"
|
||||||
|
|
||||||
|
run_podman inspect $ctr --format "{{.State.Status}} {{.Config.HealthcheckOnFailureAction}}"
|
||||||
|
if [[ $policy == "restart" ]];then
|
||||||
|
# Container has been restarted and health check works again
|
||||||
|
is "$output" "running $policy" "container has been restarted"
|
||||||
|
run_podman healthcheck run $ctr
|
||||||
|
elif [[ $policy == "none" ]];then
|
||||||
|
# Container is still running and health check still broken
|
||||||
|
is "$output" "running $policy" "container continued running"
|
||||||
|
run_podman 1 healthcheck run $ctr
|
||||||
|
# FIXME: #15691 - `healthcheck run` may emit an error log that the timer already exists
|
||||||
|
is "$output" ".*unhealthy.*" "output from 'podman healthcheck run'"
|
||||||
|
else
|
||||||
|
# kill and stop put the container into a non-running state
|
||||||
|
is "$output" ".* $policy" "container was stopped/killed"
|
||||||
|
assert "$output" != "running $policy"
|
||||||
|
fi
|
||||||
|
|
||||||
|
run_podman rm -f -t0 $ctr
|
||||||
|
run_podman rmi -f $img
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
# vim: filetype=sh
|
# vim: filetype=sh
|
||||||
|
@ -304,6 +304,57 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
|
|||||||
run_podman network rm -f $netname
|
run_podman network rm -f $netname
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@test "podman create --health-on-failure=kill" {
|
||||||
|
img="healthcheck_i"
|
||||||
|
_build_health_check_image $img
|
||||||
|
|
||||||
|
cname=$(random_string)
|
||||||
|
run_podman create --name $cname \
|
||||||
|
--health-cmd /healthcheck \
|
||||||
|
--health-on-failure=kill \
|
||||||
|
--restart=on-failure \
|
||||||
|
$img
|
||||||
|
|
||||||
|
# run container in systemd unit
|
||||||
|
service_setup
|
||||||
|
|
||||||
|
run_podman container inspect $cname --format "{{.ID}}"
|
||||||
|
oldID="$output"
|
||||||
|
|
||||||
|
run_podman healthcheck run $cname
|
||||||
|
|
||||||
|
# Now cause the healthcheck to fail
|
||||||
|
run_podman exec $cname touch /uh-oh
|
||||||
|
|
||||||
|
# healthcheck should now fail, with exit status 1 and 'unhealthy' output
|
||||||
|
run_podman 1 healthcheck run $cname
|
||||||
|
is "$output" "unhealthy" "output from 'podman healthcheck run'"
|
||||||
|
|
||||||
|
# What is expected to happen now:
|
||||||
|
# 1) The container gets killed as the health check has failed
|
||||||
|
# 2) Systemd restarts the service as the restart policy is set to "on-failure"
|
||||||
|
# 3) The /uh-oh file is gone and $cname has another ID
|
||||||
|
|
||||||
|
# Wait at most 10 seconds for the service to be restarted
|
||||||
|
local timeout=10
|
||||||
|
while [[ $timeout -gt 1 ]]; do
|
||||||
|
run_podman '?' container inspect $cname
|
||||||
|
if [[ $status == 0 ]]; then
|
||||||
|
if [[ "$output" != "$oldID" ]]; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
sleep 1
|
||||||
|
let timeout=$timeout-1
|
||||||
|
done
|
||||||
|
|
||||||
|
run_podman healthcheck run $cname
|
||||||
|
|
||||||
|
# stop systemd container
|
||||||
|
service_cleanup
|
||||||
|
run_podman rmi -f $img
|
||||||
|
}
|
||||||
|
|
||||||
@test "podman-kube@.service template" {
|
@test "podman-kube@.service template" {
|
||||||
install_kube_template
|
install_kube_template
|
||||||
# Create the YAML file
|
# Create the YAML file
|
||||||
|
@ -894,5 +894,59 @@ function _podman_commands() {
|
|||||||
awk '/^Available Commands:/{ok=1;next}/^Options:/{ok=0}ok { print $1 }' <<<"$output" | grep .
|
awk '/^Available Commands:/{ok=1;next}/^Options:/{ok=0}ok { print $1 }' <<<"$output" | grep .
|
||||||
}
|
}
|
||||||
|
|
||||||
|
###############################
|
||||||
|
# _build_health_check_image # Builds a container image with a configured health check
|
||||||
|
###############################
|
||||||
|
#
|
||||||
|
# The health check will fail once the /uh-oh file exists.
|
||||||
|
#
|
||||||
|
# First argument is the desired name of the image
|
||||||
|
# Second argument, if present and non-null, forces removal of the /uh-oh file once the check failed; this way the container can be restarted
|
||||||
|
#
|
||||||
|
|
||||||
|
function _build_health_check_image {
|
||||||
|
local imagename="$1"
|
||||||
|
local cleanfile=""
|
||||||
|
|
||||||
|
if [[ ! -z "$2" ]]; then
|
||||||
|
cleanfile="rm -f /uh-oh"
|
||||||
|
fi
|
||||||
|
# Create an image with a healthcheck script; said script will
|
||||||
|
# pass until the file /uh-oh gets created (by us, via exec)
|
||||||
|
cat >${PODMAN_TMPDIR}/healthcheck <<EOF
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
if test -e /uh-oh; then
|
||||||
|
echo "Uh-oh on stdout!"
|
||||||
|
echo "Uh-oh on stderr!" >&2
|
||||||
|
${cleanfile}
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
|
echo "Life is Good on stdout"
|
||||||
|
echo "Life is Good on stderr" >&2
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat >${PODMAN_TMPDIR}/entrypoint <<EOF
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
trap 'echo Received SIGTERM, finishing; exit' SIGTERM; echo WAITING; while :; do sleep 0.1; done
|
||||||
|
EOF
|
||||||
|
|
||||||
|
cat >${PODMAN_TMPDIR}/Containerfile <<EOF
|
||||||
|
FROM $IMAGE
|
||||||
|
|
||||||
|
COPY healthcheck /healthcheck
|
||||||
|
COPY entrypoint /entrypoint
|
||||||
|
|
||||||
|
RUN chmod 755 /healthcheck /entrypoint
|
||||||
|
|
||||||
|
CMD ["/entrypoint"]
|
||||||
|
EOF
|
||||||
|
|
||||||
|
run_podman build -t $imagename ${PODMAN_TMPDIR}
|
||||||
|
}
|
||||||
|
|
||||||
# END miscellaneous tools
|
# END miscellaneous tools
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
Reference in New Issue
Block a user