diff --git a/cmd/podman/common/completion.go b/cmd/podman/common/completion.go index b3a816aa4b..60d056aaae 100644 --- a/cmd/podman/common/completion.go +++ b/cmd/podman/common/completion.go @@ -1641,3 +1641,8 @@ func AutocompleteSSH(cmd *cobra.Command, args []string, toComplete string) ([]st } return []string{string(ssh.GolangMode), string(ssh.NativeMode)}, cobra.ShellCompDirectiveNoFileComp } + +// AutocompleteHealthOnFailure - action to take once the container turns unhealthy. +func AutocompleteHealthOnFailure(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { + return define.SupportedHealthCheckOnFailureActions, cobra.ShellCompDirectiveNoFileComp +} diff --git a/cmd/podman/common/create.go b/cmd/podman/common/create.go index a2bc45b9ec..8fff03773a 100644 --- a/cmd/podman/common/create.go +++ b/cmd/podman/common/create.go @@ -208,6 +208,14 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions, ) _ = cmd.RegisterFlagCompletionFunc(healthTimeoutFlagName, completion.AutocompleteNone) + healthOnFailureFlagName := "health-on-failure" + createFlags.StringVar( + &cf.HealthOnFailure, + healthOnFailureFlagName, "none", + "action to take once the container turns unhealthy", + ) + _ = cmd.RegisterFlagCompletionFunc(healthOnFailureFlagName, AutocompleteHealthOnFailure) + createFlags.BoolVar( &cf.HTTPProxy, "http-proxy", containerConfig.Containers.HTTPProxy, diff --git a/docs/source/markdown/options/health-on-failure.md b/docs/source/markdown/options/health-on-failure.md new file mode 100644 index 0000000000..c25a1c574f --- /dev/null +++ b/docs/source/markdown/options/health-on-failure.md @@ -0,0 +1,8 @@ +#### **--health-on-failure**=*action* + +Action to take once the container transitions to an unhealthy state. The default is **none**. + +- **none**: Take no action. +- **kill**: Kill the container. +- **restart**: Restart the container. Do not combine the `restart` action with the `--restart` flag. When running inside of a systemd unit, consider using the `kill` or `stop` action instead to make use of systemd's restart policy. +- **stop**: Stop the container. diff --git a/docs/source/markdown/podman-create.1.md.in b/docs/source/markdown/podman-create.1.md.in index a20aeafcda..f74429848e 100644 --- a/docs/source/markdown/podman-create.1.md.in +++ b/docs/source/markdown/podman-create.1.md.in @@ -185,6 +185,8 @@ Read in a line delimited file of environment variables. See **Environment** note @@option health-interval +@@option health-on-failure + @@option health-retries @@option health-start-period diff --git a/docs/source/markdown/podman-run.1.md.in b/docs/source/markdown/podman-run.1.md.in index 2bb3098e25..86066ad9ce 100644 --- a/docs/source/markdown/podman-run.1.md.in +++ b/docs/source/markdown/podman-run.1.md.in @@ -221,6 +221,8 @@ Read in a line delimited file of environment variables. See **Environment** note @@option health-interval +@@option health-on-failure + @@option health-retries @@option health-start-period diff --git a/libpod/container_config.go b/libpod/container_config.go index bd98166517..f3585d22c6 100644 --- a/libpod/container_config.go +++ b/libpod/container_config.go @@ -7,6 +7,7 @@ import ( "github.com/containers/common/libnetwork/types" "github.com/containers/common/pkg/secrets" "github.com/containers/image/v5/manifest" + "github.com/containers/podman/v4/libpod/define" "github.com/containers/podman/v4/pkg/namespaces" "github.com/containers/podman/v4/pkg/specgen" "github.com/containers/storage" @@ -392,6 +393,8 @@ type ContainerMiscConfig struct { Systemd *bool `json:"systemd,omitempty"` // HealthCheckConfig has the health check command and related timings HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"` + // HealthCheckOnFailureAction defines an action to take once the container turns unhealthy. + HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"` // PreserveFDs is a number of additional file descriptors (in addition // to 0, 1, 2) that will be passed to the executed process. The total FDs // passed will be 3 + PreserveFDs. diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 5e2ab28180..ad8bae2862 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -390,6 +390,8 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) *define.Insp // leak. ctrConfig.Healthcheck = c.config.HealthCheckConfig + ctrConfig.HealthcheckOnFailureAction = c.config.HealthCheckOnFailureAction.String() + ctrConfig.CreateCommand = c.config.CreateCommand ctrConfig.Timezone = c.config.Timezone diff --git a/libpod/container_validate.go b/libpod/container_validate.go index da33f6db7c..f4611ecce4 100644 --- a/libpod/container_validate.go +++ b/libpod/container_validate.go @@ -137,5 +137,9 @@ func (c *Container) validate() error { if c.config.SdNotifyMode == define.SdNotifyModeIgnore && len(c.config.SdNotifySocket) > 0 { return fmt.Errorf("cannot set sd-notify socket %q with sd-notify mode %q", c.config.SdNotifySocket, c.config.SdNotifyMode) } + + if c.config.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone && c.config.HealthCheckConfig == nil { + return fmt.Errorf("cannot set on-failure action to %s without a health check", c.config.HealthCheckOnFailureAction.String()) + } return nil } diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go index 5982d684c5..da5c58f278 100644 --- a/libpod/define/container_inspect.go +++ b/libpod/define/container_inspect.go @@ -55,6 +55,8 @@ type InspectContainerConfig struct { StopSignal uint `json:"StopSignal"` // Configured healthcheck for the container Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"` + // HealthcheckOnFailureAction defines an action to take once the container turns unhealthy. + HealthcheckOnFailureAction string `json:"HealthcheckOnFailureAction,omitempty"` // CreateCommand is the full command plus arguments of the process the // container has been created with. CreateCommand []string `json:"CreateCommand,omitempty"` diff --git a/libpod/define/healthchecks.go b/libpod/define/healthchecks.go index f712743500..274e025611 100644 --- a/libpod/define/healthchecks.go +++ b/libpod/define/healthchecks.go @@ -1,5 +1,10 @@ package define +import ( + "fmt" + "strings" +) + const ( // HealthCheckHealthy describes a healthy container HealthCheckHealthy string = "healthy" @@ -57,3 +62,72 @@ const ( // HealthConfigTestCmdShell runs commands with the system's default shell HealthConfigTestCmdShell = "CMD-SHELL" ) + +// HealthCheckOnFailureAction defines how Podman reacts when a container's health +// status turns unhealthy. +type HealthCheckOnFailureAction int + +// Healthcheck on-failure actions. +const ( + // HealthCheckOnFailureActionNonce instructs Podman to not react on an unhealthy status. + HealthCheckOnFailureActionNone = iota // Must be first iota for backwards compatibility + // HealthCheckOnFailureActionInvalid denotes an invalid on-failure policy. + HealthCheckOnFailureActionInvalid = iota + // HealthCheckOnFailureActionNonce instructs Podman to kill the container on an unhealthy status. + HealthCheckOnFailureActionKill = iota + // HealthCheckOnFailureActionNonce instructs Podman to restart the container on an unhealthy status. + HealthCheckOnFailureActionRestart = iota + // HealthCheckOnFailureActionNonce instructs Podman to stop the container on an unhealthy status. + HealthCheckOnFailureActionStop = iota +) + +// String representations for on-failure actions. +const ( + strHealthCheckOnFailureActionNone = "none" + strHealthCheckOnFailureActionInvalid = "invalid" + strHealthCheckOnFailureActionKill = "kill" + strHealthCheckOnFailureActionRestart = "restart" + strHealthCheckOnFailureActionStop = "stop" +) + +// SupportedHealthCheckOnFailureActions lists all supported healthcheck restart policies. +var SupportedHealthCheckOnFailureActions = []string{ + strHealthCheckOnFailureActionNone, + strHealthCheckOnFailureActionKill, + strHealthCheckOnFailureActionRestart, + strHealthCheckOnFailureActionStop, +} + +// String returns the string representation of the HealthCheckOnFailureAction. +func (h HealthCheckOnFailureAction) String() string { + switch h { + case HealthCheckOnFailureActionNone: + return strHealthCheckOnFailureActionNone + case HealthCheckOnFailureActionKill: + return strHealthCheckOnFailureActionKill + case HealthCheckOnFailureActionRestart: + return strHealthCheckOnFailureActionRestart + case HealthCheckOnFailureActionStop: + return strHealthCheckOnFailureActionStop + default: + return strHealthCheckOnFailureActionInvalid + } +} + +// ParseHealthCheckOnFailureAction parses the specified string into a HealthCheckOnFailureAction. +// An error is returned for an invalid input. +func ParseHealthCheckOnFailureAction(s string) (HealthCheckOnFailureAction, error) { + switch s { + case "", strHealthCheckOnFailureActionNone: + return HealthCheckOnFailureActionNone, nil + case strHealthCheckOnFailureActionKill: + return HealthCheckOnFailureActionKill, nil + case strHealthCheckOnFailureActionRestart: + return HealthCheckOnFailureActionRestart, nil + case strHealthCheckOnFailureActionStop: + return HealthCheckOnFailureActionStop, nil + default: + err := fmt.Errorf("invalid on-failure action %q for health check: supported actions are %s", s, strings.Join(SupportedHealthCheckOnFailureActions, ",")) + return HealthCheckOnFailureActionInvalid, err + } +} diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go index 9b9d12b170..e835af9f06 100644 --- a/libpod/healthcheck.go +++ b/libpod/healthcheck.go @@ -2,6 +2,7 @@ package libpod import ( "bufio" + "context" "errors" "fmt" "io/ioutil" @@ -12,6 +13,7 @@ import ( "github.com/containers/podman/v4/libpod/define" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) const ( @@ -29,9 +31,14 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) { if err != nil { return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err) } + hcStatus, err := checkHealthCheckCanBeRun(container) if err == nil { - return container.runHealthCheck() + hcStatus, err := container.runHealthCheck() + if err := container.processHealthCheckStatus(hcStatus); err != nil { + return hcStatus, err + } + return hcStatus, err } return hcStatus, err } @@ -127,13 +134,45 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, error) { hcResult = define.HealthCheckFailure hcErr = fmt.Errorf("healthcheck command exceeded timeout of %s", c.HealthCheckConfig().Timeout.String()) } + hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog) if err := c.updateHealthCheckLog(hcl, inStartPeriod); err != nil { return hcResult, fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err) } + return hcResult, hcErr } +func (c *Container) processHealthCheckStatus(status define.HealthCheckStatus) error { + if status == define.HealthCheckSuccess { + return nil + } + + switch c.config.HealthCheckOnFailureAction { + case define.HealthCheckOnFailureActionNone: // Nothing to do + + case define.HealthCheckOnFailureActionKill: + if err := c.Kill(uint(unix.SIGKILL)); err != nil { + return fmt.Errorf("killing container health-check turned unhealthy: %w", err) + } + + case define.HealthCheckOnFailureActionRestart: + if err := c.RestartWithTimeout(context.Background(), c.config.StopTimeout); err != nil { + return fmt.Errorf("restarting container after health-check turned unhealthy: %w", err) + } + + case define.HealthCheckOnFailureActionStop: + if err := c.Stop(); err != nil { + return fmt.Errorf("stopping container after health-check turned unhealthy: %w", err) + } + + default: // Should not happen but better be safe than sorry + return fmt.Errorf("unsupported on-failure action %d", c.config.HealthCheckOnFailureAction) + } + + return nil +} + func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) { cstate, err := c.State() if err != nil { diff --git a/libpod/options.go b/libpod/options.go index 56d5265d23..71ad3d11e5 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -1473,6 +1473,17 @@ func WithHealthCheck(healthCheck *manifest.Schema2HealthConfig) CtrCreateOption } } +// WithHealthCheckOnFailureAction adds an on-failure action to health-check config +func WithHealthCheckOnFailureAction(action define.HealthCheckOnFailureAction) CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + ctr.config.HealthCheckOnFailureAction = action + return nil + } +} + // WithPreserveFDs forwards from the process running Libpod into the container // the given number of extra FDs (starting after the standard streams) to the created container func WithPreserveFDs(fd uint) CtrCreateOption { diff --git a/pkg/domain/entities/pods.go b/pkg/domain/entities/pods.go index 55e2fd5747..a059cd7b5b 100644 --- a/pkg/domain/entities/pods.go +++ b/pkg/domain/entities/pods.go @@ -212,6 +212,7 @@ type ContainerCreateOptions struct { HealthRetries uint HealthStartPeriod string HealthTimeout string + HealthOnFailure string Hostname string `json:"hostname,omitempty"` HTTPProxy bool HostUsers []string diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go index 4d5ac22adb..8900d2e5dd 100644 --- a/pkg/specgen/generate/container_create.go +++ b/pkg/specgen/generate/container_create.go @@ -515,6 +515,10 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l logrus.Debugf("New container has a health check") } + if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone { + options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction)) + } + if len(s.Secrets) != 0 { manager, err := rt.SecretsManager() if err != nil { diff --git a/pkg/specgen/specgen.go b/pkg/specgen/specgen.go index 51b6736a9b..34418c1326 100644 --- a/pkg/specgen/specgen.go +++ b/pkg/specgen/specgen.go @@ -9,6 +9,7 @@ import ( "github.com/containers/common/libimage" nettypes "github.com/containers/common/libnetwork/types" "github.com/containers/image/v5/manifest" + "github.com/containers/podman/v4/libpod/define" "github.com/containers/storage/types" spec "github.com/opencontainers/runtime-spec/specs-go" ) @@ -533,7 +534,8 @@ type ContainerResourceConfig struct { // ContainerHealthCheckConfig describes a container healthcheck with attributes // like command, retries, interval, start period, and timeout. type ContainerHealthCheckConfig struct { - HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"` + HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"` + HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"health_check_on_failure_action,omitempty"` } // SpecGenerator creates an OCI spec and Libpod configuration options to create diff --git a/pkg/specgenutil/specgen.go b/pkg/specgenutil/specgen.go index 439a13385b..3189926b2e 100644 --- a/pkg/specgenutil/specgen.go +++ b/pkg/specgenutil/specgen.go @@ -265,6 +265,13 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions Test: []string{"NONE"}, } } + + onFailureAction, err := define.ParseHealthCheckOnFailureAction(c.HealthOnFailure) + if err != nil { + return err + } + s.HealthCheckOnFailureAction = onFailureAction + if err := setNamespaces(s, c); err != nil { return err } diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index c502ad6690..00ec1dd794 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -20,44 +20,8 @@ function _check_health { done } - @test "podman healthcheck" { - # Create an image with a healthcheck script; said script will - # pass until the file /uh-oh gets created (by us, via exec) - cat >${PODMAN_TMPDIR}/healthcheck <&2 - exit 1 -else - echo "Life is Good on stdout" - echo "Life is Good on stderr" >&2 - exit 0 -fi -EOF - - cat >${PODMAN_TMPDIR}/entrypoint <${PODMAN_TMPDIR}/Containerfile <${PODMAN_TMPDIR}/healthcheck <&2 + ${cleanfile} + exit 1 +else + echo "Life is Good on stdout" + echo "Life is Good on stderr" >&2 + exit 0 +fi +EOF + + cat >${PODMAN_TMPDIR}/entrypoint <${PODMAN_TMPDIR}/Containerfile <