Add support for startup healthchecks

Startup healthchecks are similar to K8S startup probes, in that
they are a separate check from the regular healthcheck that runs
before it. If the startup healthcheck fails repeatedly, the
associated container is restarted.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
This commit is contained in:
Matthew Heon
2022-04-15 19:22:12 -04:00
parent 935c8eb5ca
commit d16129330d
24 changed files with 551 additions and 147 deletions

View File

@ -180,7 +180,7 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
createFlags.StringVar(
&cf.HealthInterval,
healthIntervalFlagName, define.DefaultHealthCheckInterval,
"set an interval for the healthchecks (a value of disable results in no automatic timer setup)",
"set an interval for the healthcheck (a value of disable results in no automatic timer setup)",
)
_ = cmd.RegisterFlagCompletionFunc(healthIntervalFlagName, completion.AutocompleteNone)
@ -428,6 +428,46 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(secretFlagName, AutocompleteSecrets)
startupHCCmdFlagName := "health-startup-cmd"
createFlags.StringVar(
&cf.StartupHCCmd,
startupHCCmdFlagName, "",
"Set a startup healthcheck command for the container",
)
_ = cmd.RegisterFlagCompletionFunc(startupHCCmdFlagName, completion.AutocompleteNone)
startupHCIntervalFlagName := "health-startup-interval"
createFlags.StringVar(
&cf.StartupHCInterval,
startupHCIntervalFlagName, define.DefaultHealthCheckInterval,
"Set an interval for the startup healthcheck",
)
_ = cmd.RegisterFlagCompletionFunc(startupHCIntervalFlagName, completion.AutocompleteNone)
startupHCRetriesFlagName := "health-startup-retries"
createFlags.UintVar(
&cf.StartupHCRetries,
startupHCRetriesFlagName, 0,
"Set the maximum number of retries before the startup healthcheck will restart the container",
)
_ = cmd.RegisterFlagCompletionFunc(startupHCRetriesFlagName, completion.AutocompleteNone)
startupHCSuccessesFlagName := "health-startup-success"
createFlags.UintVar(
&cf.StartupHCSuccesses,
startupHCSuccessesFlagName, 0,
"Set the number of consecutive successes before the startup healthcheck is marked as successful and the normal healthcheck begins (0 indicates any success will start the regular healthcheck)",
)
_ = cmd.RegisterFlagCompletionFunc(startupHCSuccessesFlagName, completion.AutocompleteNone)
startupHCTimeoutFlagName := "health-startup-timeout"
createFlags.StringVar(
&cf.StartupHCTimeout,
startupHCTimeoutFlagName, define.DefaultHealthCheckTimeout,
"Set the maximum amount of time that the startup healthcheck may take before it is considered failed",
)
_ = cmd.RegisterFlagCompletionFunc(startupHCTimeoutFlagName, completion.AutocompleteNone)
stopSignalFlagName := "stop-signal"
createFlags.StringVar(
&cf.StopSignal,

View File

@ -35,7 +35,7 @@ func run(cmd *cobra.Command, args []string) error {
if err != nil {
return err
}
if response.Status == define.HealthCheckUnhealthy {
if response.Status == define.HealthCheckUnhealthy || response.Status == define.HealthCheckStarting {
registry.SetExitCode(1)
fmt.Println(response.Status)
}

View File

@ -0,0 +1,11 @@
####> This option file is used in:
####> podman create, run
####> If you edit this file, make sure your changes
####> are applicable to all of those.
#### **--health-startup-cmd**=*"command"* | *'["command", "arg1", ...]'*
Set a startup healthcheck command for a container. This command will be executed inside the container and is used to gate the regular
healthcheck. When the startup command succeeds, the regular healthcheck will begin and the startup healthcheck will cease. Optionally,
if the command fails for a set number of attempts, the container will be restarted. A startup healthcheck can be used to ensure that
containers with an extended startup period are not marked as unhealthy until they are fully started. Startup healthchecks can only be
used when a regular healthcheck (from the container's image or the **--health-cmd** option) is also set.

View File

@ -0,0 +1,7 @@
####> This option file is used in:
####> podman create, run
####> If you edit this file, make sure your changes
####> are applicable to all of those.
#### **--health-startup-interval**=*interval*
Set an interval for the startup healthcheck. An _interval_ of **disable** results in no automatic timer setup. The default is **30s**.

View File

@ -0,0 +1,8 @@
####> This option file is used in:
####> podman create, run
####> If you edit this file, make sure your changes
####> are applicable to all of those.
#### **--health-startup-retries**=*retries*
The number of attempts allowed before the startup healthcheck restarts the container. If set to **0**, the container will never be
restarted. The default is **0**.

View File

@ -0,0 +1,8 @@
####> This option file is used in:
####> podman create, run
####> If you edit this file, make sure your changes
####> are applicable to all of those.
#### **--health-startup-success**=*successes*
The number of successful runs required before the startup healthcheck will succeed and the regular healthcheck will begin. A value
of **0** means that any success will begin the regular healthcheck. The default is **0**.

View File

@ -0,0 +1,8 @@
####> This option file is used in:
####> podman create, run
####> If you edit this file, make sure your changes
####> are applicable to all of those.
#### **--health-startup-timeout**=*timeout*
The maximum time a startup healthcheck command has to complete before it is marked as failed. The value can be expressed in a time
format like **2m3s**. The default value is **30s**.

View File

@ -172,6 +172,16 @@ See [**Environment**](#environment) note below for precedence and examples.
@@option health-start-period
@@option health-startup-cmd
@@option health-startup-interval
@@option health-startup-retries
@@option health-startup-success
@@option health-startup-timeout
@@option health-timeout
#### **--help**

View File

@ -204,6 +204,16 @@ See [**Environment**](#environment) note below for precedence and examples.
@@option health-start-period
@@option health-startup-cmd
@@option health-startup-interval
@@option health-startup-retries
@@option health-startup-success
@@option health-startup-timeout
@@option health-timeout
#### **--help**

View File

@ -200,6 +200,18 @@ type ContainerState struct {
// (only by restart policy).
RestartCount uint `json:"restartCount,omitempty"`
// StartupHCPassed indicates that the startup healthcheck has
// succeeded and the main healthcheck can begin.
StartupHCPassed bool `json:"startupHCPassed,omitempty"`
// StartupHCSuccessCount indicates the number of successes of the
// startup healthcheck. A startup HC can require more than one success
// to be marked as passed.
StartupHCSuccessCount int `json:"startupHCSuccessCount,omitempty"`
// StartupHCFailureCount indicates the number of failures of the startup
// healthcheck. The container will be restarted once this reaches the
// retry limit set in the startup HC config.
StartupHCFailureCount int `json:"startupHCFailureCount,omitempty"`
// ExtensionStageHooks holds hooks which will be executed by libpod
// and not delegated to the OCI runtime.
ExtensionStageHooks map[string][]spec.Hook `json:"extensionStageHooks,omitempty"`
@ -929,6 +941,20 @@ func (c *Container) StoppedByUser() (bool, error) {
return c.state.StoppedByUser, nil
}
// StartupHCPassed reports whether the container's startup healthcheck has
// been recorded as passed in the container state.
//
// For a batched container the flag is read directly. Otherwise the container
// lock is held and the state is synced before the flag is read; if the sync
// fails, false is returned together with the sync error.
func (c *Container) StartupHCPassed() (bool, error) {
	if c.batched {
		return c.state.StartupHCPassed, nil
	}

	c.lock.Lock()
	defer c.lock.Unlock()

	if syncErr := c.syncContainer(); syncErr != nil {
		return false, syncErr
	}

	return c.state.StartupHCPassed, nil
}
// Misc Accessors
// Most will require locking

View File

@ -395,6 +395,10 @@ type ContainerMiscConfig struct {
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
// HealthCheckOnFailureAction defines an action to take once the container turns unhealthy.
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"`
// StartupHealthCheckConfig is the configuration of the startup
// healthcheck for the container. This will run before the regular HC
// runs, and when it passes the regular HC will be activated.
StartupHealthCheckConfig *define.StartupHealthCheck `json:"startupHealthCheck,omitempty"`
// PreserveFDs is a number of additional file descriptors (in addition
// to 0, 1, 2) that will be passed to the executed process. The total FDs
// passed will be 3 + PreserveFDs.

View File

@ -622,6 +622,9 @@ func resetState(state *ContainerState) {
state.CheckpointPath = ""
state.CheckpointLog = ""
state.RestoreLog = ""
state.StartupHCPassed = false
state.StartupHCSuccessCount = 0
state.StartupHCFailureCount = 0
}
// Refresh refreshes the container's state after a restart.
@ -1072,6 +1075,9 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
c.state.State = define.ContainerStateCreated
c.state.StoppedByUser = false
c.state.RestartPolicyMatch = false
c.state.StartupHCFailureCount = 0
c.state.StartupHCSuccessCount = 0
c.state.StartupHCPassed = false
if !retainRetries {
c.state.RestartCount = 0
@ -1091,7 +1097,11 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
}
if c.config.HealthCheckConfig != nil {
if err := c.createTimer(); err != nil {
timer := c.config.HealthCheckConfig.Interval.String()
if c.config.StartupHealthCheckConfig != nil {
timer = c.config.StartupHealthCheckConfig.Interval.String()
}
if err := c.createTimer(timer, c.config.StartupHealthCheckConfig != nil); err != nil {
logrus.Error(err)
}
}
@ -1244,7 +1254,7 @@ func (c *Container) start() error {
if err := c.updateHealthStatus(define.HealthCheckStarting); err != nil {
logrus.Error(err)
}
if err := c.startTimer(); err != nil {
if err := c.startTimer(c.config.StartupHealthCheckConfig != nil); err != nil {
logrus.Error(err)
}
}
@ -1422,7 +1432,7 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
return err
}
if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(context.Background()); err != nil {
if err := c.removeTransientFiles(context.Background(), c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
logrus.Error(err.Error())
}
}
@ -1859,7 +1869,7 @@ func (c *Container) cleanup(ctx context.Context) error {
// Remove healthcheck unit/timer file if it execs
if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(ctx); err != nil {
if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
logrus.Errorf("Removing timer for container %s healthcheck: %v", c.ID(), err)
}
}

View File

@ -156,6 +156,11 @@ func (c *Container) validate() error {
}
}
// Cannot set startup HC without a healthcheck
if c.config.HealthCheckConfig == nil && c.config.StartupHealthCheckConfig != nil {
return fmt.Errorf("cannot set a startup healthcheck when there is no regular healthcheck: %w", define.ErrInvalidArg)
}
return nil
}

View File

@ -3,6 +3,8 @@ package define
import (
"fmt"
"strings"
"github.com/containers/image/v5/manifest"
)
const (
@ -38,6 +40,9 @@ const (
HealthCheckInternalError HealthCheckStatus = iota
// HealthCheckDefined means the healthcheck was found on the container
HealthCheckDefined HealthCheckStatus = iota
// HealthCheckStartup means the healthcheck was unhealthy, but is still
// either within the startup HC or the startup period of the healthcheck
HealthCheckStartup HealthCheckStatus = iota
)
// Healthcheck defaults. These are used both in the cli as well in
@ -131,3 +136,12 @@ func ParseHealthCheckOnFailureAction(s string) (HealthCheckOnFailureAction, erro
return HealthCheckOnFailureActionInvalid, err
}
}
// StartupHealthCheck is the configuration of a startup healthcheck.
// It embeds the regular healthcheck configuration type and adds the number
// of successes required before the startup healthcheck counts as passed.
type StartupHealthCheck struct {
// Schema2HealthConfig is embedded, so its fields (for example Test,
// Interval, Timeout and Retries) are accessed directly on this type.
manifest.Schema2HealthConfig
// Successes is the number of successes required to mark the startup HC
// as passed.
// If set to 0, a single success will mark the HC as passed.
Successes int `json:",omitempty"`
}

View File

@ -25,7 +25,7 @@ const (
// HealthCheck verifies the state and validity of the healthcheck configuration
// on the container and then executes the healthcheck
func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
func (r *Runtime) HealthCheck(ctx context.Context, name string) (define.HealthCheckStatus, error) {
container, err := r.LookupContainer(name)
if err != nil {
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
@ -36,21 +36,35 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
return hcStatus, err
}
hcStatus, logStatus, err := container.runHealthCheck()
if err := container.processHealthCheckStatus(logStatus); err != nil {
return hcStatus, err
isStartupHC := false
if container.config.StartupHealthCheckConfig != nil {
passed, err := container.StartupHCPassed()
if err != nil {
return define.HealthCheckInternalError, err
}
isStartupHC = !passed
}
hcStatus, logStatus, err := container.runHealthCheck(ctx, isStartupHC)
if !isStartupHC {
if err := container.processHealthCheckStatus(logStatus); err != nil {
return hcStatus, err
}
}
return hcStatus, err
}
// runHealthCheck runs the health check as defined by the container
func (c *Container) runHealthCheck() (define.HealthCheckStatus, string, error) {
func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.HealthCheckStatus, string, error) {
var (
newCommand []string
returnCode int
inStartPeriod bool
)
hcCommand := c.HealthCheckConfig().Test
if isStartup {
logrus.Debugf("Running startup healthcheck for container %s", c.ID())
hcCommand = c.config.StartupHealthCheckConfig.Test
}
if len(hcCommand) < 1 {
return define.HealthCheckNotDefined, "", fmt.Errorf("container %s has no defined healthcheck", c.ID())
}
@ -113,6 +127,18 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, string, error) {
hcResult = define.HealthCheckFailure
returnCode = 1
}
// Handle startup HC
if isStartup {
inStartPeriod = true
if hcErr != nil || exitCode != 0 {
hcResult = define.HealthCheckStartup
c.incrementStartupHCFailureCounter(ctx)
} else {
c.incrementStartupHCSuccessCounter(ctx)
}
}
timeEnd := time.Now()
if c.HealthCheckConfig().StartPeriod > 0 {
// there is a start-period we need to honor; we add startPeriod to container start time
@ -188,6 +214,114 @@ func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
return define.HealthCheckDefined, nil
}
// incrementStartupHCSuccessCounter records one successful run of the startup
// healthcheck.
//
// When the success counter reaches the configured Successes value (or on the
// first success when Successes is 0) the startup healthcheck is marked as
// passed, both startup counters are zeroed, the state is saved, and the
// systemd timer is switched from the startup healthcheck to the regular one.
//
// Unless the container is batched, the container lock is taken and the state
// is synced first. All failures are logged rather than returned.
//
// NOTE(review): the successes are described elsewhere as "consecutive", but
// nothing in this function resets the success counter after a failed run -
// confirm that the intended semantics are enforced somewhere else.
func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if syncErr := c.syncContainer(); syncErr != nil {
			logrus.Errorf("Error syncing container %s state: %v", c.ID(), syncErr)
			return
		}
	}

	startupCfg := c.config.StartupHealthCheckConfig

	// Without a startup HC there is nothing to count. If the startup HC is
	// already marked as passed, another run got here first (race).
	if startupCfg == nil || c.state.StartupHCPassed {
		return
	}

	c.state.StartupHCSuccessCount++

	logrus.Debugf("Startup healthcheck for container %s succeeded, success counter now %d", c.ID(), c.state.StartupHCSuccessCount)

	// A Successes value of 0 means any single success is enough.
	thresholdMet := startupCfg.Successes == 0 || c.state.StartupHCSuccessCount >= startupCfg.Successes
	if thresholdMet {
		c.state.StartupHCPassed = true
		c.state.StartupHCSuccessCount = 0
		c.state.StartupHCFailureCount = 0
	}

	// Persist the new counter/flag values before touching any timers.
	if saveErr := c.save(); saveErr != nil {
		logrus.Errorf("Error saving container %s state: %v", c.ID(), saveErr)
		return
	}

	if !thresholdMet {
		return
	}

	logrus.Infof("Startup healthcheck for container %s passed, recreating timer", c.ID())

	// Bring up the regular healthcheck timer first.
	if timerErr := c.createTimer(c.HealthCheckConfig().Interval.String(), false); timerErr != nil {
		logrus.Errorf("Error recreating container %s healthcheck: %v", c.ID(), timerErr)
		return
	}
	if startErr := c.startTimer(false); startErr != nil {
		logrus.Errorf("Error restarting container %s healthcheck timer: %v", c.ID(), startErr)
	}

	// Removing the startup unit has to be the very last step: it stops the
	// unit that is running this very healthcheck process, so systemd sends
	// us a SIGTERM and we exit.
	if removeErr := c.removeTransientFiles(ctx, true); removeErr != nil {
		logrus.Errorf("Error removing container %s healthcheck: %v", c.ID(), removeErr)
	}
}
// incrementStartupHCFailureCounter records one failed run of the startup
// healthcheck.
//
// If Retries is non-zero and the failure counter has reached it, the
// container is restarted via restartWithTimeout using the configured stop
// timeout; on that path this function does not save the state itself.
// Otherwise the incremented counter is saved.
//
// Unless the container is batched, the container lock is taken and the state
// is synced first. All failures are logged rather than returned.
//
// NOTE(review): the failures are described elsewhere as "consecutive", but
// nothing in this function resets the failure counter after a successful
// run that does not yet pass the startup HC - confirm the intended semantics.
func (c *Container) incrementStartupHCFailureCounter(ctx context.Context) {
	if !c.batched {
		c.lock.Lock()
		defer c.lock.Unlock()

		if syncErr := c.syncContainer(); syncErr != nil {
			logrus.Errorf("Error syncing container %s state: %v", c.ID(), syncErr)
			return
		}
	}

	startupCfg := c.config.StartupHealthCheckConfig

	// Without a startup HC there is nothing to count. If the startup HC is
	// already marked as passed, another run got here first (race).
	if startupCfg == nil || c.state.StartupHCPassed {
		return
	}

	c.state.StartupHCFailureCount++

	logrus.Debugf("Startup healthcheck for container %s failed, failure counter now %d", c.ID(), c.state.StartupHCFailureCount)

	// Retries == 0 means the container is never restarted by the startup HC.
	restartNeeded := startupCfg.Retries != 0 && c.state.StartupHCFailureCount >= startupCfg.Retries
	if !restartNeeded {
		if saveErr := c.save(); saveErr != nil {
			logrus.Errorf("Error saving container %s state: %v", c.ID(), saveErr)
		}
		return
	}

	logrus.Infof("Restarting container %s as startup healthcheck failed", c.ID())

	if restartErr := c.restartWithTimeout(ctx, c.config.StopTimeout); restartErr != nil {
		logrus.Errorf("Error restarting container %s after healthcheck failure: %v", c.ID(), restartErr)
	}
}
func newHealthCheckLog(start, end time.Time, exitCode int, log string) define.HealthCheckLog {
return define.HealthCheckLog{
Start: start.Format(time.RFC3339Nano),
@ -299,12 +433,26 @@ func (c *Container) healthCheckStatus() (string, error) {
return results.Status, nil
}
func (c *Container) disableHealthCheckSystemd() bool {
// disableHealthCheckSystemd reports whether systemd timers must NOT be used
// for this container's healthcheck.
//
// It returns true when:
//   - the DISABLE_HC_SYSTEMD environment variable is set to "true";
//   - isStartup is set and the startup healthcheck is missing or has an
//     Interval of 0;
//   - the regular healthcheck is missing or has an Interval of 0.
//
// An Interval of 0 is presumably what the "disable" interval value maps to -
// TODO confirm against the interval parsing code.
//
// The nil checks make the helper safe to call even when the caller has not
// verified that the corresponding config exists: with no config there is no
// timer to manage, so the timer handling is reported as disabled instead of
// panicking on a nil pointer dereference.
func (c *Container) disableHealthCheckSystemd(isStartup bool) bool {
	if os.Getenv("DISABLE_HC_SYSTEMD") == "true" {
		return true
	}

	if isStartup {
		startupCfg := c.config.StartupHealthCheckConfig
		if startupCfg == nil || startupCfg.Interval == 0 {
			return true
		}
	}

	// The regular healthcheck interval is checked for both kinds of timer,
	// matching the previous behaviour.
	regularCfg := c.config.HealthCheckConfig
	return regularCfg == nil || regularCfg.Interval == 0
}
// hcUnitName returns the name of the systemd unit used for the container's
// healthcheck: the container ID, with a "-startup" suffix appended when the
// unit belongs to the startup healthcheck.
func (c *Container) hcUnitName(isStartup bool) string {
	if !isStartup {
		return c.ID()
	}
	return c.ID() + "-startup"
}

View File

@ -14,8 +14,8 @@ import (
)
// createTimer systemd timers for healthchecks of a container
func (c *Container) createTimer() error {
if c.disableHealthCheckSystemd() {
func (c *Container) createTimer(interval string, isStartup bool) error {
if c.disableHealthCheckSystemd(isStartup) {
return nil
}
podman, err := os.Executable()
@ -31,7 +31,14 @@ func (c *Container) createTimer() error {
if path != "" {
cmd = append(cmd, "--setenv=PATH="+path)
}
cmd = append(cmd, "--unit", c.ID(), fmt.Sprintf("--on-unit-inactive=%s", c.HealthCheckConfig().Interval.String()), "--timer-property=AccuracySec=1s", podman, "healthcheck", "run", c.ID())
cmd = append(cmd, "--unit", c.hcUnitName(isStartup), fmt.Sprintf("--on-unit-inactive=%s", interval), "--timer-property=AccuracySec=1s", podman)
if logrus.IsLevelEnabled(logrus.DebugLevel) {
cmd = append(cmd, "--log-level=debug", "--syslog")
}
cmd = append(cmd, "healthcheck", "run", c.ID())
conn, err := systemd.ConnectToDBUS()
if err != nil {
@ -58,8 +65,8 @@ func systemdOpSuccessful(c chan string) error {
}
// startTimer starts a systemd timer for the healthchecks
func (c *Container) startTimer() error {
if c.disableHealthCheckSystemd() {
func (c *Container) startTimer(isStartup bool) error {
if c.disableHealthCheckSystemd(isStartup) {
return nil
}
conn, err := systemd.ConnectToDBUS()
@ -68,7 +75,7 @@ func (c *Container) startTimer() error {
}
defer conn.Close()
startFile := fmt.Sprintf("%s.service", c.ID())
startFile := fmt.Sprintf("%s.service", c.hcUnitName(isStartup))
startChan := make(chan string)
if _, err := conn.RestartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
return err
@ -82,8 +89,8 @@ func (c *Container) startTimer() error {
// removeTransientFiles removes the systemd timer and unit files
// for the container
func (c *Container) removeTransientFiles(ctx context.Context) error {
if c.disableHealthCheckSystemd() {
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) error {
if c.disableHealthCheckSystemd(isStartup) {
return nil
}
conn, err := systemd.ConnectToDBUS()
@ -99,7 +106,7 @@ func (c *Container) removeTransientFiles(ctx context.Context) error {
// Stop the timer before the service to make sure the timer does not
// fire after the service is stopped.
timerChan := make(chan string)
timerFile := fmt.Sprintf("%s.timer", c.ID())
timerFile := fmt.Sprintf("%s.timer", c.hcUnitName(isStartup))
if _, err := conn.StopUnitContext(ctx, timerFile, "fail", timerChan); err != nil {
if !strings.HasSuffix(err.Error(), ".timer not loaded.") {
stopErrors = append(stopErrors, fmt.Errorf("removing health-check timer %q: %w", timerFile, err))
@ -111,7 +118,7 @@ func (c *Container) removeTransientFiles(ctx context.Context) error {
// Reset the service before stopping it to make sure it's being removed
// on stop.
serviceChan := make(chan string)
serviceFile := fmt.Sprintf("%s.service", c.ID())
serviceFile := fmt.Sprintf("%s.service", c.hcUnitName(isStartup))
if err := conn.ResetFailedUnitContext(ctx, serviceFile); err != nil {
logrus.Debugf("Failed to reset unit file: %q", err)
}

View File

@ -1898,6 +1898,21 @@ func WithInfraConfig(compatibleOptions InfraInherit) CtrCreateOption {
}
}
// WithStartupHealthcheck sets a startup healthcheck for the container.
// The container must also have a regular healthcheck configured.
//
// The given configuration is copied into the container's config with
// JSONDeepCopy rather than stored by reference. The returned option fails
// with define.ErrCtrFinalized if the container is already valid, or with a
// wrapped error if the copy fails.
func WithStartupHealthcheck(startupHC *define.StartupHealthCheck) CtrCreateOption {
	return func(ctr *Container) error {
		if ctr.valid {
			return define.ErrCtrFinalized
		}

		// The destination is attached to the config before copying.
		target := new(define.StartupHealthCheck)
		ctr.config.StartupHealthCheckConfig = target

		copyErr := JSONDeepCopy(startupHC, target)
		if copyErr == nil {
			return nil
		}
		return fmt.Errorf("error copying startup healthcheck into container: %w", copyErr)
	}
}
// Pod Creation Options
// WithPodCreateCommand adds the full command plus arguments of the current

View File

@ -12,7 +12,7 @@ import (
func RunHealthCheck(w http.ResponseWriter, r *http.Request) {
runtime := r.Context().Value(api.RuntimeKey).(*libpod.Runtime)
name := utils.GetName(r)
status, err := runtime.HealthCheck(name)
status, err := runtime.HealthCheck(r.Context(), name)
if err != nil {
if status == define.HealthCheckContainerNotFound {
utils.ContainerNotFound(w, name, err)
@ -32,6 +32,8 @@ func RunHealthCheck(w http.ResponseWriter, r *http.Request) {
hcStatus := define.HealthCheckUnhealthy
if status == define.HealthCheckSuccess {
hcStatus = define.HealthCheckHealthy
} else if status == define.HealthCheckStartup {
hcStatus = define.HealthCheckStarting
}
report := define.HealthCheckResults{
Status: hcStatus,

View File

@ -174,125 +174,129 @@ const (
)
type ContainerCreateOptions struct {
Annotation []string
Attach []string
Authfile string
BlkIOWeight string
BlkIOWeightDevice []string
CapAdd []string
CapDrop []string
CgroupNS string
CgroupsMode string
CgroupParent string `json:"cgroup_parent,omitempty"`
CIDFile string
ConmonPIDFile string `json:"container_conmon_pidfile,omitempty"`
CPUPeriod uint64
CPUQuota int64
CPURTPeriod uint64
CPURTRuntime int64
CPUShares uint64
CPUS float64 `json:"cpus,omitempty"`
CPUSetCPUs string `json:"cpuset_cpus,omitempty"`
CPUSetMems string
Devices []string `json:"devices,omitempty"`
DeviceCgroupRule []string
DeviceReadBPs []string `json:"device_read_bps,omitempty"`
DeviceReadIOPs []string
DeviceWriteBPs []string
DeviceWriteIOPs []string
Entrypoint *string `json:"container_command,omitempty"`
Env []string
EnvHost bool
EnvFile []string
Expose []string
GIDMap []string
GroupAdd []string
HealthCmd string
HealthInterval string
HealthRetries uint
HealthStartPeriod string
HealthTimeout string
HealthOnFailure string
Hostname string `json:"hostname,omitempty"`
HTTPProxy bool
HostUsers []string
ImageVolume string
Init bool
InitContainerType string
InitPath string
Interactive bool
IPC string
Label []string
LabelFile []string
LogDriver string
LogOptions []string
Memory string
MemoryReservation string
MemorySwap string
MemorySwappiness int64
Name string `json:"container_name"`
NoHealthCheck bool
OOMKillDisable bool
OOMScoreAdj *int
Arch string
OS string
Variant string
PID string `json:"pid,omitempty"`
PIDsLimit *int64
Platform string
Pod string
PodIDFile string
Personality string
PreserveFDs uint
Privileged bool
PublishAll bool
Pull string
Quiet bool
ReadOnly bool
ReadOnlyTmpFS bool
Restart string
Replace bool
Requires []string
Rm bool
RootFS bool
Secrets []string
SecurityOpt []string `json:"security_opt,omitempty"`
SdNotifyMode string
ShmSize string
SignaturePolicy string
StopSignal string
StopTimeout uint
StorageOpts []string
SubUIDName string
SubGIDName string
Sysctl []string `json:"sysctl,omitempty"`
Systemd string
Timeout uint
TLSVerify commonFlag.OptionalBool
TmpFS []string
TTY bool
Timezone string
Umask string
EnvMerge []string
UnsetEnv []string
UnsetEnvAll bool
UIDMap []string
Ulimit []string
User string
UserNS string `json:"-"`
UTS string
Mount []string
Volume []string `json:"volume,omitempty"`
VolumesFrom []string `json:"volumes_from,omitempty"`
Workdir string
SeccompPolicy string
PidFile string
ChrootDirs []string
IsInfra bool
IsClone bool
DecryptionKeys []string
Net *NetOptions `json:"net,omitempty"`
Annotation []string
Attach []string
Authfile string
BlkIOWeight string
BlkIOWeightDevice []string
CapAdd []string
CapDrop []string
CgroupNS string
CgroupsMode string
CgroupParent string `json:"cgroup_parent,omitempty"`
CIDFile string
ConmonPIDFile string `json:"container_conmon_pidfile,omitempty"`
CPUPeriod uint64
CPUQuota int64
CPURTPeriod uint64
CPURTRuntime int64
CPUShares uint64
CPUS float64 `json:"cpus,omitempty"`
CPUSetCPUs string `json:"cpuset_cpus,omitempty"`
CPUSetMems string
Devices []string `json:"devices,omitempty"`
DeviceCgroupRule []string
DeviceReadBPs []string `json:"device_read_bps,omitempty"`
DeviceReadIOPs []string
DeviceWriteBPs []string
DeviceWriteIOPs []string
Entrypoint *string `json:"container_command,omitempty"`
Env []string
EnvHost bool
EnvFile []string
Expose []string
GIDMap []string
GroupAdd []string
HealthCmd string
HealthInterval string
HealthRetries uint
HealthStartPeriod string
HealthTimeout string
HealthOnFailure string
Hostname string `json:"hostname,omitempty"`
HTTPProxy bool
HostUsers []string
ImageVolume string
Init bool
InitContainerType string
InitPath string
Interactive bool
IPC string
Label []string
LabelFile []string
LogDriver string
LogOptions []string
Memory string
MemoryReservation string
MemorySwap string
MemorySwappiness int64
Name string `json:"container_name"`
NoHealthCheck bool
OOMKillDisable bool
OOMScoreAdj *int
Arch string
OS string
Variant string
PID string `json:"pid,omitempty"`
PIDsLimit *int64
Platform string
Pod string
PodIDFile string
Personality string
PreserveFDs uint
Privileged bool
PublishAll bool
Pull string
Quiet bool
ReadOnly bool
ReadOnlyTmpFS bool
Restart string
Replace bool
Requires []string
Rm bool
RootFS bool
Secrets []string
SecurityOpt []string `json:"security_opt,omitempty"`
SdNotifyMode string
ShmSize string
SignaturePolicy string
StartupHCCmd string
StartupHCInterval string
StartupHCRetries uint
StartupHCSuccesses uint
StartupHCTimeout string
StopSignal string
StopTimeout uint
StorageOpts []string
SubUIDName string
SubGIDName string
Sysctl []string `json:"sysctl,omitempty"`
Systemd string
Timeout uint
TLSVerify commonFlag.OptionalBool
TmpFS []string
TTY bool
Timezone string
Umask string
EnvMerge []string
UnsetEnv []string
UnsetEnvAll bool
UIDMap []string
Ulimit []string
User string
UserNS string `json:"-"`
UTS string
Mount []string
Volume []string `json:"volume,omitempty"`
VolumesFrom []string `json:"volumes_from,omitempty"`
Workdir string
SeccompPolicy string
PidFile string
ChrootDirs []string
IsInfra bool
IsClone bool
DecryptionKeys []string
Net *NetOptions `json:"net,omitempty"`
CgroupConf []string

View File

@ -8,13 +8,15 @@ import (
)
func (ic *ContainerEngine) HealthCheckRun(ctx context.Context, nameOrID string, options entities.HealthCheckOptions) (*define.HealthCheckResults, error) {
status, err := ic.Libpod.HealthCheck(nameOrID)
status, err := ic.Libpod.HealthCheck(ctx, nameOrID)
if err != nil {
return nil, err
}
hcStatus := define.HealthCheckUnhealthy
if status == define.HealthCheckSuccess {
hcStatus = define.HealthCheckHealthy
} else if status == define.HealthCheckStartup {
hcStatus = define.HealthCheckStarting
}
report := define.HealthCheckResults{
Status: hcStatus,

View File

@ -527,6 +527,9 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig))
logrus.Debugf("New container has a health check")
}
if s.ContainerHealthCheckConfig.StartupHealthConfig != nil {
options = append(options, libpod.WithStartupHealthcheck(s.ContainerHealthCheckConfig.StartupHealthConfig))
}
if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone {
options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction))

View File

@ -536,6 +536,10 @@ type ContainerResourceConfig struct {
type ContainerHealthCheckConfig struct {
HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"`
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"health_check_on_failure_action,omitempty"`
// Startup healthcheck for a container.
// Requires that HealthConfig be set.
// Optional.
StartupHealthConfig *define.StartupHealthCheck `json:"startupHealthConfig,omitempty"`
}
// SpecGenerator creates an OCI spec and Libpod configuration options to create

View File

@ -256,7 +256,7 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
if c.NoHealthCheck {
return errors.New("cannot specify both --no-healthcheck and --health-cmd")
}
s.HealthConfig, err = makeHealthCheckFromCli(c.HealthCmd, c.HealthInterval, c.HealthRetries, c.HealthTimeout, c.HealthStartPeriod)
s.HealthConfig, err = makeHealthCheckFromCli(c.HealthCmd, c.HealthInterval, c.HealthRetries, c.HealthTimeout, c.HealthStartPeriod, false)
if err != nil {
return err
}
@ -272,6 +272,25 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
}
s.HealthCheckOnFailureAction = onFailureAction
if c.StartupHCCmd != "" {
if c.NoHealthCheck {
return errors.New("cannot specify both --no-healthcheck and --health-startup-cmd")
}
// The hardcoded "1s" will be discarded, as the startup
// healthcheck does not have a start period. So just hardcode
// something that parses correctly.
tmpHcConfig, err := makeHealthCheckFromCli(c.StartupHCCmd, c.StartupHCInterval, c.StartupHCRetries, c.StartupHCTimeout, "1s", true)
if err != nil {
return err
}
s.StartupHealthConfig = new(define.StartupHealthCheck)
s.StartupHealthConfig.Test = tmpHcConfig.Test
s.StartupHealthConfig.Interval = tmpHcConfig.Interval
s.StartupHealthConfig.Timeout = tmpHcConfig.Timeout
s.StartupHealthConfig.Retries = tmpHcConfig.Retries
s.StartupHealthConfig.Successes = int(c.StartupHCSuccesses)
}
if err := setNamespaces(s, c); err != nil {
return err
}
@ -838,7 +857,7 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
return nil
}
func makeHealthCheckFromCli(inCmd, interval string, retries uint, timeout, startPeriod string) (*manifest.Schema2HealthConfig, error) {
func makeHealthCheckFromCli(inCmd, interval string, retries uint, timeout, startPeriod string, isStartup bool) (*manifest.Schema2HealthConfig, error) {
cmdArr := []string{}
isArr := true
err := json.Unmarshal([]byte(inCmd), &cmdArr) // array unmarshalling
@ -886,7 +905,7 @@ func makeHealthCheckFromCli(inCmd, interval string, retries uint, timeout, start
hc.Interval = intervalDuration
if retries < 1 {
if retries < 1 && !isStartup {
return nil, errors.New("healthcheck-retries must be greater than 0")
}
hc.Retries = int(retries)

View File

@ -334,4 +334,43 @@ HEALTHCHECK CMD ls -l / 2>&1`, ALPINE)
// Check to make sure characters were not coerced to utf8
Expect(inspect[0].Config.Healthcheck).To(HaveField("Test", []string{"CMD-SHELL", "ls -l / 2>&1"}))
})
// Verifies that a container with both a startup and a regular healthcheck
// reports "starting" while the startup healthcheck fails, and becomes healthy
// once the startup healthcheck has succeeded and the regular one has run.
It("Startup healthcheck success transitions to regular healthcheck", func() {
ctrName := "hcCtr"
// The startup healthcheck (cat /test) fails until /test exists in the
// container; the regular healthcheck (echo regular) always succeeds.
ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo regular", "--health-startup-cmd", "cat /test", ALPINE, "top"})
ctrRun.WaitWithDefaultTimeout()
Expect(ctrRun).Should(Exit(0))
// Freshly started container: health status is still "starting".
inspect := podmanTest.InspectContainer(ctrName)
Expect(inspect[0].State.Health).To(HaveField("Status", "starting"))
// /test does not exist yet, so this run is expected to exit 1.
hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName})
hc.WaitWithDefaultTimeout()
Expect(hc).Should(Exit(1))
// Create /test so that the startup healthcheck command can succeed.
exec := podmanTest.Podman([]string{"exec", ctrName, "sh", "-c", "touch /test && echo startup > /test"})
exec.WaitWithDefaultTimeout()
Expect(exec).Should(Exit(0))
// The startup healthcheck now succeeds.
hc = podmanTest.Podman([]string{"healthcheck", "run", ctrName})
hc.WaitWithDefaultTimeout()
Expect(hc).Should(Exit(0))
inspect = podmanTest.InspectContainer(ctrName)
Expect(inspect[0].State.Health).To(HaveField("Status", define.HealthCheckHealthy))
// Run again; with the startup healthcheck passed this should be the
// regular healthcheck, and the container must stay healthy.
hc = podmanTest.Podman([]string{"healthcheck", "run", ctrName})
hc.WaitWithDefaultTimeout()
Expect(hc).Should(Exit(0))
inspect = podmanTest.InspectContainer(ctrName)
Expect(inspect[0].State.Health).To(HaveField("Status", define.HealthCheckHealthy))
// Test podman ps --filter health is working (#11687)
ps := podmanTest.Podman([]string{"ps", "--filter", "health=healthy"})
ps.WaitWithDefaultTimeout()
Expect(ps).Should(Exit(0))
// Two output lines expected - presumably the header plus this one
// container; confirm against the ps output format.
Expect(ps.OutputToStringArray()).To(HaveLen(2))
Expect(ps.OutputToString()).To(ContainSubstring("hc"))
})
})