mirror of
https://github.com/containers/podman.git
synced 2025-05-21 00:56:36 +08:00
Add support for startup healthchecks
Startup healthchecks are similar to K8S startup probes, in that they are a separate check from the regular healthcheck that runs before it. If the startup healthcheck fails repeatedly, the associated container is restarted. Signed-off-by: Matthew Heon <matthew.heon@pm.me>
This commit is contained in:
@ -180,7 +180,7 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
|
|||||||
createFlags.StringVar(
|
createFlags.StringVar(
|
||||||
&cf.HealthInterval,
|
&cf.HealthInterval,
|
||||||
healthIntervalFlagName, define.DefaultHealthCheckInterval,
|
healthIntervalFlagName, define.DefaultHealthCheckInterval,
|
||||||
"set an interval for the healthchecks (a value of disable results in no automatic timer setup)",
|
"set an interval for the healthcheck (a value of disable results in no automatic timer setup)",
|
||||||
)
|
)
|
||||||
_ = cmd.RegisterFlagCompletionFunc(healthIntervalFlagName, completion.AutocompleteNone)
|
_ = cmd.RegisterFlagCompletionFunc(healthIntervalFlagName, completion.AutocompleteNone)
|
||||||
|
|
||||||
@ -428,6 +428,46 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
|
|||||||
)
|
)
|
||||||
_ = cmd.RegisterFlagCompletionFunc(secretFlagName, AutocompleteSecrets)
|
_ = cmd.RegisterFlagCompletionFunc(secretFlagName, AutocompleteSecrets)
|
||||||
|
|
||||||
|
startupHCCmdFlagName := "health-startup-cmd"
|
||||||
|
createFlags.StringVar(
|
||||||
|
&cf.StartupHCCmd,
|
||||||
|
startupHCCmdFlagName, "",
|
||||||
|
"Set a startup healthcheck command for the container",
|
||||||
|
)
|
||||||
|
_ = cmd.RegisterFlagCompletionFunc(startupHCCmdFlagName, completion.AutocompleteNone)
|
||||||
|
|
||||||
|
startupHCIntervalFlagName := "health-startup-interval"
|
||||||
|
createFlags.StringVar(
|
||||||
|
&cf.StartupHCInterval,
|
||||||
|
startupHCIntervalFlagName, define.DefaultHealthCheckInterval,
|
||||||
|
"Set an interval for the startup healthcheck",
|
||||||
|
)
|
||||||
|
_ = cmd.RegisterFlagCompletionFunc(startupHCIntervalFlagName, completion.AutocompleteNone)
|
||||||
|
|
||||||
|
startupHCRetriesFlagName := "health-startup-retries"
|
||||||
|
createFlags.UintVar(
|
||||||
|
&cf.StartupHCRetries,
|
||||||
|
startupHCRetriesFlagName, 0,
|
||||||
|
"Set the maximum number of retries before the startup healthcheck will restart the container",
|
||||||
|
)
|
||||||
|
_ = cmd.RegisterFlagCompletionFunc(startupHCRetriesFlagName, completion.AutocompleteNone)
|
||||||
|
|
||||||
|
startupHCSuccessesFlagName := "health-startup-success"
|
||||||
|
createFlags.UintVar(
|
||||||
|
&cf.StartupHCSuccesses,
|
||||||
|
startupHCSuccessesFlagName, 0,
|
||||||
|
"Set the number of consecutive successes before the startup healthcheck is marked as successful and the normal healthcheck begins (0 indicates any success will start the regular healthcheck)",
|
||||||
|
)
|
||||||
|
_ = cmd.RegisterFlagCompletionFunc(startupHCSuccessesFlagName, completion.AutocompleteNone)
|
||||||
|
|
||||||
|
startupHCTimeoutFlagName := "health-startup-timeout"
|
||||||
|
createFlags.StringVar(
|
||||||
|
&cf.StartupHCTimeout,
|
||||||
|
startupHCTimeoutFlagName, define.DefaultHealthCheckTimeout,
|
||||||
|
"Set the maximum amount of time that the startup healthcheck may take before it is considered failed",
|
||||||
|
)
|
||||||
|
_ = cmd.RegisterFlagCompletionFunc(startupHCTimeoutFlagName, completion.AutocompleteNone)
|
||||||
|
|
||||||
stopSignalFlagName := "stop-signal"
|
stopSignalFlagName := "stop-signal"
|
||||||
createFlags.StringVar(
|
createFlags.StringVar(
|
||||||
&cf.StopSignal,
|
&cf.StopSignal,
|
||||||
|
@ -35,7 +35,7 @@ func run(cmd *cobra.Command, args []string) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if response.Status == define.HealthCheckUnhealthy {
|
if response.Status == define.HealthCheckUnhealthy || response.Status == define.HealthCheckStarting {
|
||||||
registry.SetExitCode(1)
|
registry.SetExitCode(1)
|
||||||
fmt.Println(response.Status)
|
fmt.Println(response.Status)
|
||||||
}
|
}
|
||||||
|
11
docs/source/markdown/options/health-startup-cmd.md
Normal file
11
docs/source/markdown/options/health-startup-cmd.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
####> This option file is used in:
|
||||||
|
####> podman create, run
|
||||||
|
####> If you edit this file, make sure your changes
|
||||||
|
####> are applicable to all of those.
|
||||||
|
#### **--health-startup-cmd**=*"command"* | *'["command", "arg1", ...]'*
|
||||||
|
|
||||||
|
Set a startup healthcheck command for a container. This command will be executed inside the container and is used to gate the regular
|
||||||
|
healthcheck. When the startup command succeeds, the regular healthcheck will begin and the startup healthcheck will cease. Optionally,
|
||||||
|
if the command fails for a set number of attempts, the container will be restarted. A startup healthcheck can be used to ensure that
|
||||||
|
containers with an extended startup period are not marked as unhealthy until they are fully started. Startup healthchecks can only be
|
||||||
|
used when a regular healthcheck (from the container's image or the **--health-cmd** option) is also set.
|
7
docs/source/markdown/options/health-startup-interval.md
Normal file
7
docs/source/markdown/options/health-startup-interval.md
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
####> This option file is used in:
|
||||||
|
####> podman create, run
|
||||||
|
####> If you edit this file, make sure your changes
|
||||||
|
####> are applicable to all of those.
|
||||||
|
#### **--health-startup-interval**=*interval*
|
||||||
|
|
||||||
|
Set an interval for the startup healthcheck. An _interval_ of **disable** results in no automatic timer setup. The default is **30s**.
|
8
docs/source/markdown/options/health-startup-retries.md
Normal file
8
docs/source/markdown/options/health-startup-retries.md
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
####> This option file is used in:
|
||||||
|
####> podman create, run
|
||||||
|
####> If you edit this file, make sure your changes
|
||||||
|
####> are applicable to all of those.
|
||||||
|
#### **--health-startup-retries**=*retries*
|
||||||
|
|
||||||
|
The number of attempts allowed before the startup healthcheck restarts the container. If set to **0**, the container will never be
|
||||||
|
restarted. The default is **0**.
|
8
docs/source/markdown/options/health-startup-success.md
Normal file
8
docs/source/markdown/options/health-startup-success.md
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
####> This option file is used in:
|
||||||
|
####> podman create, run
|
||||||
|
####> If you edit this file, make sure your changes
|
||||||
|
####> are applicable to all of those.
|
||||||
|
#### **--health-startup-success**=*successes*
|
||||||
|
|
||||||
|
The number of successful runs required before the startup healthcheck will succeed and the regular healthcheck will begin. A value
|
||||||
|
of **0** means that any success will begin the regular healthcheck. The default is **0**.
|
8
docs/source/markdown/options/health-startup-timeout.md
Normal file
8
docs/source/markdown/options/health-startup-timeout.md
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
####> This option file is used in:
|
||||||
|
####> podman create, run
|
||||||
|
####> If you edit this file, make sure your changes
|
||||||
|
####> are applicable to all of those.
|
||||||
|
#### **--health-startup-timeout**=*timeout*
|
||||||
|
|
||||||
|
The maximum time a startup healthcheck command has to complete before it is marked as failed. The value can be expressed in a time
|
||||||
|
format like **2m3s**. The default value is **30s**.
|
@ -172,6 +172,16 @@ See [**Environment**](#environment) note below for precedence and examples.
|
|||||||
|
|
||||||
@@option health-start-period
|
@@option health-start-period
|
||||||
|
|
||||||
|
@@option health-startup-cmd
|
||||||
|
|
||||||
|
@@option health-startup-interval
|
||||||
|
|
||||||
|
@@option health-startup-retries
|
||||||
|
|
||||||
|
@@option health-startup-success
|
||||||
|
|
||||||
|
@@option health-startup-timeout
|
||||||
|
|
||||||
@@option health-timeout
|
@@option health-timeout
|
||||||
|
|
||||||
#### **--help**
|
#### **--help**
|
||||||
|
@ -204,6 +204,16 @@ See [**Environment**](#environment) note below for precedence and examples.
|
|||||||
|
|
||||||
@@option health-start-period
|
@@option health-start-period
|
||||||
|
|
||||||
|
@@option health-startup-cmd
|
||||||
|
|
||||||
|
@@option health-startup-interval
|
||||||
|
|
||||||
|
@@option health-startup-retries
|
||||||
|
|
||||||
|
@@option health-startup-success
|
||||||
|
|
||||||
|
@@option health-startup-timeout
|
||||||
|
|
||||||
@@option health-timeout
|
@@option health-timeout
|
||||||
|
|
||||||
#### **--help**
|
#### **--help**
|
||||||
|
@ -200,6 +200,18 @@ type ContainerState struct {
|
|||||||
// (only by restart policy).
|
// (only by restart policy).
|
||||||
RestartCount uint `json:"restartCount,omitempty"`
|
RestartCount uint `json:"restartCount,omitempty"`
|
||||||
|
|
||||||
|
// StartupHCPassed indicates that the startup healthcheck has
|
||||||
|
// succeeded and the main healthcheck can begin.
|
||||||
|
StartupHCPassed bool `json:"startupHCPassed,omitempty"`
|
||||||
|
// StartupHCSuccessCount indicates the number of successes of the
|
||||||
|
// startup healthcheck. A startup HC can require more than one success
|
||||||
|
// to be marked as passed.
|
||||||
|
StartupHCSuccessCount int `json:"startupHCSuccessCount,omitempty"`
|
||||||
|
// StartupHCFailureCount indicates the number of failures of the startup
|
||||||
|
// healthcheck. The container will be restarted if this exceeds a set
|
||||||
|
// number in the startup HC config.
|
||||||
|
StartupHCFailureCount int `json:"startupHCFailureCount,omitempty"`
|
||||||
|
|
||||||
// ExtensionStageHooks holds hooks which will be executed by libpod
|
// ExtensionStageHooks holds hooks which will be executed by libpod
|
||||||
// and not delegated to the OCI runtime.
|
// and not delegated to the OCI runtime.
|
||||||
ExtensionStageHooks map[string][]spec.Hook `json:"extensionStageHooks,omitempty"`
|
ExtensionStageHooks map[string][]spec.Hook `json:"extensionStageHooks,omitempty"`
|
||||||
@ -929,6 +941,20 @@ func (c *Container) StoppedByUser() (bool, error) {
|
|||||||
return c.state.StoppedByUser, nil
|
return c.state.StoppedByUser, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StartupHCPassed returns whether the container's startup healthcheck passed.
|
||||||
|
func (c *Container) StartupHCPassed() (bool, error) {
|
||||||
|
if !c.batched {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
|
if err := c.syncContainer(); err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.state.StartupHCPassed, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Misc Accessors
|
// Misc Accessors
|
||||||
// Most will require locking
|
// Most will require locking
|
||||||
|
|
||||||
|
@ -395,6 +395,10 @@ type ContainerMiscConfig struct {
|
|||||||
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
|
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
|
||||||
// HealthCheckOnFailureAction defines an action to take once the container turns unhealthy.
|
// HealthCheckOnFailureAction defines an action to take once the container turns unhealthy.
|
||||||
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"`
|
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"`
|
||||||
|
// StartupHealthCheckConfig is the configuration of the startup
|
||||||
|
// healthcheck for the container. This will run before the regular HC
|
||||||
|
// runs, and when it passes the regular HC will be activated.
|
||||||
|
StartupHealthCheckConfig *define.StartupHealthCheck `json:"startupHealthCheck,omitempty"`
|
||||||
// PreserveFDs is a number of additional file descriptors (in addition
|
// PreserveFDs is a number of additional file descriptors (in addition
|
||||||
// to 0, 1, 2) that will be passed to the executed process. The total FDs
|
// to 0, 1, 2) that will be passed to the executed process. The total FDs
|
||||||
// passed will be 3 + PreserveFDs.
|
// passed will be 3 + PreserveFDs.
|
||||||
|
@ -622,6 +622,9 @@ func resetState(state *ContainerState) {
|
|||||||
state.CheckpointPath = ""
|
state.CheckpointPath = ""
|
||||||
state.CheckpointLog = ""
|
state.CheckpointLog = ""
|
||||||
state.RestoreLog = ""
|
state.RestoreLog = ""
|
||||||
|
state.StartupHCPassed = false
|
||||||
|
state.StartupHCSuccessCount = 0
|
||||||
|
state.StartupHCFailureCount = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Refresh refreshes the container's state after a restart.
|
// Refresh refreshes the container's state after a restart.
|
||||||
@ -1072,6 +1075,9 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
|
|||||||
c.state.State = define.ContainerStateCreated
|
c.state.State = define.ContainerStateCreated
|
||||||
c.state.StoppedByUser = false
|
c.state.StoppedByUser = false
|
||||||
c.state.RestartPolicyMatch = false
|
c.state.RestartPolicyMatch = false
|
||||||
|
c.state.StartupHCFailureCount = 0
|
||||||
|
c.state.StartupHCSuccessCount = 0
|
||||||
|
c.state.StartupHCPassed = false
|
||||||
|
|
||||||
if !retainRetries {
|
if !retainRetries {
|
||||||
c.state.RestartCount = 0
|
c.state.RestartCount = 0
|
||||||
@ -1091,7 +1097,11 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if c.config.HealthCheckConfig != nil {
|
if c.config.HealthCheckConfig != nil {
|
||||||
if err := c.createTimer(); err != nil {
|
timer := c.config.HealthCheckConfig.Interval.String()
|
||||||
|
if c.config.StartupHealthCheckConfig != nil {
|
||||||
|
timer = c.config.StartupHealthCheckConfig.Interval.String()
|
||||||
|
}
|
||||||
|
if err := c.createTimer(timer, c.config.StartupHealthCheckConfig != nil); err != nil {
|
||||||
logrus.Error(err)
|
logrus.Error(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1244,7 +1254,7 @@ func (c *Container) start() error {
|
|||||||
if err := c.updateHealthStatus(define.HealthCheckStarting); err != nil {
|
if err := c.updateHealthStatus(define.HealthCheckStarting); err != nil {
|
||||||
logrus.Error(err)
|
logrus.Error(err)
|
||||||
}
|
}
|
||||||
if err := c.startTimer(); err != nil {
|
if err := c.startTimer(c.config.StartupHealthCheckConfig != nil); err != nil {
|
||||||
logrus.Error(err)
|
logrus.Error(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1422,7 +1432,7 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if c.config.HealthCheckConfig != nil {
|
if c.config.HealthCheckConfig != nil {
|
||||||
if err := c.removeTransientFiles(context.Background()); err != nil {
|
if err := c.removeTransientFiles(context.Background(), c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
|
||||||
logrus.Error(err.Error())
|
logrus.Error(err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1859,7 +1869,7 @@ func (c *Container) cleanup(ctx context.Context) error {
|
|||||||
|
|
||||||
// Remove healthcheck unit/timer file if it exists
|
// Remove healthcheck unit/timer file if it exists
|
||||||
if c.config.HealthCheckConfig != nil {
|
if c.config.HealthCheckConfig != nil {
|
||||||
if err := c.removeTransientFiles(ctx); err != nil {
|
if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
|
||||||
logrus.Errorf("Removing timer for container %s healthcheck: %v", c.ID(), err)
|
logrus.Errorf("Removing timer for container %s healthcheck: %v", c.ID(), err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -156,6 +156,11 @@ func (c *Container) validate() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cannot set startup HC without a healthcheck
|
||||||
|
if c.config.HealthCheckConfig == nil && c.config.StartupHealthCheckConfig != nil {
|
||||||
|
return fmt.Errorf("cannot set a startup healthcheck when there is no regular healthcheck: %w", define.ErrInvalidArg)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,6 +3,8 @@ package define
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/containers/image/v5/manifest"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -38,6 +40,9 @@ const (
|
|||||||
HealthCheckInternalError HealthCheckStatus = iota
|
HealthCheckInternalError HealthCheckStatus = iota
|
||||||
// HealthCheckDefined means the healthcheck was found on the container
|
// HealthCheckDefined means the healthcheck was found on the container
|
||||||
HealthCheckDefined HealthCheckStatus = iota
|
HealthCheckDefined HealthCheckStatus = iota
|
||||||
|
// HealthCheckStartup means the healthcheck was unhealthy, but is still
|
||||||
|
// either within the startup HC or the startup period of the healthcheck
|
||||||
|
HealthCheckStartup HealthCheckStatus = iota
|
||||||
)
|
)
|
||||||
|
|
||||||
// Healthcheck defaults. These are used both in the cli as well in
|
// Healthcheck defaults. These are used both in the cli as well in
|
||||||
@ -131,3 +136,12 @@ func ParseHealthCheckOnFailureAction(s string) (HealthCheckOnFailureAction, erro
|
|||||||
return HealthCheckOnFailureActionInvalid, err
|
return HealthCheckOnFailureActionInvalid, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StartupHealthCheck is the configuration of a startup healthcheck.
|
||||||
|
type StartupHealthCheck struct {
|
||||||
|
manifest.Schema2HealthConfig
|
||||||
|
// Successes is the number of successes required to mark the startup HC
|
||||||
|
// as passed.
|
||||||
|
// If set to 0, a single success will mark the HC as passed.
|
||||||
|
Successes int `json:",omitempty"`
|
||||||
|
}
|
||||||
|
@ -25,7 +25,7 @@ const (
|
|||||||
|
|
||||||
// HealthCheck verifies the state and validity of the healthcheck configuration
|
// HealthCheck verifies the state and validity of the healthcheck configuration
|
||||||
// on the container and then executes the healthcheck
|
// on the container and then executes the healthcheck
|
||||||
func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
|
func (r *Runtime) HealthCheck(ctx context.Context, name string) (define.HealthCheckStatus, error) {
|
||||||
container, err := r.LookupContainer(name)
|
container, err := r.LookupContainer(name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
|
return define.HealthCheckContainerNotFound, fmt.Errorf("unable to look up %s to perform a health check: %w", name, err)
|
||||||
@ -36,21 +36,35 @@ func (r *Runtime) HealthCheck(name string) (define.HealthCheckStatus, error) {
|
|||||||
return hcStatus, err
|
return hcStatus, err
|
||||||
}
|
}
|
||||||
|
|
||||||
hcStatus, logStatus, err := container.runHealthCheck()
|
isStartupHC := false
|
||||||
if err := container.processHealthCheckStatus(logStatus); err != nil {
|
if container.config.StartupHealthCheckConfig != nil {
|
||||||
return hcStatus, err
|
passed, err := container.StartupHCPassed()
|
||||||
|
if err != nil {
|
||||||
|
return define.HealthCheckInternalError, err
|
||||||
|
}
|
||||||
|
isStartupHC = !passed
|
||||||
|
}
|
||||||
|
|
||||||
|
hcStatus, logStatus, err := container.runHealthCheck(ctx, isStartupHC)
|
||||||
|
if !isStartupHC {
|
||||||
|
if err := container.processHealthCheckStatus(logStatus); err != nil {
|
||||||
|
return hcStatus, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return hcStatus, err
|
return hcStatus, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// runHealthCheck runs the health check as defined by the container
|
func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.HealthCheckStatus, string, error) {
|
||||||
func (c *Container) runHealthCheck() (define.HealthCheckStatus, string, error) {
|
|
||||||
var (
|
var (
|
||||||
newCommand []string
|
newCommand []string
|
||||||
returnCode int
|
returnCode int
|
||||||
inStartPeriod bool
|
inStartPeriod bool
|
||||||
)
|
)
|
||||||
hcCommand := c.HealthCheckConfig().Test
|
hcCommand := c.HealthCheckConfig().Test
|
||||||
|
if isStartup {
|
||||||
|
logrus.Debugf("Running startup healthcheck for container %s", c.ID())
|
||||||
|
hcCommand = c.config.StartupHealthCheckConfig.Test
|
||||||
|
}
|
||||||
if len(hcCommand) < 1 {
|
if len(hcCommand) < 1 {
|
||||||
return define.HealthCheckNotDefined, "", fmt.Errorf("container %s has no defined healthcheck", c.ID())
|
return define.HealthCheckNotDefined, "", fmt.Errorf("container %s has no defined healthcheck", c.ID())
|
||||||
}
|
}
|
||||||
@ -113,6 +127,18 @@ func (c *Container) runHealthCheck() (define.HealthCheckStatus, string, error) {
|
|||||||
hcResult = define.HealthCheckFailure
|
hcResult = define.HealthCheckFailure
|
||||||
returnCode = 1
|
returnCode = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle startup HC
|
||||||
|
if isStartup {
|
||||||
|
inStartPeriod = true
|
||||||
|
if hcErr != nil || exitCode != 0 {
|
||||||
|
hcResult = define.HealthCheckStartup
|
||||||
|
c.incrementStartupHCFailureCounter(ctx)
|
||||||
|
} else {
|
||||||
|
c.incrementStartupHCSuccessCounter(ctx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
timeEnd := time.Now()
|
timeEnd := time.Now()
|
||||||
if c.HealthCheckConfig().StartPeriod > 0 {
|
if c.HealthCheckConfig().StartPeriod > 0 {
|
||||||
// there is a start-period we need to honor; we add startPeriod to container start time
|
// there is a start-period we need to honor; we add startPeriod to container start time
|
||||||
@ -188,6 +214,114 @@ func checkHealthCheckCanBeRun(c *Container) (define.HealthCheckStatus, error) {
|
|||||||
return define.HealthCheckDefined, nil
|
return define.HealthCheckDefined, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Increment the current startup healthcheck success counter.
|
||||||
|
// Can stop the startup HC and start the regular HC if the startup HC has enough
|
||||||
|
// consecutive successes.
|
||||||
|
func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) {
|
||||||
|
if !c.batched {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
|
if err := c.syncContainer(); err != nil {
|
||||||
|
logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We don't have a startup HC, can't do anything
|
||||||
|
if c.config.StartupHealthCheckConfig == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Race: someone else got here first
|
||||||
|
if c.state.StartupHCPassed {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increment the success counter
|
||||||
|
c.state.StartupHCSuccessCount++
|
||||||
|
|
||||||
|
logrus.Debugf("Startup healthcheck for container %s succeeded, success counter now %d", c.ID(), c.state.StartupHCSuccessCount)
|
||||||
|
|
||||||
|
// Did we exceed threshold?
|
||||||
|
recreateTimer := false
|
||||||
|
if c.config.StartupHealthCheckConfig.Successes == 0 || c.state.StartupHCSuccessCount >= c.config.StartupHealthCheckConfig.Successes {
|
||||||
|
c.state.StartupHCPassed = true
|
||||||
|
c.state.StartupHCSuccessCount = 0
|
||||||
|
c.state.StartupHCFailureCount = 0
|
||||||
|
|
||||||
|
recreateTimer = true
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := c.save(); err != nil {
|
||||||
|
logrus.Errorf("Error saving container %s state: %v", c.ID(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if recreateTimer {
|
||||||
|
logrus.Infof("Startup healthcheck for container %s passed, recreating timer", c.ID())
|
||||||
|
|
||||||
|
// Create the new, standard healthcheck timer first.
|
||||||
|
if err := c.createTimer(c.HealthCheckConfig().Interval.String(), false); err != nil {
|
||||||
|
logrus.Errorf("Error recreating container %s healthcheck: %v", c.ID(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := c.startTimer(false); err != nil {
|
||||||
|
logrus.Errorf("Error restarting container %s healthcheck timer: %v", c.ID(), err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// This kills the process the healthcheck is running.
|
||||||
|
// Which happens to be us.
|
||||||
|
// So this has to be last - after this, systemd serves us a
|
||||||
|
// SIGTERM and we exit.
|
||||||
|
if err := c.removeTransientFiles(ctx, true); err != nil {
|
||||||
|
logrus.Errorf("Error removing container %s healthcheck: %v", c.ID(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Increment the current startup healthcheck failure counter.
|
||||||
|
// Can restart the container if the HC fails enough times consecutively.
|
||||||
|
func (c *Container) incrementStartupHCFailureCounter(ctx context.Context) {
|
||||||
|
if !c.batched {
|
||||||
|
c.lock.Lock()
|
||||||
|
defer c.lock.Unlock()
|
||||||
|
|
||||||
|
if err := c.syncContainer(); err != nil {
|
||||||
|
logrus.Errorf("Error syncing container %s state: %v", c.ID(), err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We don't have a startup HC, can't do anything
|
||||||
|
if c.config.StartupHealthCheckConfig == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Race: someone else got here first
|
||||||
|
if c.state.StartupHCPassed {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
c.state.StartupHCFailureCount++
|
||||||
|
|
||||||
|
logrus.Debugf("Startup healthcheck for container %s failed, failure counter now %d", c.ID(), c.state.StartupHCFailureCount)
|
||||||
|
|
||||||
|
if c.config.StartupHealthCheckConfig.Retries != 0 && c.state.StartupHCFailureCount >= c.config.StartupHealthCheckConfig.Retries {
|
||||||
|
logrus.Infof("Restarting container %s as startup healthcheck failed", c.ID())
|
||||||
|
// Restart the container
|
||||||
|
if err := c.restartWithTimeout(ctx, c.config.StopTimeout); err != nil {
|
||||||
|
logrus.Errorf("Error restarting container %s after healthcheck failure: %v", c.ID(), err)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := c.save(); err != nil {
|
||||||
|
logrus.Errorf("Error saving container %s state: %v", c.ID(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func newHealthCheckLog(start, end time.Time, exitCode int, log string) define.HealthCheckLog {
|
func newHealthCheckLog(start, end time.Time, exitCode int, log string) define.HealthCheckLog {
|
||||||
return define.HealthCheckLog{
|
return define.HealthCheckLog{
|
||||||
Start: start.Format(time.RFC3339Nano),
|
Start: start.Format(time.RFC3339Nano),
|
||||||
@ -299,12 +433,26 @@ func (c *Container) healthCheckStatus() (string, error) {
|
|||||||
return results.Status, nil
|
return results.Status, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Container) disableHealthCheckSystemd() bool {
|
func (c *Container) disableHealthCheckSystemd(isStartup bool) bool {
|
||||||
if os.Getenv("DISABLE_HC_SYSTEMD") == "true" {
|
if os.Getenv("DISABLE_HC_SYSTEMD") == "true" {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
if isStartup {
|
||||||
|
if c.config.StartupHealthCheckConfig.Interval == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
if c.config.HealthCheckConfig.Interval == 0 {
|
if c.config.HealthCheckConfig.Interval == 0 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Systemd unit name for the healthcheck systemd unit
|
||||||
|
func (c *Container) hcUnitName(isStartup bool) string {
|
||||||
|
unitName := c.ID()
|
||||||
|
if isStartup {
|
||||||
|
unitName += "-startup"
|
||||||
|
}
|
||||||
|
return unitName
|
||||||
|
}
|
||||||
|
@ -14,8 +14,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// createTimer creates systemd timers for healthchecks of a container
|
// createTimer creates systemd timers for healthchecks of a container
|
||||||
func (c *Container) createTimer() error {
|
func (c *Container) createTimer(interval string, isStartup bool) error {
|
||||||
if c.disableHealthCheckSystemd() {
|
if c.disableHealthCheckSystemd(isStartup) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
podman, err := os.Executable()
|
podman, err := os.Executable()
|
||||||
@ -31,7 +31,14 @@ func (c *Container) createTimer() error {
|
|||||||
if path != "" {
|
if path != "" {
|
||||||
cmd = append(cmd, "--setenv=PATH="+path)
|
cmd = append(cmd, "--setenv=PATH="+path)
|
||||||
}
|
}
|
||||||
cmd = append(cmd, "--unit", c.ID(), fmt.Sprintf("--on-unit-inactive=%s", c.HealthCheckConfig().Interval.String()), "--timer-property=AccuracySec=1s", podman, "healthcheck", "run", c.ID())
|
|
||||||
|
cmd = append(cmd, "--unit", c.hcUnitName(isStartup), fmt.Sprintf("--on-unit-inactive=%s", interval), "--timer-property=AccuracySec=1s", podman)
|
||||||
|
|
||||||
|
if logrus.IsLevelEnabled(logrus.DebugLevel) {
|
||||||
|
cmd = append(cmd, "--log-level=debug", "--syslog")
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd = append(cmd, "healthcheck", "run", c.ID())
|
||||||
|
|
||||||
conn, err := systemd.ConnectToDBUS()
|
conn, err := systemd.ConnectToDBUS()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -58,8 +65,8 @@ func systemdOpSuccessful(c chan string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// startTimer starts a systemd timer for the healthchecks
|
// startTimer starts a systemd timer for the healthchecks
|
||||||
func (c *Container) startTimer() error {
|
func (c *Container) startTimer(isStartup bool) error {
|
||||||
if c.disableHealthCheckSystemd() {
|
if c.disableHealthCheckSystemd(isStartup) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
conn, err := systemd.ConnectToDBUS()
|
conn, err := systemd.ConnectToDBUS()
|
||||||
@ -68,7 +75,7 @@ func (c *Container) startTimer() error {
|
|||||||
}
|
}
|
||||||
defer conn.Close()
|
defer conn.Close()
|
||||||
|
|
||||||
startFile := fmt.Sprintf("%s.service", c.ID())
|
startFile := fmt.Sprintf("%s.service", c.hcUnitName(isStartup))
|
||||||
startChan := make(chan string)
|
startChan := make(chan string)
|
||||||
if _, err := conn.RestartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
|
if _, err := conn.RestartUnitContext(context.Background(), startFile, "fail", startChan); err != nil {
|
||||||
return err
|
return err
|
||||||
@ -82,8 +89,8 @@ func (c *Container) startTimer() error {
|
|||||||
|
|
||||||
// removeTransientFiles removes the systemd timer and unit files
|
// removeTransientFiles removes the systemd timer and unit files
|
||||||
// for the container
|
// for the container
|
||||||
func (c *Container) removeTransientFiles(ctx context.Context) error {
|
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) error {
|
||||||
if c.disableHealthCheckSystemd() {
|
if c.disableHealthCheckSystemd(isStartup) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
conn, err := systemd.ConnectToDBUS()
|
conn, err := systemd.ConnectToDBUS()
|
||||||
@ -99,7 +106,7 @@ func (c *Container) removeTransientFiles(ctx context.Context) error {
|
|||||||
// Stop the timer before the service to make sure the timer does not
|
// Stop the timer before the service to make sure the timer does not
|
||||||
// fire after the service is stopped.
|
// fire after the service is stopped.
|
||||||
timerChan := make(chan string)
|
timerChan := make(chan string)
|
||||||
timerFile := fmt.Sprintf("%s.timer", c.ID())
|
timerFile := fmt.Sprintf("%s.timer", c.hcUnitName(isStartup))
|
||||||
if _, err := conn.StopUnitContext(ctx, timerFile, "fail", timerChan); err != nil {
|
if _, err := conn.StopUnitContext(ctx, timerFile, "fail", timerChan); err != nil {
|
||||||
if !strings.HasSuffix(err.Error(), ".timer not loaded.") {
|
if !strings.HasSuffix(err.Error(), ".timer not loaded.") {
|
||||||
stopErrors = append(stopErrors, fmt.Errorf("removing health-check timer %q: %w", timerFile, err))
|
stopErrors = append(stopErrors, fmt.Errorf("removing health-check timer %q: %w", timerFile, err))
|
||||||
@ -111,7 +118,7 @@ func (c *Container) removeTransientFiles(ctx context.Context) error {
|
|||||||
// Reset the service before stopping it to make sure it's being removed
|
// Reset the service before stopping it to make sure it's being removed
|
||||||
// on stop.
|
// on stop.
|
||||||
serviceChan := make(chan string)
|
serviceChan := make(chan string)
|
||||||
serviceFile := fmt.Sprintf("%s.service", c.ID())
|
serviceFile := fmt.Sprintf("%s.service", c.hcUnitName(isStartup))
|
||||||
if err := conn.ResetFailedUnitContext(ctx, serviceFile); err != nil {
|
if err := conn.ResetFailedUnitContext(ctx, serviceFile); err != nil {
|
||||||
logrus.Debugf("Failed to reset unit file: %q", err)
|
logrus.Debugf("Failed to reset unit file: %q", err)
|
||||||
}
|
}
|
||||||
|
@ -1898,6 +1898,21 @@ func WithInfraConfig(compatibleOptions InfraInherit) CtrCreateOption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithStartupHealthcheck sets a startup healthcheck for the container.
|
||||||
|
// Requires that a regular healthcheck also be set.
|
||||||
|
func WithStartupHealthcheck(startupHC *define.StartupHealthCheck) CtrCreateOption {
|
||||||
|
return func(ctr *Container) error {
|
||||||
|
if ctr.valid {
|
||||||
|
return define.ErrCtrFinalized
|
||||||
|
}
|
||||||
|
ctr.config.StartupHealthCheckConfig = new(define.StartupHealthCheck)
|
||||||
|
if err := JSONDeepCopy(startupHC, ctr.config.StartupHealthCheckConfig); err != nil {
|
||||||
|
return fmt.Errorf("error copying startup healthcheck into container: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Pod Creation Options
|
// Pod Creation Options
|
||||||
|
|
||||||
// WithPodCreateCommand adds the full command plus arguments of the current
|
// WithPodCreateCommand adds the full command plus arguments of the current
|
||||||
|
@ -12,7 +12,7 @@ import (
|
|||||||
func RunHealthCheck(w http.ResponseWriter, r *http.Request) {
|
func RunHealthCheck(w http.ResponseWriter, r *http.Request) {
|
||||||
runtime := r.Context().Value(api.RuntimeKey).(*libpod.Runtime)
|
runtime := r.Context().Value(api.RuntimeKey).(*libpod.Runtime)
|
||||||
name := utils.GetName(r)
|
name := utils.GetName(r)
|
||||||
status, err := runtime.HealthCheck(name)
|
status, err := runtime.HealthCheck(r.Context(), name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if status == define.HealthCheckContainerNotFound {
|
if status == define.HealthCheckContainerNotFound {
|
||||||
utils.ContainerNotFound(w, name, err)
|
utils.ContainerNotFound(w, name, err)
|
||||||
@ -32,6 +32,8 @@ func RunHealthCheck(w http.ResponseWriter, r *http.Request) {
|
|||||||
hcStatus := define.HealthCheckUnhealthy
|
hcStatus := define.HealthCheckUnhealthy
|
||||||
if status == define.HealthCheckSuccess {
|
if status == define.HealthCheckSuccess {
|
||||||
hcStatus = define.HealthCheckHealthy
|
hcStatus = define.HealthCheckHealthy
|
||||||
|
} else if status == define.HealthCheckStartup {
|
||||||
|
hcStatus = define.HealthCheckStarting
|
||||||
}
|
}
|
||||||
report := define.HealthCheckResults{
|
report := define.HealthCheckResults{
|
||||||
Status: hcStatus,
|
Status: hcStatus,
|
||||||
|
@ -174,125 +174,129 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type ContainerCreateOptions struct {
|
type ContainerCreateOptions struct {
|
||||||
Annotation []string
|
Annotation []string
|
||||||
Attach []string
|
Attach []string
|
||||||
Authfile string
|
Authfile string
|
||||||
BlkIOWeight string
|
BlkIOWeight string
|
||||||
BlkIOWeightDevice []string
|
BlkIOWeightDevice []string
|
||||||
CapAdd []string
|
CapAdd []string
|
||||||
CapDrop []string
|
CapDrop []string
|
||||||
CgroupNS string
|
CgroupNS string
|
||||||
CgroupsMode string
|
CgroupsMode string
|
||||||
CgroupParent string `json:"cgroup_parent,omitempty"`
|
CgroupParent string `json:"cgroup_parent,omitempty"`
|
||||||
CIDFile string
|
CIDFile string
|
||||||
ConmonPIDFile string `json:"container_conmon_pidfile,omitempty"`
|
ConmonPIDFile string `json:"container_conmon_pidfile,omitempty"`
|
||||||
CPUPeriod uint64
|
CPUPeriod uint64
|
||||||
CPUQuota int64
|
CPUQuota int64
|
||||||
CPURTPeriod uint64
|
CPURTPeriod uint64
|
||||||
CPURTRuntime int64
|
CPURTRuntime int64
|
||||||
CPUShares uint64
|
CPUShares uint64
|
||||||
CPUS float64 `json:"cpus,omitempty"`
|
CPUS float64 `json:"cpus,omitempty"`
|
||||||
CPUSetCPUs string `json:"cpuset_cpus,omitempty"`
|
CPUSetCPUs string `json:"cpuset_cpus,omitempty"`
|
||||||
CPUSetMems string
|
CPUSetMems string
|
||||||
Devices []string `json:"devices,omitempty"`
|
Devices []string `json:"devices,omitempty"`
|
||||||
DeviceCgroupRule []string
|
DeviceCgroupRule []string
|
||||||
DeviceReadBPs []string `json:"device_read_bps,omitempty"`
|
DeviceReadBPs []string `json:"device_read_bps,omitempty"`
|
||||||
DeviceReadIOPs []string
|
DeviceReadIOPs []string
|
||||||
DeviceWriteBPs []string
|
DeviceWriteBPs []string
|
||||||
DeviceWriteIOPs []string
|
DeviceWriteIOPs []string
|
||||||
Entrypoint *string `json:"container_command,omitempty"`
|
Entrypoint *string `json:"container_command,omitempty"`
|
||||||
Env []string
|
Env []string
|
||||||
EnvHost bool
|
EnvHost bool
|
||||||
EnvFile []string
|
EnvFile []string
|
||||||
Expose []string
|
Expose []string
|
||||||
GIDMap []string
|
GIDMap []string
|
||||||
GroupAdd []string
|
GroupAdd []string
|
||||||
HealthCmd string
|
HealthCmd string
|
||||||
HealthInterval string
|
HealthInterval string
|
||||||
HealthRetries uint
|
HealthRetries uint
|
||||||
HealthStartPeriod string
|
HealthStartPeriod string
|
||||||
HealthTimeout string
|
HealthTimeout string
|
||||||
HealthOnFailure string
|
HealthOnFailure string
|
||||||
Hostname string `json:"hostname,omitempty"`
|
Hostname string `json:"hostname,omitempty"`
|
||||||
HTTPProxy bool
|
HTTPProxy bool
|
||||||
HostUsers []string
|
HostUsers []string
|
||||||
ImageVolume string
|
ImageVolume string
|
||||||
Init bool
|
Init bool
|
||||||
InitContainerType string
|
InitContainerType string
|
||||||
InitPath string
|
InitPath string
|
||||||
Interactive bool
|
Interactive bool
|
||||||
IPC string
|
IPC string
|
||||||
Label []string
|
Label []string
|
||||||
LabelFile []string
|
LabelFile []string
|
||||||
LogDriver string
|
LogDriver string
|
||||||
LogOptions []string
|
LogOptions []string
|
||||||
Memory string
|
Memory string
|
||||||
MemoryReservation string
|
MemoryReservation string
|
||||||
MemorySwap string
|
MemorySwap string
|
||||||
MemorySwappiness int64
|
MemorySwappiness int64
|
||||||
Name string `json:"container_name"`
|
Name string `json:"container_name"`
|
||||||
NoHealthCheck bool
|
NoHealthCheck bool
|
||||||
OOMKillDisable bool
|
OOMKillDisable bool
|
||||||
OOMScoreAdj *int
|
OOMScoreAdj *int
|
||||||
Arch string
|
Arch string
|
||||||
OS string
|
OS string
|
||||||
Variant string
|
Variant string
|
||||||
PID string `json:"pid,omitempty"`
|
PID string `json:"pid,omitempty"`
|
||||||
PIDsLimit *int64
|
PIDsLimit *int64
|
||||||
Platform string
|
Platform string
|
||||||
Pod string
|
Pod string
|
||||||
PodIDFile string
|
PodIDFile string
|
||||||
Personality string
|
Personality string
|
||||||
PreserveFDs uint
|
PreserveFDs uint
|
||||||
Privileged bool
|
Privileged bool
|
||||||
PublishAll bool
|
PublishAll bool
|
||||||
Pull string
|
Pull string
|
||||||
Quiet bool
|
Quiet bool
|
||||||
ReadOnly bool
|
ReadOnly bool
|
||||||
ReadOnlyTmpFS bool
|
ReadOnlyTmpFS bool
|
||||||
Restart string
|
Restart string
|
||||||
Replace bool
|
Replace bool
|
||||||
Requires []string
|
Requires []string
|
||||||
Rm bool
|
Rm bool
|
||||||
RootFS bool
|
RootFS bool
|
||||||
Secrets []string
|
Secrets []string
|
||||||
SecurityOpt []string `json:"security_opt,omitempty"`
|
SecurityOpt []string `json:"security_opt,omitempty"`
|
||||||
SdNotifyMode string
|
SdNotifyMode string
|
||||||
ShmSize string
|
ShmSize string
|
||||||
SignaturePolicy string
|
SignaturePolicy string
|
||||||
StopSignal string
|
StartupHCCmd string
|
||||||
StopTimeout uint
|
StartupHCInterval string
|
||||||
StorageOpts []string
|
StartupHCRetries uint
|
||||||
SubUIDName string
|
StartupHCSuccesses uint
|
||||||
SubGIDName string
|
StartupHCTimeout string
|
||||||
Sysctl []string `json:"sysctl,omitempty"`
|
StopSignal string
|
||||||
Systemd string
|
StopTimeout uint
|
||||||
Timeout uint
|
StorageOpts []string
|
||||||
TLSVerify commonFlag.OptionalBool
|
SubUIDName string
|
||||||
TmpFS []string
|
SubGIDName string
|
||||||
TTY bool
|
Sysctl []string `json:"sysctl,omitempty"`
|
||||||
Timezone string
|
Systemd string
|
||||||
Umask string
|
Timeout uint
|
||||||
EnvMerge []string
|
TLSVerify commonFlag.OptionalBool
|
||||||
UnsetEnv []string
|
TmpFS []string
|
||||||
UnsetEnvAll bool
|
TTY bool
|
||||||
UIDMap []string
|
Timezone string
|
||||||
Ulimit []string
|
Umask string
|
||||||
User string
|
EnvMerge []string
|
||||||
UserNS string `json:"-"`
|
UnsetEnv []string
|
||||||
UTS string
|
UnsetEnvAll bool
|
||||||
Mount []string
|
UIDMap []string
|
||||||
Volume []string `json:"volume,omitempty"`
|
Ulimit []string
|
||||||
VolumesFrom []string `json:"volumes_from,omitempty"`
|
User string
|
||||||
Workdir string
|
UserNS string `json:"-"`
|
||||||
SeccompPolicy string
|
UTS string
|
||||||
PidFile string
|
Mount []string
|
||||||
ChrootDirs []string
|
Volume []string `json:"volume,omitempty"`
|
||||||
IsInfra bool
|
VolumesFrom []string `json:"volumes_from,omitempty"`
|
||||||
IsClone bool
|
Workdir string
|
||||||
DecryptionKeys []string
|
SeccompPolicy string
|
||||||
|
PidFile string
|
||||||
Net *NetOptions `json:"net,omitempty"`
|
ChrootDirs []string
|
||||||
|
IsInfra bool
|
||||||
|
IsClone bool
|
||||||
|
DecryptionKeys []string
|
||||||
|
Net *NetOptions `json:"net,omitempty"`
|
||||||
|
|
||||||
CgroupConf []string
|
CgroupConf []string
|
||||||
|
|
||||||
|
@ -8,13 +8,15 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func (ic *ContainerEngine) HealthCheckRun(ctx context.Context, nameOrID string, options entities.HealthCheckOptions) (*define.HealthCheckResults, error) {
|
func (ic *ContainerEngine) HealthCheckRun(ctx context.Context, nameOrID string, options entities.HealthCheckOptions) (*define.HealthCheckResults, error) {
|
||||||
status, err := ic.Libpod.HealthCheck(nameOrID)
|
status, err := ic.Libpod.HealthCheck(ctx, nameOrID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
hcStatus := define.HealthCheckUnhealthy
|
hcStatus := define.HealthCheckUnhealthy
|
||||||
if status == define.HealthCheckSuccess {
|
if status == define.HealthCheckSuccess {
|
||||||
hcStatus = define.HealthCheckHealthy
|
hcStatus = define.HealthCheckHealthy
|
||||||
|
} else if status == define.HealthCheckStartup {
|
||||||
|
hcStatus = define.HealthCheckStarting
|
||||||
}
|
}
|
||||||
report := define.HealthCheckResults{
|
report := define.HealthCheckResults{
|
||||||
Status: hcStatus,
|
Status: hcStatus,
|
||||||
|
@ -527,6 +527,9 @@ func createContainerOptions(rt *libpod.Runtime, s *specgen.SpecGenerator, pod *l
|
|||||||
options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig))
|
options = append(options, libpod.WithHealthCheck(s.ContainerHealthCheckConfig.HealthConfig))
|
||||||
logrus.Debugf("New container has a health check")
|
logrus.Debugf("New container has a health check")
|
||||||
}
|
}
|
||||||
|
if s.ContainerHealthCheckConfig.StartupHealthConfig != nil {
|
||||||
|
options = append(options, libpod.WithStartupHealthcheck(s.ContainerHealthCheckConfig.StartupHealthConfig))
|
||||||
|
}
|
||||||
|
|
||||||
if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone {
|
if s.ContainerHealthCheckConfig.HealthCheckOnFailureAction != define.HealthCheckOnFailureActionNone {
|
||||||
options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction))
|
options = append(options, libpod.WithHealthCheckOnFailureAction(s.ContainerHealthCheckConfig.HealthCheckOnFailureAction))
|
||||||
|
@ -536,6 +536,10 @@ type ContainerResourceConfig struct {
|
|||||||
type ContainerHealthCheckConfig struct {
|
type ContainerHealthCheckConfig struct {
|
||||||
HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"`
|
HealthConfig *manifest.Schema2HealthConfig `json:"healthconfig,omitempty"`
|
||||||
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"health_check_on_failure_action,omitempty"`
|
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"health_check_on_failure_action,omitempty"`
|
||||||
|
// Startup healthcheck for a container.
|
||||||
|
// Requires that HealthConfig be set.
|
||||||
|
// Optional.
|
||||||
|
StartupHealthConfig *define.StartupHealthCheck `json:"startupHealthConfig,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SpecGenerator creates an OCI spec and Libpod configuration options to create
|
// SpecGenerator creates an OCI spec and Libpod configuration options to create
|
||||||
|
@ -256,7 +256,7 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
|
|||||||
if c.NoHealthCheck {
|
if c.NoHealthCheck {
|
||||||
return errors.New("cannot specify both --no-healthcheck and --health-cmd")
|
return errors.New("cannot specify both --no-healthcheck and --health-cmd")
|
||||||
}
|
}
|
||||||
s.HealthConfig, err = makeHealthCheckFromCli(c.HealthCmd, c.HealthInterval, c.HealthRetries, c.HealthTimeout, c.HealthStartPeriod)
|
s.HealthConfig, err = makeHealthCheckFromCli(c.HealthCmd, c.HealthInterval, c.HealthRetries, c.HealthTimeout, c.HealthStartPeriod, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -272,6 +272,25 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
|
|||||||
}
|
}
|
||||||
s.HealthCheckOnFailureAction = onFailureAction
|
s.HealthCheckOnFailureAction = onFailureAction
|
||||||
|
|
||||||
|
if c.StartupHCCmd != "" {
|
||||||
|
if c.NoHealthCheck {
|
||||||
|
return errors.New("cannot specify both --no-healthcheck and --health-startup-cmd")
|
||||||
|
}
|
||||||
|
// The hardcoded "1s" will be discarded, as the startup
|
||||||
|
// healthcheck does not have a start period. So just hardcode
|
||||||
|
// something that parses correctly.
|
||||||
|
tmpHcConfig, err := makeHealthCheckFromCli(c.StartupHCCmd, c.StartupHCInterval, c.StartupHCRetries, c.StartupHCTimeout, "1s", true)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
s.StartupHealthConfig = new(define.StartupHealthCheck)
|
||||||
|
s.StartupHealthConfig.Test = tmpHcConfig.Test
|
||||||
|
s.StartupHealthConfig.Interval = tmpHcConfig.Interval
|
||||||
|
s.StartupHealthConfig.Timeout = tmpHcConfig.Timeout
|
||||||
|
s.StartupHealthConfig.Retries = tmpHcConfig.Retries
|
||||||
|
s.StartupHealthConfig.Successes = int(c.StartupHCSuccesses)
|
||||||
|
}
|
||||||
|
|
||||||
if err := setNamespaces(s, c); err != nil {
|
if err := setNamespaces(s, c); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@ -838,7 +857,7 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeHealthCheckFromCli(inCmd, interval string, retries uint, timeout, startPeriod string) (*manifest.Schema2HealthConfig, error) {
|
func makeHealthCheckFromCli(inCmd, interval string, retries uint, timeout, startPeriod string, isStartup bool) (*manifest.Schema2HealthConfig, error) {
|
||||||
cmdArr := []string{}
|
cmdArr := []string{}
|
||||||
isArr := true
|
isArr := true
|
||||||
err := json.Unmarshal([]byte(inCmd), &cmdArr) // array unmarshalling
|
err := json.Unmarshal([]byte(inCmd), &cmdArr) // array unmarshalling
|
||||||
@ -886,7 +905,7 @@ func makeHealthCheckFromCli(inCmd, interval string, retries uint, timeout, start
|
|||||||
|
|
||||||
hc.Interval = intervalDuration
|
hc.Interval = intervalDuration
|
||||||
|
|
||||||
if retries < 1 {
|
if retries < 1 && !isStartup {
|
||||||
return nil, errors.New("healthcheck-retries must be greater than 0")
|
return nil, errors.New("healthcheck-retries must be greater than 0")
|
||||||
}
|
}
|
||||||
hc.Retries = int(retries)
|
hc.Retries = int(retries)
|
||||||
|
@ -334,4 +334,43 @@ HEALTHCHECK CMD ls -l / 2>&1`, ALPINE)
|
|||||||
// Check to make sure characters were not coerced to utf8
|
// Check to make sure characters were not coerced to utf8
|
||||||
Expect(inspect[0].Config.Healthcheck).To(HaveField("Test", []string{"CMD-SHELL", "ls -l / 2>&1"}))
|
Expect(inspect[0].Config.Healthcheck).To(HaveField("Test", []string{"CMD-SHELL", "ls -l / 2>&1"}))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
It("Startup healthcheck success transitions to regular healthcheck", func() {
|
||||||
|
ctrName := "hcCtr"
|
||||||
|
ctrRun := podmanTest.Podman([]string{"run", "-dt", "--name", ctrName, "--health-cmd", "echo regular", "--health-startup-cmd", "cat /test", ALPINE, "top"})
|
||||||
|
ctrRun.WaitWithDefaultTimeout()
|
||||||
|
Expect(ctrRun).Should(Exit(0))
|
||||||
|
|
||||||
|
inspect := podmanTest.InspectContainer(ctrName)
|
||||||
|
Expect(inspect[0].State.Health).To(HaveField("Status", "starting"))
|
||||||
|
|
||||||
|
hc := podmanTest.Podman([]string{"healthcheck", "run", ctrName})
|
||||||
|
hc.WaitWithDefaultTimeout()
|
||||||
|
Expect(hc).Should(Exit(1))
|
||||||
|
|
||||||
|
exec := podmanTest.Podman([]string{"exec", ctrName, "sh", "-c", "touch /test && echo startup > /test"})
|
||||||
|
exec.WaitWithDefaultTimeout()
|
||||||
|
Expect(exec).Should(Exit(0))
|
||||||
|
|
||||||
|
hc = podmanTest.Podman([]string{"healthcheck", "run", ctrName})
|
||||||
|
hc.WaitWithDefaultTimeout()
|
||||||
|
Expect(hc).Should(Exit(0))
|
||||||
|
|
||||||
|
inspect = podmanTest.InspectContainer(ctrName)
|
||||||
|
Expect(inspect[0].State.Health).To(HaveField("Status", define.HealthCheckHealthy))
|
||||||
|
|
||||||
|
hc = podmanTest.Podman([]string{"healthcheck", "run", ctrName})
|
||||||
|
hc.WaitWithDefaultTimeout()
|
||||||
|
Expect(hc).Should(Exit(0))
|
||||||
|
|
||||||
|
inspect = podmanTest.InspectContainer(ctrName)
|
||||||
|
Expect(inspect[0].State.Health).To(HaveField("Status", define.HealthCheckHealthy))
|
||||||
|
|
||||||
|
// Test podman ps --filter health is working (#11687)
|
||||||
|
ps := podmanTest.Podman([]string{"ps", "--filter", "health=healthy"})
|
||||||
|
ps.WaitWithDefaultTimeout()
|
||||||
|
Expect(ps).Should(Exit(0))
|
||||||
|
Expect(ps.OutputToStringArray()).To(HaveLen(2))
|
||||||
|
Expect(ps.OutputToString()).To(ContainSubstring("hc"))
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
Reference in New Issue
Block a user