Configure HealthCheck with podman update

New flags for `podman update` can change the HealthCheck configuration of a container that has already been started, without having to restart or recreate the container.

This can help determine why a given container suddenly started failing its HealthCheck, without interfering with the services it provides. For example, HealthCheck can be reconfigured to keep more log entries than the usual last X results, to store logs in another destination, etc.

Fixes: https://issues.redhat.com/browse/RHEL-60561

Signed-off-by: Jan Rodák <hony.com@seznam.cz>
This commit is contained in:
Jan Rodák
2024-10-24 14:01:58 +02:00
parent 77e67e7a54
commit a1249425bd
34 changed files with 958 additions and 198 deletions

View File

@ -2,6 +2,8 @@ package define
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/containers/image/v5/manifest"
@ -155,3 +157,186 @@ type StartupHealthCheck struct {
// If set to 0, a single success will mark the HC as passed.
Successes int `json:",omitempty"`
}
// UpdateHealthCheckConfig carries the HealthCheck settings that an update may
// change. Every field is a pointer tagged with omitempty, so a setting that
// was not supplied stays nil and is left out of the JSON encoding.
type UpdateHealthCheckConfig struct {
	// HealthLogDestination sets the destination of the HealthCheck log:
	// a directory path, local or events_logger (local uses the container
	// state file).
	// Warning: changing this setting may cause the loss of previous logs!
	HealthLogDestination *string `json:"health_log_destination,omitempty"`

	// HealthMaxLogSize sets the maximum length, in characters, of the stored
	// HealthCheck log. A value of 0 means an infinite log length.
	HealthMaxLogSize *uint `json:"health_max_log_size,omitempty"`

	// HealthMaxLogCount sets the maximum number of attempts kept in the
	// HealthCheck log file. A value of 0 means an infinite number of
	// attempts in the log file.
	HealthMaxLogCount *uint `json:"health_max_log_count,omitempty"`

	// HealthOnFailure sets the action to take once the container turns
	// unhealthy.
	HealthOnFailure *string `json:"health_on_failure,omitempty"`

	// NoHealthCheck disables healthchecks on the container.
	NoHealthCheck *bool `json:"no_healthcheck,omitempty"`

	// HealthCmd sets a healthcheck command for the container
	// ('none' disables the existing healthcheck).
	HealthCmd *string `json:"health_cmd,omitempty"`

	// HealthInterval sets an interval for the healthcheck (a value of
	// disable results in no automatic timer setup). Changing this setting
	// resets the timer.
	HealthInterval *string `json:"health_interval,omitempty"`

	// HealthRetries sets the number of retries allowed before a healthcheck
	// is considered to be unhealthy.
	HealthRetries *uint `json:"health_retries,omitempty"`

	// HealthTimeout sets the maximum time allowed to complete the
	// healthcheck before an interval is considered failed.
	HealthTimeout *string `json:"health_timeout,omitempty"`

	// HealthStartPeriod sets the initialization time needed for a container
	// to bootstrap.
	HealthStartPeriod *string `json:"health_start_period,omitempty"`

	// HealthStartupCmd sets a startup healthcheck command for the container.
	HealthStartupCmd *string `json:"health_startup_cmd,omitempty"`

	// HealthStartupInterval sets an interval for the startup healthcheck.
	// Changing this setting resets the timer, depending on the state of the
	// container.
	HealthStartupInterval *string `json:"health_startup_interval,omitempty"`

	// HealthStartupRetries sets the maximum number of retries before the
	// startup healthcheck will restart the container.
	HealthStartupRetries *uint `json:"health_startup_retries,omitempty"`

	// HealthStartupTimeout sets the maximum amount of time that the startup
	// healthcheck may take before it is considered failed.
	HealthStartupTimeout *string `json:"health_startup_timeout,omitempty"`

	// HealthStartupSuccess sets the number of consecutive successes before
	// the startup healthcheck is marked as successful and the normal
	// healthcheck begins (0 indicates any success will start the regular
	// healthcheck).
	HealthStartupSuccess *uint `json:"health_startup_success,omitempty"`
}
// IsStartupHealthCheckCommandSet reports whether the update supplies at least
// one startup HealthCheck option (interval, retries, timeout or success) while
// no startup HealthCheck command is available, i.e. startupHealthCheck is nil
// and HealthStartupCmd is not part of the update.
//
// Note that, despite the name, a true result means options were given
// without a command to apply them to.
func (u *UpdateHealthCheckConfig) IsStartupHealthCheckCommandSet(startupHealthCheck *StartupHealthCheck) bool {
	// A command exists already or is being supplied now: nothing is missing.
	if startupHealthCheck != nil || u.HealthStartupCmd != nil {
		return false
	}
	return u.HealthStartupInterval != nil ||
		u.HealthStartupRetries != nil ||
		u.HealthStartupTimeout != nil ||
		u.HealthStartupSuccess != nil
}
// IsHealthCheckCommandSet reports whether the update supplies at least one
// regular HealthCheck option (interval, retries, timeout or start period)
// while no HealthCheck command is available, i.e. healthCheck is nil and
// HealthCmd is not part of the update.
//
// Note that, despite the name, a true result means options were given
// without a command to apply them to.
func (u *UpdateHealthCheckConfig) IsHealthCheckCommandSet(healthCheck *manifest.Schema2HealthConfig) bool {
	// A command exists already or is being supplied now: nothing is missing.
	if healthCheck != nil || u.HealthCmd != nil {
		return false
	}
	return u.HealthInterval != nil ||
		u.HealthRetries != nil ||
		u.HealthTimeout != nil ||
		u.HealthStartPeriod != nil
}
// SetNewStartupHealthCheckConfigTo copies every startup HealthCheck setting
// present in the update (command, interval, retries, timeout, successes) into
// healthCheckOptions and leaves the remaining fields as they were.
// StartPeriod is always overwritten with "1s", whether or not anything else
// was copied.
//
// It returns true when at least one setting was copied from the update.
func (u *UpdateHealthCheckConfig) SetNewStartupHealthCheckConfigTo(healthCheckOptions *HealthCheckOptions) bool {
	updated := false

	if cmd := u.HealthStartupCmd; cmd != nil {
		healthCheckOptions.Cmd, updated = *cmd, true
	}
	if interval := u.HealthStartupInterval; interval != nil {
		healthCheckOptions.Interval, updated = *interval, true
	}
	if retries := u.HealthStartupRetries; retries != nil {
		healthCheckOptions.Retries, updated = int(*retries), true
	}
	if timeout := u.HealthStartupTimeout; timeout != nil {
		healthCheckOptions.Timeout, updated = *timeout, true
	}
	if successes := u.HealthStartupSuccess; successes != nil {
		healthCheckOptions.Successes, updated = int(*successes), true
	}

	// Set unconditionally; it does not influence the returned flag.
	healthCheckOptions.StartPeriod = "1s"

	return updated
}
// SetNewHealthCheckConfigTo copies every regular HealthCheck setting present
// in the update (command, interval, retries, timeout, start period) into
// healthCheckOptions and leaves the remaining fields as they were.
//
// It returns true when at least one setting was copied from the update.
func (u *UpdateHealthCheckConfig) SetNewHealthCheckConfigTo(healthCheckOptions *HealthCheckOptions) bool {
	updated := false

	if cmd := u.HealthCmd; cmd != nil {
		healthCheckOptions.Cmd, updated = *cmd, true
	}
	if interval := u.HealthInterval; interval != nil {
		healthCheckOptions.Interval, updated = *interval, true
	}
	if retries := u.HealthRetries; retries != nil {
		healthCheckOptions.Retries, updated = int(*retries), true
	}
	if timeout := u.HealthTimeout; timeout != nil {
		healthCheckOptions.Timeout, updated = *timeout, true
	}
	if startPeriod := u.HealthStartPeriod; startPeriod != nil {
		healthCheckOptions.StartPeriod, updated = *startPeriod, true
	}

	return updated
}
// GetValidHealthCheckDestination validates a HealthCheck log destination and
// returns the value to store.
//
// The two keyword destinations, HealthCheckEventsLoggerDestination and
// DefaultHealthCheckLocalDestination, are returned unchanged. Any other value
// is treated as a filesystem path: it must exist and be a directory, and its
// absolute form is returned. An error is returned when the path cannot be
// stat'ed, is not a directory, or cannot be made absolute.
func GetValidHealthCheckDestination(destination string) (string, error) {
	isKeyword := destination == HealthCheckEventsLoggerDestination ||
		destination == DefaultHealthCheckLocalDestination
	if isKeyword {
		return destination, nil
	}

	info, statErr := os.Stat(destination)
	if statErr != nil {
		return "", fmt.Errorf("HealthCheck Log '%s' destination error: %w", destination, statErr)
	}
	if !info.Mode().IsDir() {
		return "", fmt.Errorf("HealthCheck Log '%s' destination must be directory", destination)
	}

	resolved, absErr := filepath.Abs(destination)
	if absErr != nil {
		return "", absErr
	}
	return resolved, nil
}
// GetNewGlobalHealthCheck builds the GlobalHealthCheckOptions described by the
// update.
//
// HealthMaxLogSize and HealthMaxLogCount are passed through as-is (nil when
// not supplied). HealthLogDestination, when supplied, is validated with
// GetValidHealthCheckDestination and the validated value is stored.
// HealthOnFailure, when supplied, is parsed with
// ParseHealthCheckOnFailureAction.
//
// On any validation or parse error the zero GlobalHealthCheckOptions is
// returned together with the error, so callers never receive a partially
// populated result.
func (u *UpdateHealthCheckConfig) GetNewGlobalHealthCheck() (GlobalHealthCheckOptions, error) {
	globalOptions := GlobalHealthCheckOptions{
		HealthMaxLogSize:  u.HealthMaxLogSize,
		HealthMaxLogCount: u.HealthMaxLogCount,
	}

	if u.HealthLogDestination != nil {
		dest, err := GetValidHealthCheckDestination(*u.HealthLogDestination)
		if err != nil {
			return GlobalHealthCheckOptions{}, err
		}
		// Store the validated value, not the raw input.
		globalOptions.HealthLogDestination = &dest
	}

	if u.HealthOnFailure != nil {
		action, err := ParseHealthCheckOnFailureAction(*u.HealthOnFailure)
		if err != nil {
			// Match the destination error path: return the zero value
			// instead of the half-filled options.
			return GlobalHealthCheckOptions{}, err
		}
		globalOptions.HealthCheckOnFailureAction = &action
	}

	return globalOptions, nil
}
// HealthCheckOptions groups the settings of a single HealthCheck.
// SetNewHealthCheckConfigTo and SetNewStartupHealthCheckConfigTo fill it
// from an UpdateHealthCheckConfig.
type HealthCheckOptions struct {
	// Cmd is the healthcheck command.
	Cmd string
	// Interval is the healthcheck interval, kept as a string.
	Interval string
	// Retries is the number of retries.
	Retries int
	// Timeout is the healthcheck timeout, kept as a string.
	Timeout string
	// StartPeriod is the start period, kept as a string (for example "1s").
	StartPeriod string
	// Successes is the number of successes; it is only written by
	// SetNewStartupHealthCheckConfigTo.
	Successes int
}
// GlobalHealthCheckOptions groups the HealthCheck settings that
// GetNewGlobalHealthCheck derives from an UpdateHealthCheckConfig. Each field
// is a pointer and stays nil when the corresponding setting was not supplied.
type GlobalHealthCheckOptions struct {
	// HealthLogDestination is the validated destination of the HealthCheck log.
	HealthLogDestination *string
	// HealthMaxLogCount is the maximum number of attempts kept in the log.
	HealthMaxLogCount *uint
	// HealthMaxLogSize is the maximum length of the stored log.
	HealthMaxLogSize *uint
	// HealthCheckOnFailureAction is the parsed form of the HealthOnFailure
	// setting.
	HealthCheckOnFailureAction *HealthCheckOnFailureAction
}