health check: ignore dependencies of transient systemd units/timers

When stopping the transient systemd timer/unit that powers running
health checks, make sure to ignore its dependencies.  Otherwise, we run
into a timeout when a container is running in a systemd unit and the
system reboots.

An alternative may be to further tweak some attributes/options when
creating the timer/unit via systemd-run, but it seems safe to just ignore
the dependencies and stop the units.

[NO NEW TESTS NEEDED] - we don't yet have means to test reboots.

Fixes: #14531
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg
2022-12-08 15:01:57 +01:00
parent 6e2e9ab227
commit 1ed982753c

View File

@@ -111,7 +111,7 @@ func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) er
// fire after the service is stopped.
timerChan := make(chan string)
timerFile := fmt.Sprintf("%s.timer", c.hcUnitName(isStartup))
if _, err := conn.StopUnitContext(ctx, timerFile, "fail", timerChan); err != nil {
if _, err := conn.StopUnitContext(ctx, timerFile, "ignore-dependencies", timerChan); err != nil {
if !strings.HasSuffix(err.Error(), ".timer not loaded.") {
stopErrors = append(stopErrors, fmt.Errorf("removing health-check timer %q: %w", timerFile, err))
}
@@ -126,7 +126,7 @@ func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) er
if err := conn.ResetFailedUnitContext(ctx, serviceFile); err != nil {
logrus.Debugf("Failed to reset unit file: %q", err)
}
if _, err := conn.StopUnitContext(ctx, serviceFile, "fail", serviceChan); err != nil {
if _, err := conn.StopUnitContext(ctx, serviceFile, "ignore-dependencies", serviceChan); err != nil {
if !strings.HasSuffix(err.Error(), ".service not loaded.") {
stopErrors = append(stopErrors, fmt.Errorf("removing health-check service %q: %w", serviceFile, err))
}