auto-update: stop+start instead of restart sytemd units

It turns out the restart is _not_ a stop+start but keeps certain
resources open and is subject to some timeouts that may differ across
distributions' default settings.

[NO NEW TESTS NEEDED] as I have absolutely no idea how to reliably cause
the failure/flake/race.

Also ignore ENOENTS of the CID file when removing a container which has
been identified of actually fixing #17607.

Fixes: #17607
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
This commit is contained in:
Valentin Rothberg
2023-03-28 14:54:26 +02:00
parent c8eeab21cf
commit f131eaa74a
2 changed files with 32 additions and 3 deletions

View File

@ -811,7 +811,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
// Remove the container's CID file on container removal.
if cidFile, ok := c.config.Spec.Annotations[define.InspectAnnotationCIDFile]; ok {
if err := os.Remove(cidFile); err != nil {
if err := os.Remove(cidFile); err != nil && !errors.Is(err, os.ErrNotExist) {
if cleanupErr == nil {
cleanupErr = err
} else {

View File

@ -334,8 +334,16 @@ func (t *task) rollbackImage() error {
// restartSystemdUnit restarts the systemd unit the container is running in.
func (u *updater) restartSystemdUnit(ctx context.Context, unit string) error {
if err := u.stopSystemdUnit(ctx, unit); err != nil {
return err
}
return u.startSystemdUnit(ctx, unit)
}
// startSystemdUnit starts the systemd unit the container is running in.
func (u *updater) startSystemdUnit(ctx context.Context, unit string) error {
restartChan := make(chan string)
if _, err := u.conn.RestartUnitContext(ctx, unit, "replace", restartChan); err != nil {
if _, err := u.conn.StartUnitContext(ctx, unit, "replace", restartChan); err != nil {
return err
}
@ -349,7 +357,28 @@ func (u *updater) restartSystemdUnit(ctx context.Context, unit string) error {
return nil
default:
return fmt.Errorf("expected %q but received %q", "done", result)
return fmt.Errorf("error starting systemd unit %q expected %q but received %q", unit, "done", result)
}
}
// stopSystemdUnit stop the systemd unit the container is running in.
func (u *updater) stopSystemdUnit(ctx context.Context, unit string) error {
restartChan := make(chan string)
if _, err := u.conn.StopUnitContext(ctx, unit, "replace", restartChan); err != nil {
return err
}
// Wait for the restart to finish and actually check if it was
// successful or not.
result := <-restartChan
switch result {
case "done":
logrus.Infof("Successfully stopped systemd unit %q", unit)
return nil
default:
return fmt.Errorf("error stopping systemd unit %q expected %q but received %q", unit, "done", result)
}
}