From f131eaa74aa603e69b7c71ae073302b359ff1502 Mon Sep 17 00:00:00 2001
From: Valentin Rothberg <vrothberg@redhat.com>
Date: Tue, 28 Mar 2023 14:54:26 +0200
Subject: [PATCH] auto-update: stop+start instead of restart sytemd units

It turns out the restart is _not_ a stop+start but keeps certain
resources open and is subject to some timeouts that may differ across
distributions' default settings.

[NO NEW TESTS NEEDED] as I have absolutely no idea how to reliably cause
the failure/flake/race.

Also ignore ENOENTS of the CID file when removing a container which has
been identified of actually fixing #17607.

Fixes: #17607
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
---
 libpod/runtime_ctr.go        |  2 +-
 pkg/autoupdate/autoupdate.go | 33 +++++++++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go
index 6a90785161..648118c374 100644
--- a/libpod/runtime_ctr.go
+++ b/libpod/runtime_ctr.go
@@ -811,7 +811,7 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
 
 	// Remove the container's CID file on container removal.
 	if cidFile, ok := c.config.Spec.Annotations[define.InspectAnnotationCIDFile]; ok {
-		if err := os.Remove(cidFile); err != nil {
+		if err := os.Remove(cidFile); err != nil && !errors.Is(err, os.ErrNotExist) {
 			if cleanupErr == nil {
 				cleanupErr = err
 			} else {
diff --git a/pkg/autoupdate/autoupdate.go b/pkg/autoupdate/autoupdate.go
index 10258701f8..ccd81d8cdb 100644
--- a/pkg/autoupdate/autoupdate.go
+++ b/pkg/autoupdate/autoupdate.go
@@ -334,8 +334,16 @@ func (t *task) rollbackImage() error {
 
 // restartSystemdUnit restarts the systemd unit the container is running in.
 func (u *updater) restartSystemdUnit(ctx context.Context, unit string) error {
+	if err := u.stopSystemdUnit(ctx, unit); err != nil {
+		return err
+	}
+	return u.startSystemdUnit(ctx, unit)
+}
+
+// startSystemdUnit starts the systemd unit the container is running in.
+func (u *updater) startSystemdUnit(ctx context.Context, unit string) error {
 	restartChan := make(chan string)
-	if _, err := u.conn.RestartUnitContext(ctx, unit, "replace", restartChan); err != nil {
+	if _, err := u.conn.StartUnitContext(ctx, unit, "replace", restartChan); err != nil {
 		return err
 	}
 
@@ -349,7 +357,28 @@ func (u *updater) restartSystemdUnit(ctx context.Context, unit string) error {
 		return nil
 
 	default:
-		return fmt.Errorf("expected %q but received %q", "done", result)
+		return fmt.Errorf("error starting systemd unit %q expected %q but received %q", unit, "done", result)
+	}
+}
+
+// stopSystemdUnit stop the systemd unit the container is running in.
+func (u *updater) stopSystemdUnit(ctx context.Context, unit string) error {
+	restartChan := make(chan string)
+	if _, err := u.conn.StopUnitContext(ctx, unit, "replace", restartChan); err != nil {
+		return err
+	}
+
+	// Wait for the restart to finish and actually check if it was
+	// successful or not.
+	result := <-restartChan
+
+	switch result {
+	case "done":
+		logrus.Infof("Successfully stopped systemd unit %q", unit)
+		return nil
+
+	default:
+		return fmt.Errorf("error stopping systemd unit %q expected %q but received %q", unit, "done", result)
 	}
 }