From c341a0ffe01fbcdcd272c72d928e5220ad004d73 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg <vrothberg@redhat.com> Date: Thu, 27 Jul 2023 14:00:07 +0200 Subject: [PATCH] machine: QEMU: lock VM on start Lock the VM on start. If the machine is in the "starting" state we know that a previous start has failed and guide the user into resolving the issue. Concurrent starts will busy wait and return the expected "already running" error. NOTE: this change is only looking at the start issue (#18662). Other commands such as stop and update should also lock and will be updated in a future change. I expect the underlying issue to apply to all machine providers, not only QEMU. It's desirable to aim for extending the machine interface to also allow to `Lock()` and `Unlock()`. After acquiring the lock, the VM should automatically be reloaded/updated. [NO NEW TESTS NEEDED] Fixes: #18662 Signed-off-by: Valentin Rothberg <vrothberg@redhat.com> --- pkg/machine/qemu/config.go | 3 ++- pkg/machine/qemu/machine.go | 44 +++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/pkg/machine/qemu/config.go b/pkg/machine/qemu/config.go index b061d7635a..84c68ef70c 100644 --- a/pkg/machine/qemu/config.go +++ b/pkg/machine/qemu/config.go @@ -215,8 +215,9 @@ func (p *QEMUVirtualization) CheckExclusiveActiveVM() (bool, string, error) { if err != nil { return false, "", fmt.Errorf("checking VM active: %w", err) } + // NOTE: Start() takes care of dealing with the "starting" state. for _, vm := range vms { - if vm.Running || vm.Starting { + if vm.Running { return true, vm.Name, nil } } diff --git a/pkg/machine/qemu/machine.go b/pkg/machine/qemu/machine.go index b85c19808f..6dbf57ba61 100644 --- a/pkg/machine/qemu/machine.go +++ b/pkg/machine/qemu/machine.go @@ -27,6 +27,7 @@ import ( "github.com/containers/podman/v4/pkg/rootless" "github.com/containers/podman/v4/pkg/util" "github.com/containers/storage/pkg/ioutils" + "github.com/containers/storage/pkg/lockfile" "github.com/digitalocean/go-qemu/qmp" "github.com/sirupsen/logrus" ) @@ -75,6 +76,32 @@ type MachineVM struct { Created time.Time // LastUp contains the last recorded uptime LastUp time.Time + + // User at runtime for serializing write operations. + lock *lockfile.LockFile +} + +func (v *MachineVM) setLock() error { + if v.lock != nil { + return nil + } + + // FIXME: there's a painful amount of `GetConfDir` calls scattered + // across the code base. This should be done once and stored + // somewhere instead. + vmConfigDir, err := machine.GetConfDir(vmtype) + if err != nil { + return err + } + + lockPath := filepath.Join(vmConfigDir, v.Name+".lock") + lock, err := lockfile.GetLockFile(lockPath) + if err != nil { + return fmt.Errorf("creating lockfile for VM: %w", err) + } + + v.lock = lock + return nil } type Monitor struct { @@ -606,6 +633,23 @@ func (v *MachineVM) Start(name string, opts machine.StartOptions) error { defaultBackoff := 500 * time.Millisecond maxBackoffs := 6 + if err := v.setLock(); err != nil { + return err + } + v.lock.Lock() + defer v.lock.Unlock() + + state, err := v.State(false) + if err != nil { + return err + } + switch state { + case machine.Starting: + return fmt.Errorf("cannot start VM %q: starting state indicates that a previous start has failed: please stop and restart the VM", v.Name) + case machine.Running: + return fmt.Errorf("cannot start VM %q: %w", v.Name, machine.ErrVMAlreadyRunning) + } + v.Starting = true if err := v.writeConfig(); err != nil { return fmt.Errorf("writing JSON file: %w", err)