From 09b86e26d1f2c607169851c7a5058661aecbb17b Mon Sep 17 00:00:00 2001 From: "Jason T. Greene" Date: Sun, 11 Feb 2024 12:45:19 -0600 Subject: [PATCH] Improve robustness of pipe checks Improve error reporting on ssh readiness check Signed-off-by: Jason T. Greene --- pkg/machine/machine_windows.go | 29 +++++++++++++---- pkg/machine/shim/networking.go | 44 +++++++++++++++++--------- pkg/machine/shim/networking_windows.go | 4 +-- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/pkg/machine/machine_windows.go b/pkg/machine/machine_windows.go index 01f5fa4dc8..3e19f4d98e 100644 --- a/pkg/machine/machine_windows.go +++ b/pkg/machine/machine_windows.go @@ -4,7 +4,9 @@ package machine import ( "context" + "errors" "fmt" + "io/fs" "net" "os" "os/exec" @@ -13,8 +15,8 @@ import ( "syscall" "time" - "github.com/containers/podman/v5/pkg/machine/define" winio "github.com/Microsoft/go-winio" + "github.com/containers/podman/v5/pkg/machine/define" "github.com/sirupsen/logrus" ) @@ -25,6 +27,10 @@ const ( winSshProxyTid = "win-sshproxy.tid" rootfulSock = "/run/podman/podman.sock" rootlessSock = "/run/user/1000/podman/podman.sock" + + // machine wait is longer since we must hard fail + MachineNameWait = 5 * time.Second + GlobalNameWait = 250 * time.Millisecond ) const WM_QUIT = 0x12 //nolint @@ -53,9 +59,20 @@ func GetProcessState(pid int) (active bool, exitCode int) { return code == 259, int(code) } -func PipeNameAvailable(pipeName string) bool { - _, err := os.Stat(`\\.\pipe\` + pipeName) - return os.IsNotExist(err) +func PipeNameAvailable(pipeName string, maxWait time.Duration) bool { + const interval = 250 * time.Millisecond + var wait time.Duration + for { + _, err := os.Stat(`\\.\pipe\` + pipeName) + if errors.Is(err, fs.ErrNotExist) { + return true + } + if wait >= maxWait { + return false + } + time.Sleep(interval) + wait += interval + } } func WaitPipeExists(pipeName string, retries int, checkFailure func() error) error { @@ -105,12 +122,12 @@ func LaunchWinProxy(opts WinProxyOpts, noInfo bool) { func launchWinProxy(opts WinProxyOpts) (bool, string, error) { machinePipe := ToDist(opts.Name) - if !PipeNameAvailable(machinePipe) { + if !PipeNameAvailable(machinePipe, MachineNameWait) { return false, "", fmt.Errorf("could not start api proxy since expected pipe is not available: %s", machinePipe) } globalName := false - if PipeNameAvailable(GlobalNamedPipe) { + if PipeNameAvailable(GlobalNamedPipe, GlobalNameWait) { globalName = true } diff --git a/pkg/machine/shim/networking.go b/pkg/machine/shim/networking.go index 6305458185..57ad53b7eb 100644 --- a/pkg/machine/shim/networking.go +++ b/pkg/machine/shim/networking.go @@ -1,6 +1,7 @@ package shim import ( + "errors" "fmt" "net" "path/filepath" @@ -21,6 +22,11 @@ const ( dockerConnectTimeout = 5 * time.Second ) +var ( + ErrNotRunning = errors.New("machine not in running state") + ErrSSHNotListening = errors.New("machine is not listening on ssh port") +) + func startHostForwarder(mc *vmconfigs.MachineConfig, provider vmconfigs.VMProvider, dirs *define.MachineDirs, hostSocks []string) error { forwardUser := mc.SSH.RemoteUsername @@ -119,22 +125,30 @@ func conductVMReadinessCheck(mc *vmconfigs.MachineConfig, maxBackoffs int, backo if err != nil { return false, nil, err } - if state == define.Running && isListening(mc.SSH.Port) { - // Also make sure that SSH is up and running. The - // ready service's dependencies don't fully make sure - // that clients can SSH into the machine immediately - // after boot. - // - // CoreOS users have reported the same observation but - // the underlying source of the issue remains unknown. - - if sshError = machine.CommonSSHSilent(mc.SSH.RemoteUsername, mc.SSH.IdentityPath, mc.Name, mc.SSH.Port, []string{"true"}); sshError != nil { - logrus.Debugf("SSH readiness check for machine failed: %v", sshError) - continue - } - connected = true - break + if state != define.Running { + sshError = ErrNotRunning + continue } + if !isListening(mc.SSH.Port) { + sshError = ErrSSHNotListening + continue + } + + // Also make sure that SSH is up and running. The + // ready service's dependencies don't fully make sure + // that clients can SSH into the machine immediately + // after boot. + // + // CoreOS users have reported the same observation but + // the underlying source of the issue remains unknown. + + if sshError = machine.CommonSSHSilent(mc.SSH.RemoteUsername, mc.SSH.IdentityPath, mc.Name, mc.SSH.Port, []string{"true"}); sshError != nil { + logrus.Debugf("SSH readiness check for machine failed: %v", sshError) + continue + } + connected = true + sshError = nil + break } return } diff --git a/pkg/machine/shim/networking_windows.go b/pkg/machine/shim/networking_windows.go index d13d4a0036..970aefd5aa 100644 --- a/pkg/machine/shim/networking_windows.go +++ b/pkg/machine/shim/networking_windows.go @@ -9,13 +9,13 @@ import ( func setupMachineSockets(name string, dirs *define.MachineDirs) ([]string, string, machine.APIForwardingState, error) { machinePipe := machine.ToDist(name) - if !machine.PipeNameAvailable(machinePipe) { + if !machine.PipeNameAvailable(machinePipe, machine.MachineNameWait) { return nil, "", 0, fmt.Errorf("could not start api proxy since expected pipe is not available: %s", machinePipe) } sockets := []string{machine.NamedPipePrefix + machinePipe} state := machine.MachineLocal - if machine.PipeNameAvailable(machine.GlobalNamedPipe) { + if machine.PipeNameAvailable(machine.GlobalNamedPipe, machine.GlobalNameWait) { sockets = append(sockets, machine.NamedPipePrefix+machine.GlobalNamedPipe) state = machine.DockerGlobal }