Improve robustness of pipe checks

Improve error reporting on ssh readiness check

Signed-off-by: Jason T. Greene <jason.greene@redhat.com>
This commit is contained in:
Jason T. Greene
2024-02-11 12:45:19 -06:00
parent 487219d809
commit 09b86e26d1
3 changed files with 54 additions and 23 deletions

View File

@ -4,7 +4,9 @@ package machine
import (
"context"
"errors"
"fmt"
"io/fs"
"net"
"os"
"os/exec"
@ -13,8 +15,8 @@ import (
"syscall"
"time"
"github.com/containers/podman/v5/pkg/machine/define"
winio "github.com/Microsoft/go-winio"
"github.com/containers/podman/v5/pkg/machine/define"
"github.com/sirupsen/logrus"
)
@ -25,6 +27,10 @@ const (
winSshProxyTid = "win-sshproxy.tid"
rootfulSock = "/run/podman/podman.sock"
rootlessSock = "/run/user/1000/podman/podman.sock"
// machine wait is longer since we must hard fail
MachineNameWait = 5 * time.Second
GlobalNameWait = 250 * time.Millisecond
)
const WM_QUIT = 0x12 //nolint
@ -53,9 +59,20 @@ func GetProcessState(pid int) (active bool, exitCode int) {
return code == 259, int(code)
}
func PipeNameAvailable(pipeName string) bool {
_, err := os.Stat(`\\.\pipe\` + pipeName)
return os.IsNotExist(err)
func PipeNameAvailable(pipeName string, maxWait time.Duration) bool {
const interval = 250 * time.Millisecond
var wait time.Duration
for {
_, err := os.Stat(`\\.\pipe\` + pipeName)
if errors.Is(err, fs.ErrNotExist) {
return true
}
if wait >= maxWait {
return false
}
time.Sleep(interval)
wait += interval
}
}
func WaitPipeExists(pipeName string, retries int, checkFailure func() error) error {
@ -105,12 +122,12 @@ func LaunchWinProxy(opts WinProxyOpts, noInfo bool) {
func launchWinProxy(opts WinProxyOpts) (bool, string, error) {
machinePipe := ToDist(opts.Name)
if !PipeNameAvailable(machinePipe) {
if !PipeNameAvailable(machinePipe, MachineNameWait) {
return false, "", fmt.Errorf("could not start api proxy since expected pipe is not available: %s", machinePipe)
}
globalName := false
if PipeNameAvailable(GlobalNamedPipe) {
if PipeNameAvailable(GlobalNamedPipe, GlobalNameWait) {
globalName = true
}

View File

@ -1,6 +1,7 @@
package shim
import (
"errors"
"fmt"
"net"
"path/filepath"
@ -21,6 +22,11 @@ const (
dockerConnectTimeout = 5 * time.Second
)
var (
ErrNotRunning = errors.New("machine not in running state")
ErrSSHNotListening = errors.New("machine is not listening on ssh port")
)
func startHostForwarder(mc *vmconfigs.MachineConfig, provider vmconfigs.VMProvider, dirs *define.MachineDirs, hostSocks []string) error {
forwardUser := mc.SSH.RemoteUsername
@ -119,22 +125,30 @@ func conductVMReadinessCheck(mc *vmconfigs.MachineConfig, maxBackoffs int, backo
if err != nil {
return false, nil, err
}
if state == define.Running && isListening(mc.SSH.Port) {
// Also make sure that SSH is up and running. The
// ready service's dependencies don't fully make sure
// that clients can SSH into the machine immediately
// after boot.
//
// CoreOS users have reported the same observation but
// the underlying source of the issue remains unknown.
if sshError = machine.CommonSSHSilent(mc.SSH.RemoteUsername, mc.SSH.IdentityPath, mc.Name, mc.SSH.Port, []string{"true"}); sshError != nil {
logrus.Debugf("SSH readiness check for machine failed: %v", sshError)
continue
}
connected = true
break
if state != define.Running {
sshError = ErrNotRunning
continue
}
if !isListening(mc.SSH.Port) {
sshError = ErrSSHNotListening
continue
}
// Also make sure that SSH is up and running. The
// ready service's dependencies don't fully make sure
// that clients can SSH into the machine immediately
// after boot.
//
// CoreOS users have reported the same observation but
// the underlying source of the issue remains unknown.
if sshError = machine.CommonSSHSilent(mc.SSH.RemoteUsername, mc.SSH.IdentityPath, mc.Name, mc.SSH.Port, []string{"true"}); sshError != nil {
logrus.Debugf("SSH readiness check for machine failed: %v", sshError)
continue
}
connected = true
sshError = nil
break
}
return
}

View File

@ -9,13 +9,13 @@ import (
func setupMachineSockets(name string, dirs *define.MachineDirs) ([]string, string, machine.APIForwardingState, error) {
machinePipe := machine.ToDist(name)
if !machine.PipeNameAvailable(machinePipe) {
if !machine.PipeNameAvailable(machinePipe, machine.MachineNameWait) {
return nil, "", 0, fmt.Errorf("could not start api proxy since expected pipe is not available: %s", machinePipe)
}
sockets := []string{machine.NamedPipePrefix + machinePipe}
state := machine.MachineLocal
if machine.PipeNameAvailable(machine.GlobalNamedPipe) {
if machine.PipeNameAvailable(machine.GlobalNamedPipe, machine.GlobalNameWait) {
sockets = append(sockets, machine.NamedPipePrefix+machine.GlobalNamedPipe)
state = machine.DockerGlobal
}