From 63a8926e96203cf0070e0b0cb14e79815942f0a0 Mon Sep 17 00:00:00 2001
From: "Jason T. Greene" <jason.greene@redhat.com>
Date: Tue, 8 Aug 2023 13:05:07 -0500
Subject: [PATCH] Implement automatic port reassignment on Windows

While only leveraged by the WSL backend, this commit also adds core
infrastructure for all other backends for future enhancement.

- Adds a common port cross backend allocation registry to prevent duplicate
  assignment across multiple machine instances
- Introduces logic in Start() that detects OS port conflicts and scans for a
  viable replacement port
- Updates connection definitions and server configuration accordingly
- Utilizes a coordinated file lock strategy to prevent racing overwrites of port
  and connection registries
- WSL backend coordinates locking for containers.conf until a future common
  enhancement exists to replace it

[NO NEW TESTS NEEDED]

Signed-off-by: Jason T. Greene <jason.greene@redhat.com>
---
 pkg/machine/config.go        |  11 ++
 pkg/machine/connection.go    |  15 +++
 pkg/machine/ports.go         | 213 +++++++++++++++++++++++++++++++++++
 pkg/machine/ports_unix.go    |  29 +++++
 pkg/machine/ports_windows.go |  28 +++++
 pkg/machine/wsl/config.go    |   3 +-
 pkg/machine/wsl/machine.go   |  97 +++++++++++++++-
 7 files changed, 393 insertions(+), 3 deletions(-)
 create mode 100644 pkg/machine/ports.go
 create mode 100644 pkg/machine/ports_unix.go
 create mode 100644 pkg/machine/ports_windows.go

diff --git a/pkg/machine/config.go b/pkg/machine/config.go
index ebcb1495ca..c773de2b24 100644
--- a/pkg/machine/config.go
+++ b/pkg/machine/config.go
@@ -199,6 +199,17 @@ func GetDataDir(vmType VMType) (string, error) {
 	return dataDir, mkdirErr
 }
 
+// GetGLobalDataDir returns the root of all backends
+// for shared machine data.
+func GetGlobalDataDir() (string, error) {
+	dataDir, err := DataDirPrefix()
+	if err != nil {
+		return "", err
+	}
+
+	return dataDir, os.MkdirAll(dataDir, 0755)
+}
+
 // DataDirPrefix returns the path prefix for all machine data files
 func DataDirPrefix() (string, error) {
 	data, err := homedir.GetDataHome()
diff --git a/pkg/machine/connection.go b/pkg/machine/connection.go
index 5633cd5cce..ebecda83e3 100644
--- a/pkg/machine/connection.go
+++ b/pkg/machine/connection.go
@@ -58,6 +58,21 @@ func AnyConnectionDefault(name ...string) (bool, error) {
 	return false, nil
 }
 
+func ChangeConnectionURI(name string, uri fmt.Stringer) error {
+	cfg, err := config.ReadCustomConfig()
+	if err != nil {
+		return err
+	}
+	dst, ok := cfg.Engine.ServiceDestinations[name]
+	if !ok {
+		return errors.New("connection not found")
+	}
+	dst.URI = uri.String()
+	cfg.Engine.ServiceDestinations[name] = dst
+
+	return cfg.Write()
+}
+
 func ChangeDefault(name string) error {
 	cfg, err := config.ReadCustomConfig()
 	if err != nil {
diff --git a/pkg/machine/ports.go b/pkg/machine/ports.go
new file mode 100644
index 0000000000..2837d492f6
--- /dev/null
+++ b/pkg/machine/ports.go
@@ -0,0 +1,213 @@
+package machine
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"path/filepath"
+	"strconv"
+
+	"github.com/containers/storage/pkg/ioutils"
+	"github.com/containers/storage/pkg/lockfile"
+	"github.com/sirupsen/logrus"
+)
+
+const (
+	portAllocFileName = "port-alloc.dat"
+	portLockFileName  = "port-alloc.lck"
+)
+
+// Reserves a unique port for a machine instance in a global (user) scope across
+// all machines and backend types. On success the port is guaranteed to not be
+// allocated until released with a call to ReleaseMachinePort().
+//
+// The purpose of this method is to prevent collisions between machine
+// instances when ran at the same time. Note, that dynamic port reassignment
+// on its own is insufficient to resolve conflicts, since there is a narrow
+// window between port detection and actual service binding, allowing for the
+// possibility of a second racing machine to fail if its check is unlucky to
+// fall within that window. Additionally, there is the potential for a long
+// running reassignment dance over start/stop until all machine instances
+// eventually arrive at total conflict free state. By reserving ports using
+// mechanism these scenarios are prevented.
+func AllocateMachinePort() (int, error) {
+	const maxRetries = 10000
+
+	handles := []io.Closer{}
+	defer func() {
+		for _, handle := range handles {
+			handle.Close()
+		}
+	}()
+
+	lock, err := acquirePortLock()
+	if err != nil {
+		return 0, err
+	}
+	defer lock.Unlock()
+
+	ports, err := loadPortAllocations()
+	if err != nil {
+		return 0, err
+	}
+
+	var port int
+	for i := 0; ; i++ {
+		var handle io.Closer
+
+		// Ports must be held temporarily to prevent repeat search results
+		handle, port, err = getRandomPortHold()
+		if err != nil {
+			return 0, err
+		}
+		handles = append(handles, handle)
+
+		if _, exists := ports[port]; !exists {
+			break
+		}
+
+		if i > maxRetries {
+			return 0, errors.New("maximum number of retries exceeded searching for available port")
+		}
+	}
+
+	ports[port] = struct{}{}
+	if err := storePortAllocations(ports); err != nil {
+		return 0, err
+	}
+
+	return port, nil
+}
+
+// Releases a reserved port for a machine when no longer required. Care should
+// be taken to ensure there are no conditions (e.g. failure paths) where the
+// port might unintentionally remain in use after releasing
+func ReleaseMachinePort(port int) error {
+	lock, err := acquirePortLock()
+	if err != nil {
+		return err
+	}
+	defer lock.Unlock()
+	ports, err := loadPortAllocations()
+	if err != nil {
+		return err
+	}
+
+	delete(ports, port)
+	return storePortAllocations(ports)
+}
+
+func IsLocalPortAvailable(port int) bool {
+	// Used to mark invalid / unassigned port
+	if port <= 0 {
+		return false
+	}
+
+	lc := getPortCheckListenConfig()
+	l, err := lc.Listen(context.Background(), "tcp", fmt.Sprintf("127.0.0.1:%d", port))
+	if err != nil {
+		return false
+	}
+	l.Close()
+	return true
+}
+
+func getRandomPortHold() (io.Closer, int, error) {
+	l, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		return nil, 0, fmt.Errorf("unable to get free machine port: %w", err)
+	}
+	_, portString, err := net.SplitHostPort(l.Addr().String())
+	if err != nil {
+		l.Close()
+		return nil, 0, fmt.Errorf("unable to determine free machine port: %w", err)
+	}
+	port, err := strconv.Atoi(portString)
+	if err != nil {
+		l.Close()
+		return nil, 0, fmt.Errorf("unable to convert port to int: %w", err)
+	}
+	return l, port, err
+}
+
+func acquirePortLock() (*lockfile.LockFile, error) {
+	lockDir, err := GetGlobalDataDir()
+	if err != nil {
+		return nil, err
+	}
+
+	lock, err := lockfile.GetLockFile(filepath.Join(lockDir, portLockFileName))
+	if err != nil {
+		return nil, err
+	}
+
+	lock.Lock()
+	return lock, nil
+}
+
+func loadPortAllocations() (map[int]struct{}, error) {
+	portDir, err := GetGlobalDataDir()
+	if err != nil {
+		return nil, err
+	}
+
+	var portData []int
+	exists := true
+	file, err := os.OpenFile(filepath.Join(portDir, portAllocFileName), 0, 0)
+	if errors.Is(err, os.ErrNotExist) {
+		exists = false
+	} else if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	// Non-existence of the file, or a corrupt file are not treated as hard
+	// failures, since dynamic reassignment and continued use will eventually
+	// rebuild the dataset. This also makes migration cases simpler, since
+	// the state doesn't have to exist
+	if exists {
+		decoder := json.NewDecoder(file)
+		if err := decoder.Decode(&portData); err != nil {
+			logrus.Warnf("corrupt port allocation file, could not use state")
+		}
+	}
+
+	ports := make(map[int]struct{})
+	placeholder := struct{}{}
+	for _, port := range portData {
+		ports[port] = placeholder
+	}
+
+	return ports, nil
+}
+
+func storePortAllocations(ports map[int]struct{}) error {
+	portDir, err := GetGlobalDataDir()
+	if err != nil {
+		return err
+	}
+
+	portData := make([]int, 0, len(ports))
+	for port := range ports {
+		portData = append(portData, port)
+	}
+
+	opts := &ioutils.AtomicFileWriterOptions{ExplicitCommit: true}
+	w, err := ioutils.NewAtomicFileWriterWithOpts(filepath.Join(portDir, portAllocFileName), 0644, opts)
+	if err != nil {
+		return err
+	}
+	defer w.Close()
+
+	enc := json.NewEncoder(w)
+	if err := enc.Encode(portData); err != nil {
+		return err
+	}
+
+	// Commit the changes to disk if no errors
+	return w.Commit()
+}
diff --git a/pkg/machine/ports_unix.go b/pkg/machine/ports_unix.go
new file mode 100644
index 0000000000..e1960c3df4
--- /dev/null
+++ b/pkg/machine/ports_unix.go
@@ -0,0 +1,29 @@
+//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd
+// +build darwin dragonfly freebsd linux netbsd openbsd
+
+package machine
+
+import (
+	"net"
+	"syscall"
+)
+
+func getPortCheckListenConfig() *net.ListenConfig {
+	return &net.ListenConfig{
+		Control: func(network, address string, c syscall.RawConn) (cerr error) {
+			if err := c.Control(func(fd uintptr) {
+				// Prevent listening socket from holding over in TIME_WAIT in the rare case a connection
+				// attempt occurs in the short window the socket is listening. This ensures the registration
+				// will be gone when close() completes, freeing it up for the real subsequent listen by another
+				// process
+				cerr = syscall.SetsockoptLinger(int(fd), syscall.SOL_SOCKET, syscall.SO_LINGER, &syscall.Linger{
+					Onoff:  1,
+					Linger: 0,
+				})
+			}); err != nil {
+				cerr = err
+			}
+			return
+		},
+	}
+}
diff --git a/pkg/machine/ports_windows.go b/pkg/machine/ports_windows.go
new file mode 100644
index 0000000000..730d4be0b3
--- /dev/null
+++ b/pkg/machine/ports_windows.go
@@ -0,0 +1,28 @@
+package machine
+
+import (
+	"net"
+	"syscall"
+)
+
+// NOTE the reason for the code duplication between win and unix is that the syscall
+// implementations require a different cast (Handle on Windows, int on Unixes)
+func getPortCheckListenConfig() *net.ListenConfig {
+	return &net.ListenConfig{
+		Control: func(network, address string, c syscall.RawConn) (cerr error) {
+			if err := c.Control(func(fd uintptr) {
+				// Prevent listening socket from holding over in TIME_WAIT in the rare case a connection
+				// attempt occurs in the short window the socket is listening. This ensures the registration
+				// will be gone when close() completes, freeing it up for the real subsequent listen by another
+				// process
+				cerr = syscall.SetsockoptLinger(syscall.Handle(fd), syscall.SOL_SOCKET, syscall.SO_LINGER, &syscall.Linger{
+					Onoff:  1,
+					Linger: 0,
+				})
+			}); err != nil {
+				cerr = err
+			}
+			return
+		},
+	}
+}
diff --git a/pkg/machine/wsl/config.go b/pkg/machine/wsl/config.go
index e48fc03b06..57e6580d69 100644
--- a/pkg/machine/wsl/config.go
+++ b/pkg/machine/wsl/config.go
@@ -10,7 +10,6 @@ import (
 	"time"
 
 	"github.com/containers/podman/v4/pkg/machine"
-	"github.com/containers/podman/v4/utils"
 	"github.com/sirupsen/logrus"
 )
 
@@ -55,7 +54,7 @@ func (p *WSLVirtualization) NewMachine(opts machine.InitOptions) (machine.VM, er
 	}
 
 	// Add a random port for ssh
-	port, err := utils.GetRandomPort()
+	port, err := machine.AllocateMachinePort()
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/machine/wsl/machine.go b/pkg/machine/wsl/machine.go
index 27745ce274..1cd41721c2 100644
--- a/pkg/machine/wsl/machine.go
+++ b/pkg/machine/wsl/machine.go
@@ -49,6 +49,8 @@ const registriesConf = `unqualified-search-registries=["docker.io"]
 
 const appendPort = `grep -q Port\ %d /etc/ssh/sshd_config || echo Port %d >> /etc/ssh/sshd_config`
 
+const changePort = `sed -E -i 's/^Port[[:space:]]+[0-9]+/Port %d/' /etc/ssh/sshd_config`
+
 const configServices = `ln -fs /usr/lib/systemd/system/sshd.service /etc/systemd/system/multi-user.target.wants/sshd.service
 ln -fs /usr/lib/systemd/system/podman.socket /etc/systemd/system/sockets.target.wants/podman.socket
 rm -f /etc/systemd/system/getty.target.wants/console-getty.service
@@ -408,19 +410,33 @@ func (v *MachineVM) writeConfig() error {
 	return machine.WriteConfig(v.ConfigPath, v)
 }
 
-func setupConnections(v *MachineVM, opts machine.InitOptions) error {
+func constructSSHUris(v *MachineVM) ([]url.URL, []string) {
 	uri := machine.SSHRemoteConnection.MakeSSHURL(machine.LocalhostIP, rootlessSock, strconv.Itoa(v.Port), v.RemoteUsername)
 	uriRoot := machine.SSHRemoteConnection.MakeSSHURL(machine.LocalhostIP, rootfulSock, strconv.Itoa(v.Port), "root")
 
 	uris := []url.URL{uri, uriRoot}
 	names := []string{v.Name, v.Name + "-root"}
 
+	return uris, names
+}
+
+func setupConnections(v *MachineVM, opts machine.InitOptions) error {
+	uris, names := constructSSHUris(v)
+
 	// The first connection defined when connections is empty will become the default
 	// regardless of IsDefault, so order according to rootful
 	if opts.Rootful {
 		uris[0], names[0], uris[1], names[1] = uris[1], names[1], uris[0], names[0]
 	}
 
+	// We need to prevent racing connection updates to containers.conf globally
+	// across all backends to prevent connection overwrites
+	flock, err := obtainGlobalConfigLock()
+	if err != nil {
+		return fmt.Errorf("could not obtain global lock: %w", err)
+	}
+	defer flock.unlock()
+
 	for i := 0; i < 2; i++ {
 		if err := machine.AddConnection(&uris[i], names[i], v.IdentityPath, opts.IsDefault && i == 0); err != nil {
 			return err
@@ -992,6 +1008,13 @@ func (v *MachineVM) Start(name string, opts machine.StartOptions) error {
 		return err
 	}
 
+	if !machine.IsLocalPortAvailable(v.Port) {
+		logrus.Warnf("SSH port conflict detected, reassigning a new port")
+		if err := v.reassignSshPort(); err != nil {
+			return err
+		}
+	}
+
 	// Startup user-mode networking if enabled
 	if err := v.startUserModeNetworking(); err != nil {
 		return err
@@ -1040,6 +1063,74 @@ func (v *MachineVM) Start(name string, opts machine.StartOptions) error {
 	return err
 }
 
+func obtainGlobalConfigLock() (*fileLock, error) {
+	lockDir, err := machine.GetGlobalDataDir()
+	if err != nil {
+		return nil, err
+	}
+
+	// Lock file needs to be above all backends
+	// TODO: This should be changed to a common.Config lock mechanism when available
+	return lockFile(filepath.Join(lockDir, "podman-config.lck"))
+}
+
+func (v *MachineVM) reassignSshPort() error {
+	dist := toDist(v.Name)
+	newPort, err := machine.AllocateMachinePort()
+	if err != nil {
+		return err
+	}
+
+	success := false
+	defer func() {
+		if !success {
+			if err := machine.ReleaseMachinePort(newPort); err != nil {
+				logrus.Warnf("could not release port allocation as part of failure rollback (%d): %w", newPort, err)
+			}
+		}
+	}()
+
+	// We need to prevent racing connection updates to containers.conf globally
+	// across all backends to prevent connection overwrites
+	flock, err := obtainGlobalConfigLock()
+	if err != nil {
+		return fmt.Errorf("could not obtain global lock: %w", err)
+	}
+	defer flock.unlock()
+
+	// Write a transient invalid port, to force a retry on failure
+	oldPort := v.Port
+	v.Port = 0
+	if err := v.writeConfig(); err != nil {
+		return err
+	}
+
+	if err := machine.ReleaseMachinePort(oldPort); err != nil {
+		logrus.Warnf("could not release current ssh port allocation (%d): %w", oldPort, err)
+	}
+
+	if err := wslInvoke(dist, "sh", "-c", fmt.Sprintf(changePort, newPort)); err != nil {
+		return fmt.Errorf("could not change SSH port for guest OS: %w", err)
+	}
+
+	v.Port = newPort
+	uris, names := constructSSHUris(v)
+	for i := 0; i < 2; i++ {
+		if err := machine.ChangeConnectionURI(names[i], &uris[i]); err != nil {
+			return err
+		}
+	}
+
+	// Write updated port back
+	if err := v.writeConfig(); err != nil {
+		return err
+	}
+
+	success = true
+
+	return nil
+}
+
 func findExecutablePeer(name string) (string, error) {
 	exe, err := os.Executable()
 	if err != nil {
@@ -1360,6 +1451,10 @@ func (v *MachineVM) Remove(name string, opts machine.RemoveOptions) (string, fun
 				logrus.Error(err)
 			}
 		}
+		if err := machine.ReleaseMachinePort(v.Port); err != nil {
+			logrus.Warnf("could not release port allocation as part of removal (%d): %w", v.Port, err)
+		}
+
 		return nil
 	}, nil
 }