Implement automatic port reassignment on Windows

While only leveraged by the WSL backend, this commit also adds core
infrastructure for all other backends for future enhancement.

- Adds a common port cross backend allocation registry to prevent duplicate
  assignment across multiple machine instances
- Introduces logic in Start() that detects OS port conflicts and scans for a
  viable replacement port
- Updates connection definitions and server configuration accordingly
- Utilizes a coordinated file lock strategy to prevent racing overwrites of port
  and connection registries
- WSL backend coordinates locking for containers.conf until a future common
  enhancement exists to replace it

[NO NEW TESTS NEEDED]

Signed-off-by: Jason T. Greene <jason.greene@redhat.com>
This commit is contained in:
Jason T. Greene
2023-08-08 13:05:07 -05:00
parent 938a3e1601
commit 63a8926e96
7 changed files with 393 additions and 3 deletions

View File

@ -199,6 +199,17 @@ func GetDataDir(vmType VMType) (string, error) {
return dataDir, mkdirErr
}
// GetGLobalDataDir returns the root of all backends
// for shared machine data.
func GetGlobalDataDir() (string, error) {
dataDir, err := DataDirPrefix()
if err != nil {
return "", err
}
return dataDir, os.MkdirAll(dataDir, 0755)
}
// DataDirPrefix returns the path prefix for all machine data files
func DataDirPrefix() (string, error) {
data, err := homedir.GetDataHome()

View File

@ -58,6 +58,21 @@ func AnyConnectionDefault(name ...string) (bool, error) {
return false, nil
}
func ChangeConnectionURI(name string, uri fmt.Stringer) error {
cfg, err := config.ReadCustomConfig()
if err != nil {
return err
}
dst, ok := cfg.Engine.ServiceDestinations[name]
if !ok {
return errors.New("connection not found")
}
dst.URI = uri.String()
cfg.Engine.ServiceDestinations[name] = dst
return cfg.Write()
}
func ChangeDefault(name string) error {
cfg, err := config.ReadCustomConfig()
if err != nil {

213
pkg/machine/ports.go Normal file
View File

@ -0,0 +1,213 @@
package machine
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"os"
"path/filepath"
"strconv"
"github.com/containers/storage/pkg/ioutils"
"github.com/containers/storage/pkg/lockfile"
"github.com/sirupsen/logrus"
)
const (
portAllocFileName = "port-alloc.dat"
portLockFileName = "port-alloc.lck"
)
// Reserves a unique port for a machine instance in a global (user) scope across
// all machines and backend types. On success the port is guaranteed to not be
// allocated until released with a call to ReleaseMachinePort().
//
// The purpose of this method is to prevent collisions between machine
// instances when ran at the same time. Note, that dynamic port reassignment
// on its own is insufficient to resolve conflicts, since there is a narrow
// window between port detection and actual service binding, allowing for the
// possibility of a second racing machine to fail if its check is unlucky to
// fall within that window. Additionally, there is the potential for a long
// running reassignment dance over start/stop until all machine instances
// eventually arrive at total conflict free state. By reserving ports using
// mechanism these scenarios are prevented.
func AllocateMachinePort() (int, error) {
const maxRetries = 10000
handles := []io.Closer{}
defer func() {
for _, handle := range handles {
handle.Close()
}
}()
lock, err := acquirePortLock()
if err != nil {
return 0, err
}
defer lock.Unlock()
ports, err := loadPortAllocations()
if err != nil {
return 0, err
}
var port int
for i := 0; ; i++ {
var handle io.Closer
// Ports must be held temporarily to prevent repeat search results
handle, port, err = getRandomPortHold()
if err != nil {
return 0, err
}
handles = append(handles, handle)
if _, exists := ports[port]; !exists {
break
}
if i > maxRetries {
return 0, errors.New("maximum number of retries exceeded searching for available port")
}
}
ports[port] = struct{}{}
if err := storePortAllocations(ports); err != nil {
return 0, err
}
return port, nil
}
// Releases a reserved port for a machine when no longer required. Care should
// be taken to ensure there are no conditions (e.g. failure paths) where the
// port might unintentionally remain in use after releasing
func ReleaseMachinePort(port int) error {
lock, err := acquirePortLock()
if err != nil {
return err
}
defer lock.Unlock()
ports, err := loadPortAllocations()
if err != nil {
return err
}
delete(ports, port)
return storePortAllocations(ports)
}
func IsLocalPortAvailable(port int) bool {
// Used to mark invalid / unassigned port
if port <= 0 {
return false
}
lc := getPortCheckListenConfig()
l, err := lc.Listen(context.Background(), "tcp", fmt.Sprintf("127.0.0.1:%d", port))
if err != nil {
return false
}
l.Close()
return true
}
func getRandomPortHold() (io.Closer, int, error) {
l, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
return nil, 0, fmt.Errorf("unable to get free machine port: %w", err)
}
_, portString, err := net.SplitHostPort(l.Addr().String())
if err != nil {
l.Close()
return nil, 0, fmt.Errorf("unable to determine free machine port: %w", err)
}
port, err := strconv.Atoi(portString)
if err != nil {
l.Close()
return nil, 0, fmt.Errorf("unable to convert port to int: %w", err)
}
return l, port, err
}
func acquirePortLock() (*lockfile.LockFile, error) {
lockDir, err := GetGlobalDataDir()
if err != nil {
return nil, err
}
lock, err := lockfile.GetLockFile(filepath.Join(lockDir, portLockFileName))
if err != nil {
return nil, err
}
lock.Lock()
return lock, nil
}
func loadPortAllocations() (map[int]struct{}, error) {
portDir, err := GetGlobalDataDir()
if err != nil {
return nil, err
}
var portData []int
exists := true
file, err := os.OpenFile(filepath.Join(portDir, portAllocFileName), 0, 0)
if errors.Is(err, os.ErrNotExist) {
exists = false
} else if err != nil {
return nil, err
}
defer file.Close()
// Non-existence of the file, or a corrupt file are not treated as hard
// failures, since dynamic reassignment and continued use will eventually
// rebuild the dataset. This also makes migration cases simpler, since
// the state doesn't have to exist
if exists {
decoder := json.NewDecoder(file)
if err := decoder.Decode(&portData); err != nil {
logrus.Warnf("corrupt port allocation file, could not use state")
}
}
ports := make(map[int]struct{})
placeholder := struct{}{}
for _, port := range portData {
ports[port] = placeholder
}
return ports, nil
}
func storePortAllocations(ports map[int]struct{}) error {
portDir, err := GetGlobalDataDir()
if err != nil {
return err
}
portData := make([]int, 0, len(ports))
for port := range ports {
portData = append(portData, port)
}
opts := &ioutils.AtomicFileWriterOptions{ExplicitCommit: true}
w, err := ioutils.NewAtomicFileWriterWithOpts(filepath.Join(portDir, portAllocFileName), 0644, opts)
if err != nil {
return err
}
defer w.Close()
enc := json.NewEncoder(w)
if err := enc.Encode(portData); err != nil {
return err
}
// Commit the changes to disk if no errors
return w.Commit()
}

29
pkg/machine/ports_unix.go Normal file
View File

@ -0,0 +1,29 @@
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd
// +build darwin dragonfly freebsd linux netbsd openbsd
package machine
import (
"net"
"syscall"
)
func getPortCheckListenConfig() *net.ListenConfig {
return &net.ListenConfig{
Control: func(network, address string, c syscall.RawConn) (cerr error) {
if err := c.Control(func(fd uintptr) {
// Prevent listening socket from holding over in TIME_WAIT in the rare case a connection
// attempt occurs in the short window the socket is listening. This ensures the registration
// will be gone when close() completes, freeing it up for the real subsequent listen by another
// process
cerr = syscall.SetsockoptLinger(int(fd), syscall.SOL_SOCKET, syscall.SO_LINGER, &syscall.Linger{
Onoff: 1,
Linger: 0,
})
}); err != nil {
cerr = err
}
return
},
}
}

View File

@ -0,0 +1,28 @@
package machine
import (
"net"
"syscall"
)
// NOTE the reason for the code duplication between win and unix is that the syscall
// implementations require a different cast (Handle on Windows, int on Unixes)
func getPortCheckListenConfig() *net.ListenConfig {
return &net.ListenConfig{
Control: func(network, address string, c syscall.RawConn) (cerr error) {
if err := c.Control(func(fd uintptr) {
// Prevent listening socket from holding over in TIME_WAIT in the rare case a connection
// attempt occurs in the short window the socket is listening. This ensures the registration
// will be gone when close() completes, freeing it up for the real subsequent listen by another
// process
cerr = syscall.SetsockoptLinger(syscall.Handle(fd), syscall.SOL_SOCKET, syscall.SO_LINGER, &syscall.Linger{
Onoff: 1,
Linger: 0,
})
}); err != nil {
cerr = err
}
return
},
}
}

View File

@ -10,7 +10,6 @@ import (
"time"
"github.com/containers/podman/v4/pkg/machine"
"github.com/containers/podman/v4/utils"
"github.com/sirupsen/logrus"
)
@ -55,7 +54,7 @@ func (p *WSLVirtualization) NewMachine(opts machine.InitOptions) (machine.VM, er
}
// Add a random port for ssh
port, err := utils.GetRandomPort()
port, err := machine.AllocateMachinePort()
if err != nil {
return nil, err
}

View File

@ -49,6 +49,8 @@ const registriesConf = `unqualified-search-registries=["docker.io"]
const appendPort = `grep -q Port\ %d /etc/ssh/sshd_config || echo Port %d >> /etc/ssh/sshd_config`
const changePort = `sed -E -i 's/^Port[[:space:]]+[0-9]+/Port %d/' /etc/ssh/sshd_config`
const configServices = `ln -fs /usr/lib/systemd/system/sshd.service /etc/systemd/system/multi-user.target.wants/sshd.service
ln -fs /usr/lib/systemd/system/podman.socket /etc/systemd/system/sockets.target.wants/podman.socket
rm -f /etc/systemd/system/getty.target.wants/console-getty.service
@ -408,19 +410,33 @@ func (v *MachineVM) writeConfig() error {
return machine.WriteConfig(v.ConfigPath, v)
}
func setupConnections(v *MachineVM, opts machine.InitOptions) error {
func constructSSHUris(v *MachineVM) ([]url.URL, []string) {
uri := machine.SSHRemoteConnection.MakeSSHURL(machine.LocalhostIP, rootlessSock, strconv.Itoa(v.Port), v.RemoteUsername)
uriRoot := machine.SSHRemoteConnection.MakeSSHURL(machine.LocalhostIP, rootfulSock, strconv.Itoa(v.Port), "root")
uris := []url.URL{uri, uriRoot}
names := []string{v.Name, v.Name + "-root"}
return uris, names
}
func setupConnections(v *MachineVM, opts machine.InitOptions) error {
uris, names := constructSSHUris(v)
// The first connection defined when connections is empty will become the default
// regardless of IsDefault, so order according to rootful
if opts.Rootful {
uris[0], names[0], uris[1], names[1] = uris[1], names[1], uris[0], names[0]
}
// We need to prevent racing connection updates to containers.conf globally
// across all backends to prevent connection overwrites
flock, err := obtainGlobalConfigLock()
if err != nil {
return fmt.Errorf("could not obtain global lock: %w", err)
}
defer flock.unlock()
for i := 0; i < 2; i++ {
if err := machine.AddConnection(&uris[i], names[i], v.IdentityPath, opts.IsDefault && i == 0); err != nil {
return err
@ -992,6 +1008,13 @@ func (v *MachineVM) Start(name string, opts machine.StartOptions) error {
return err
}
if !machine.IsLocalPortAvailable(v.Port) {
logrus.Warnf("SSH port conflict detected, reassigning a new port")
if err := v.reassignSshPort(); err != nil {
return err
}
}
// Startup user-mode networking if enabled
if err := v.startUserModeNetworking(); err != nil {
return err
@ -1040,6 +1063,74 @@ func (v *MachineVM) Start(name string, opts machine.StartOptions) error {
return err
}
func obtainGlobalConfigLock() (*fileLock, error) {
lockDir, err := machine.GetGlobalDataDir()
if err != nil {
return nil, err
}
// Lock file needs to be above all backends
// TODO: This should be changed to a common.Config lock mechanism when available
return lockFile(filepath.Join(lockDir, "podman-config.lck"))
}
func (v *MachineVM) reassignSshPort() error {
dist := toDist(v.Name)
newPort, err := machine.AllocateMachinePort()
if err != nil {
return err
}
success := false
defer func() {
if !success {
if err := machine.ReleaseMachinePort(newPort); err != nil {
logrus.Warnf("could not release port allocation as part of failure rollback (%d): %w", newPort, err)
}
}
}()
// We need to prevent racing connection updates to containers.conf globally
// across all backends to prevent connection overwrites
flock, err := obtainGlobalConfigLock()
if err != nil {
return fmt.Errorf("could not obtain global lock: %w", err)
}
defer flock.unlock()
// Write a transient invalid port, to force a retry on failure
oldPort := v.Port
v.Port = 0
if err := v.writeConfig(); err != nil {
return err
}
if err := machine.ReleaseMachinePort(oldPort); err != nil {
logrus.Warnf("could not release current ssh port allocation (%d): %w", oldPort, err)
}
if err := wslInvoke(dist, "sh", "-c", fmt.Sprintf(changePort, newPort)); err != nil {
return fmt.Errorf("could not change SSH port for guest OS: %w", err)
}
v.Port = newPort
uris, names := constructSSHUris(v)
for i := 0; i < 2; i++ {
if err := machine.ChangeConnectionURI(names[i], &uris[i]); err != nil {
return err
}
}
// Write updated port back
if err := v.writeConfig(); err != nil {
return err
}
success = true
return nil
}
func findExecutablePeer(name string) (string, error) {
exe, err := os.Executable()
if err != nil {
@ -1360,6 +1451,10 @@ func (v *MachineVM) Remove(name string, opts machine.RemoveOptions) (string, fun
logrus.Error(err)
}
}
if err := machine.ReleaseMachinePort(v.Port); err != nil {
logrus.Warnf("could not release port allocation as part of removal (%d): %w", v.Port, err)
}
return nil
}, nil
}