mirror of
https://github.com/containers/podman.git
synced 2025-10-19 20:23:08 +08:00

This includes two new hidden commands: a 9p server, `podman machine server9p`, and a 9p client, `podman machine client9p` with `server9p` currently only configured to run on Windows and serve 9p via HyperV vsock, and `client9p` only configured to run on Linux. The server is run by `podman machine start` and has the same lifespan as gvproxy (waits for the gvproxy PID to die before shutting down). The client is run inside the VM, also by `podman machine start`, and mounts uses kernel 9p mount code to complete the mount. It's unfortunately not possible to use mount directly without the wrapper; we need to set up the vsock and pass it to mount as an FD. In theory this can be generalized so that the server can run anywhere and over almost any transport, but I haven't done this here as I don't think we have a usecase other than HyperV right now. [NO NEW TESTS NEEDED] This requires changes to Podman in the VM, so we need to wait until a build with this lands in FCOS to test. Signed-off-by: Matthew Heon <matthew.heon@pm.me>
436 lines
13 KiB
Go
436 lines
13 KiB
Go
package vsock
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"os"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
)
|
|
|
|
const (
|
|
// Hypervisor specifies that a socket should communicate with the hypervisor
|
|
// process. Note that this is _not_ the same as a socket owned by a process
|
|
// running on the hypervisor. Most users should probably use Host instead.
|
|
Hypervisor = 0x0
|
|
|
|
// Local specifies that a socket should communicate with a matching socket
|
|
// on the same machine. This provides an alternative to UNIX sockets or
|
|
// similar and may be useful in testing VM sockets applications.
|
|
Local = 0x1
|
|
|
|
// Host specifies that a socket should communicate with processes other than
|
|
// the hypervisor on the host machine. This is the correct choice to
|
|
// communicate with a process running on a hypervisor using a socket dialed
|
|
// from a guest.
|
|
Host = 0x2
|
|
|
|
// Error numbers we recognize, copied here to avoid importing x/sys/unix in
|
|
// cross-platform code.
|
|
ebadf = 9
|
|
enotconn = 107
|
|
|
|
// devVsock is the location of /dev/vsock. It is exposed on both the
|
|
// hypervisor and on virtual machines.
|
|
devVsock = "/dev/vsock"
|
|
|
|
// network is the vsock network reported in net.OpError.
|
|
network = "vsock"
|
|
|
|
// Operation names which may be returned in net.OpError.
|
|
opAccept = "accept"
|
|
opClose = "close"
|
|
opDial = "dial"
|
|
opListen = "listen"
|
|
opRawControl = "raw-control"
|
|
opRawRead = "raw-read"
|
|
opRawWrite = "raw-write"
|
|
opRead = "read"
|
|
opSet = "set"
|
|
opSyscallConn = "syscall-conn"
|
|
opWrite = "write"
|
|
)
|
|
|
|
// TODO(mdlayher): plumb through socket.Config.NetNS if it makes sense.
|
|
|
|
// Config contains options for a Conn or Listener.
|
|
type Config struct{}
|
|
|
|
// Listen opens a connection-oriented net.Listener for incoming VM sockets
|
|
// connections. The port parameter specifies the port for the Listener. Config
|
|
// specifies optional configuration for the Listener. If config is nil, a
|
|
// default configuration will be used.
|
|
//
|
|
// To allow the server to assign a port automatically, specify 0 for port. The
|
|
// address of the server can be retrieved using the Addr method.
|
|
//
|
|
// Listen automatically infers the appropriate context ID for this machine by
|
|
// calling ContextID and passing that value to ListenContextID. Callers with
|
|
// advanced use cases (such as using the Local context ID) may wish to use
|
|
// ListenContextID directly.
|
|
//
|
|
// When the Listener is no longer needed, Close must be called to free
|
|
// resources.
|
|
func Listen(port uint32, cfg *Config) (*Listener, error) {
|
|
cid, err := ContextID()
|
|
if err != nil {
|
|
// No addresses available.
|
|
return nil, opError(opListen, err, nil, nil)
|
|
}
|
|
|
|
return ListenContextID(cid, port, cfg)
|
|
}
|
|
|
|
// ListenContextID is the same as Listen, but also accepts an explicit context
|
|
// ID parameter. This function is intended for advanced use cases and most
|
|
// callers should use Listen instead.
|
|
//
|
|
// See the documentation of Listen for more details.
|
|
func ListenContextID(contextID, port uint32, cfg *Config) (*Listener, error) {
|
|
l, err := listen(contextID, port, cfg)
|
|
if err != nil {
|
|
// No remote address available.
|
|
return nil, opError(opListen, err, &Addr{
|
|
ContextID: contextID,
|
|
Port: port,
|
|
}, nil)
|
|
}
|
|
|
|
return l, nil
|
|
}
|
|
|
|
// FileListener returns a copy of the network listener corresponding to an open
|
|
// os.File. It is the caller's responsibility to close the Listener when
|
|
// finished. Closing the Listener does not affect the os.File, and closing the
|
|
// os.File does not affect the Listener.
|
|
//
|
|
// This function is intended for advanced use cases and most callers should use
|
|
// Listen instead.
|
|
func FileListener(f *os.File) (*Listener, error) {
|
|
l, err := fileListener(f)
|
|
if err != nil {
|
|
// No addresses available.
|
|
return nil, opError(opListen, err, nil, nil)
|
|
}
|
|
|
|
return l, nil
|
|
}
|
|
|
|
var _ net.Listener = &Listener{}
|
|
|
|
// A Listener is a VM sockets implementation of a net.Listener.
|
|
type Listener struct {
|
|
l *listener
|
|
}
|
|
|
|
// Accept implements the Accept method in the net.Listener interface; it waits
|
|
// for the next call and returns a generic net.Conn. The returned net.Conn will
|
|
// always be of type *Conn.
|
|
func (l *Listener) Accept() (net.Conn, error) {
|
|
c, err := l.l.Accept()
|
|
if err != nil {
|
|
return nil, l.opError(opAccept, err)
|
|
}
|
|
|
|
return c, nil
|
|
}
|
|
|
|
// Addr returns the listener's network address, a *Addr. The Addr returned is
|
|
// shared by all invocations of Addr, so do not modify it.
|
|
func (l *Listener) Addr() net.Addr { return l.l.Addr() }
|
|
|
|
// Close stops listening on the VM sockets address. Already Accepted connections
|
|
// are not closed.
|
|
func (l *Listener) Close() error {
|
|
return l.opError(opClose, l.l.Close())
|
|
}
|
|
|
|
// SetDeadline sets the deadline associated with the listener. A zero time value
|
|
// disables the deadline.
|
|
func (l *Listener) SetDeadline(t time.Time) error {
|
|
return l.opError(opSet, l.l.SetDeadline(t))
|
|
}
|
|
|
|
// opError is a convenience for the function opError that also passes the local
|
|
// address of the Listener.
|
|
func (l *Listener) opError(op string, err error) error {
|
|
// No remote address for a Listener.
|
|
return opError(op, err, l.Addr(), nil)
|
|
}
|
|
|
|
// Dial dials a connection-oriented net.Conn to a VM sockets listener. The
|
|
// context ID and port parameters specify the address of the listener. Config
|
|
// specifies optional configuration for the Conn. If config is nil, a default
|
|
// configuration will be used.
|
|
//
|
|
// If dialing a connection from the hypervisor to a virtual machine, the VM's
|
|
// context ID should be specified.
|
|
//
|
|
// If dialing from a VM to the hypervisor, Hypervisor should be used to
|
|
// communicate with the hypervisor process, or Host should be used to
|
|
// communicate with other processes on the host machine.
|
|
//
|
|
// When the connection is no longer needed, Close must be called to free
|
|
// resources.
|
|
func Dial(contextID, port uint32, cfg *Config) (*Conn, error) {
|
|
c, err := dial(contextID, port, cfg)
|
|
if err != nil {
|
|
// No local address, but we have a remote address we can return.
|
|
return nil, opError(opDial, err, nil, &Addr{
|
|
ContextID: contextID,
|
|
Port: port,
|
|
})
|
|
}
|
|
|
|
return c, nil
|
|
}
|
|
|
|
var (
|
|
_ net.Conn = &Conn{}
|
|
_ syscall.Conn = &Conn{}
|
|
)
|
|
|
|
// A Conn is a VM sockets implementation of a net.Conn.
|
|
type Conn struct {
|
|
c *conn
|
|
local *Addr
|
|
remote *Addr
|
|
}
|
|
|
|
// Close closes the connection.
|
|
func (c *Conn) Close() error {
|
|
return c.opError(opClose, c.c.Close())
|
|
}
|
|
|
|
// CloseRead shuts down the reading side of the VM sockets connection. Most
|
|
// callers should just use Close.
|
|
func (c *Conn) CloseRead() error {
|
|
return c.opError(opClose, c.c.CloseRead())
|
|
}
|
|
|
|
// CloseWrite shuts down the writing side of the VM sockets connection. Most
|
|
// callers should just use Close.
|
|
func (c *Conn) CloseWrite() error {
|
|
return c.opError(opClose, c.c.CloseWrite())
|
|
}
|
|
|
|
// LocalAddr returns the local network address. The Addr returned is shared by
|
|
// all invocations of LocalAddr, so do not modify it.
|
|
func (c *Conn) LocalAddr() net.Addr { return c.local }
|
|
|
|
// RemoteAddr returns the remote network address. The Addr returned is shared by
|
|
// all invocations of RemoteAddr, so do not modify it.
|
|
func (c *Conn) RemoteAddr() net.Addr { return c.remote }
|
|
|
|
// Read implements the net.Conn Read method.
|
|
func (c *Conn) Read(b []byte) (int, error) {
|
|
n, err := c.c.Read(b)
|
|
if err != nil {
|
|
return n, c.opError(opRead, err)
|
|
}
|
|
|
|
return n, nil
|
|
}
|
|
|
|
// Write implements the net.Conn Write method.
|
|
func (c *Conn) Write(b []byte) (int, error) {
|
|
n, err := c.c.Write(b)
|
|
if err != nil {
|
|
return n, c.opError(opWrite, err)
|
|
}
|
|
|
|
return n, nil
|
|
}
|
|
|
|
// SetDeadline implements the net.Conn SetDeadline method.
|
|
func (c *Conn) SetDeadline(t time.Time) error {
|
|
return c.opError(opSet, c.c.SetDeadline(t))
|
|
}
|
|
|
|
// SetReadDeadline implements the net.Conn SetReadDeadline method.
|
|
func (c *Conn) SetReadDeadline(t time.Time) error {
|
|
return c.opError(opSet, c.c.SetReadDeadline(t))
|
|
}
|
|
|
|
// SetWriteDeadline implements the net.Conn SetWriteDeadline method.
|
|
func (c *Conn) SetWriteDeadline(t time.Time) error {
|
|
return c.opError(opSet, c.c.SetWriteDeadline(t))
|
|
}
|
|
|
|
// SyscallConn returns a raw network connection. This implements the
|
|
// syscall.Conn interface.
|
|
func (c *Conn) SyscallConn() (syscall.RawConn, error) {
|
|
rc, err := c.c.SyscallConn()
|
|
if err != nil {
|
|
return nil, c.opError(opSyscallConn, err)
|
|
}
|
|
|
|
return &rawConn{
|
|
rc: rc,
|
|
local: c.local,
|
|
remote: c.remote,
|
|
}, nil
|
|
}
|
|
|
|
// opError is a convenience for the function opError that also passes the local
|
|
// and remote addresses of the Conn.
|
|
func (c *Conn) opError(op string, err error) error {
|
|
return opError(op, err, c.local, c.remote)
|
|
}
|
|
|
|
// TODO(mdlayher): see if we can port smarter net.OpError with local/remote
|
|
// address error logic into socket.Conn's SyscallConn type to avoid the need for
|
|
// this wrapper.
|
|
|
|
var _ syscall.RawConn = &rawConn{}
|
|
|
|
// A rawConn is a syscall.RawConn that wraps an internal syscall.RawConn in order
|
|
// to produce net.OpError error values.
|
|
type rawConn struct {
|
|
rc syscall.RawConn
|
|
local, remote *Addr
|
|
}
|
|
|
|
// Control implements the syscall.RawConn Control method.
|
|
func (rc *rawConn) Control(fn func(fd uintptr)) error {
|
|
return rc.opError(opRawControl, rc.rc.Control(fn))
|
|
}
|
|
|
|
// Control implements the syscall.RawConn Read method.
|
|
func (rc *rawConn) Read(fn func(fd uintptr) (done bool)) error {
|
|
return rc.opError(opRawRead, rc.rc.Read(fn))
|
|
}
|
|
|
|
// Control implements the syscall.RawConn Write method.
|
|
func (rc *rawConn) Write(fn func(fd uintptr) (done bool)) error {
|
|
return rc.opError(opRawWrite, rc.rc.Write(fn))
|
|
}
|
|
|
|
// opError is a convenience for the function opError that also passes the local
|
|
// and remote addresses of the rawConn.
|
|
func (rc *rawConn) opError(op string, err error) error {
|
|
return opError(op, err, rc.local, rc.remote)
|
|
}
|
|
|
|
var _ net.Addr = &Addr{}
|
|
|
|
// An Addr is the address of a VM sockets endpoint.
|
|
type Addr struct {
|
|
ContextID, Port uint32
|
|
}
|
|
|
|
// Network returns the address's network name, "vsock".
|
|
func (a *Addr) Network() string { return network }
|
|
|
|
// String returns a human-readable representation of Addr, and indicates if
|
|
// ContextID is meant to be used for a hypervisor, host, VM, etc.
|
|
func (a *Addr) String() string {
|
|
var host string
|
|
|
|
switch a.ContextID {
|
|
case Hypervisor:
|
|
host = fmt.Sprintf("hypervisor(%d)", a.ContextID)
|
|
case Local:
|
|
host = fmt.Sprintf("local(%d)", a.ContextID)
|
|
case Host:
|
|
host = fmt.Sprintf("host(%d)", a.ContextID)
|
|
default:
|
|
host = fmt.Sprintf("vm(%d)", a.ContextID)
|
|
}
|
|
|
|
return fmt.Sprintf("%s:%d", host, a.Port)
|
|
}
|
|
|
|
// fileName returns a file name for use with os.NewFile for Addr.
|
|
func (a *Addr) fileName() string {
|
|
return fmt.Sprintf("%s:%s", a.Network(), a.String())
|
|
}
|
|
|
|
// ContextID retrieves the local VM sockets context ID for this system.
|
|
// ContextID can be used to directly determine if a system is capable of using
|
|
// VM sockets.
|
|
//
|
|
// If the kernel module is unavailable, access to the kernel module is denied,
|
|
// or VM sockets are unsupported on this system, it returns an error.
|
|
func ContextID() (uint32, error) {
|
|
return contextID()
|
|
}
|
|
|
|
// opError unpacks err if possible, producing a net.OpError with the input
|
|
// parameters in order to implement net.Conn. As a convenience, opError returns
|
|
// nil if the input error is nil.
|
|
func opError(op string, err error, local, remote net.Addr) error {
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
|
|
// TODO(mdlayher): this entire function is suspect and should probably be
|
|
// looked at carefully, especially with Go 1.13+ error wrapping.
|
|
//
|
|
// Eventually this *net.OpError logic should probably be ported into
|
|
// mdlayher/socket because similar checks are necessary to comply with
|
|
// nettest.TestConn.
|
|
|
|
// Unwrap inner errors from error types.
|
|
//
|
|
// TODO(mdlayher): errors.Cause or similar in Go 1.13.
|
|
switch xerr := err.(type) {
|
|
// os.PathError produced by os.File method calls.
|
|
case *os.PathError:
|
|
// Although we could make use of xerr.Op here, we're passing it manually
|
|
// for consistency, since some of the Conn calls we are making don't
|
|
// wrap an os.File, which would return an Op for us.
|
|
//
|
|
// As a special case, if the error is related to access to the /dev/vsock
|
|
// device, we don't unwrap it, so the caller has more context as to why
|
|
// their operation actually failed than "permission denied" or similar.
|
|
if xerr.Path != devVsock {
|
|
err = xerr.Err
|
|
}
|
|
}
|
|
|
|
switch {
|
|
case err == io.EOF, isErrno(err, enotconn):
|
|
// We may see a literal io.EOF as happens with x/net/nettest, but
|
|
// "transport not connected" also means io.EOF in Go.
|
|
return io.EOF
|
|
case err == os.ErrClosed, isErrno(err, ebadf), strings.Contains(err.Error(), "use of closed"):
|
|
// Different operations may return different errors that all effectively
|
|
// indicate a closed file.
|
|
//
|
|
// To rectify the differences, net.TCPConn uses an error with this text
|
|
// from internal/poll for the backing file already being closed.
|
|
err = errors.New("use of closed network connection")
|
|
default:
|
|
// Nothing to do, return this directly.
|
|
}
|
|
|
|
// Determine source and addr using the rules defined by net.OpError's
|
|
// documentation: https://golang.org/pkg/net/#OpError.
|
|
var source, addr net.Addr
|
|
switch op {
|
|
case opClose, opDial, opRawRead, opRawWrite, opRead, opWrite:
|
|
if local != nil {
|
|
source = local
|
|
}
|
|
if remote != nil {
|
|
addr = remote
|
|
}
|
|
case opAccept, opListen, opRawControl, opSet, opSyscallConn:
|
|
if local != nil {
|
|
addr = local
|
|
}
|
|
}
|
|
|
|
return &net.OpError{
|
|
Op: op,
|
|
Net: network,
|
|
Source: source,
|
|
Addr: addr,
|
|
Err: err,
|
|
}
|
|
}
|