Initial addition of 9p code to Podman

This includes two new hidden commands: a 9p server,
`podman machine server9p`, and a 9p client,
`podman machine client9p` with `server9p` currently only
configured to run on Windows and serve 9p via HyperV vsock, and
`client9p` only configured to run on Linux. The server is run by
`podman machine start` and has the same lifespan as gvproxy
(waits for the gvproxy PID to die before shutting down). The
client is run inside the VM, also by `podman machine start`, and
mounts uses kernel 9p mount code to complete the mount. It's
unfortunately not possible to use mount directly without the
wrapper; we need to set up the vsock and pass it to mount as an
FD.

In theory this can be generalized so that the server can run
anywhere and over almost any transport, but I haven't done this
here as I don't think we have a usecase other than HyperV right
now.

[NO NEW TESTS NEEDED] This requires changes to Podman in the VM,
so we need to wait until a build with this lands in FCOS to test.

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
This commit is contained in:
Matthew Heon
2023-09-11 13:41:15 -04:00
committed by Matt Heon
parent 2972f5941f
commit 642fa98976
93 changed files with 15119 additions and 29 deletions

11
vendor/github.com/linuxkit/virtsock/AUTHORS generated vendored Normal file
View File

@ -0,0 +1,11 @@
# This file lists all individuals having contributed content to the repository.
# For how it is generated, see `scripts/generate-authors.sh`.
Ben Weedon <beweedon@microsoft.com>
Ian Campbell <ian.campbell@docker.com>
John Starks <jostarks@microsoft.com>
Justin Cormack <justin.cormack@docker.com>
Magnus Skjegstad <magnus.skjegstad@docker.com>
Riyaz Faizullabhoy <riyaz.faizullabhoy@docker.com>
Rolf Neugebauer <rolf.neugebauer@docker.com>
Simon Ferquel <simon.ferquel@docker.com>

16
vendor/github.com/linuxkit/virtsock/LICENSE generated vendored Normal file
View File

@ -0,0 +1,16 @@
Unless explicitly stated at the top of the file, this code is covered
by the following license:
Copyright 2016-2017 The authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,115 @@
// Package hvsock provides a Go interface to Hyper-V sockets both on
// Windows and on Linux. The Linux bindings require patches for the
// 4.9.x kernel. If you are using a Linux kernel 4.14.x or newer you
// should use the vsock package instead as the Hyper-V socket support
// in these kernels have been merged with the virtio sockets
// implementation.
package hvsock
import (
"encoding/binary"
"fmt"
"net"
"reflect"
)
var (
// GUIDZero used by listeners to accept connections from all partitions
GUIDZero, _ = GUIDFromString("00000000-0000-0000-0000-000000000000")
// GUIDWildcard used by listeners to accept connections from all partitions
GUIDWildcard, _ = GUIDFromString("00000000-0000-0000-0000-000000000000")
// GUIDBroadcast undocumented
GUIDBroadcast, _ = GUIDFromString("FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF")
// GUIDChildren used by listeners to accept connections from children
GUIDChildren, _ = GUIDFromString("90db8b89-0d35-4f79-8ce9-49ea0ac8b7cd")
// GUIDLoopback use to connect in loopback mode
GUIDLoopback, _ = GUIDFromString("e0e16197-dd56-4a10-9195-5ee7a155a838")
// GUIDParent use to connect to the parent partition
GUIDParent, _ = GUIDFromString("a42e7cda-d03f-480c-9cc2-a4de20abb878")
// GUIDs for LinuxVMs with the new Hyper-V socket implementation need to match this template
guidTemplate, _ = GUIDFromString("00000000-facb-11e6-bd58-64006a7986d3")
)
const (
// The Hyper-V socket implementation used in the 4.9.x kernels
// seems to fail silently if messages are above 8k. The newer
// implementation in the 4.14.x (and newer) kernels seems to
// work fine with larger messages. This is constant is used as
// a temporary workaround to limit the amount of data sent and
// should be removed once support for 4.9.x kernels is
// deprecated.
maxMsgSize = 8 * 1024
)
// GUID is used by Hypper-V sockets for "addresses" and "ports"
type GUID [16]byte
// Convert a GUID into a string
func (g *GUID) String() string {
/* XXX This assume little endian */
return fmt.Sprintf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
g[3], g[2], g[1], g[0],
g[5], g[4],
g[7], g[6],
g[8], g[9],
g[10], g[11], g[12], g[13], g[14], g[15])
}
// Port converts a Service GUID to a "port" usable by the vsock package.
// It can be used to convert hvsock code to vsock code. On 4.14.x
// kernels Service GUIDs for talking to Linux should have the form of
// xxxxxxxx-facb-11e6-bd58-64006a7986d3, where xxxxxxxx is the vsock port.
func (g *GUID) Port() (uint32, error) {
// Check that the GUID is as expected
if !reflect.DeepEqual(g[4:], guidTemplate[4:]) {
return 0, fmt.Errorf("%s does not conform with the template", g)
}
return binary.LittleEndian.Uint32(g[0:4]), nil
}
// GUIDFromString parses a string and returns a GUID
func GUIDFromString(s string) (GUID, error) {
var g GUID
var err error
_, err = fmt.Sscanf(s, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
&g[3], &g[2], &g[1], &g[0],
&g[5], &g[4],
&g[7], &g[6],
&g[8], &g[9],
&g[10], &g[11], &g[12], &g[13], &g[14], &g[15])
return g, err
}
// Addr represents a Hyper-V socket address
type Addr struct {
VMID GUID
ServiceID GUID
}
// Network returns the type of network for Hyper-V sockets
func (a Addr) Network() string {
return "hvsock"
}
func (a Addr) String() string {
vmid := a.VMID.String()
svc := a.ServiceID.String()
return vmid + ":" + svc
}
// Conn is a hvsock connection which supports half-close.
type Conn interface {
net.Conn
CloseRead() error
CloseWrite() error
}
// Since there doesn't seem to be a standard min function
func min(x, y int) int {
if x < y {
return x
}
return y
}

View File

@ -0,0 +1,22 @@
// +build !linux,!windows
package hvsock
import (
"fmt"
"net"
"runtime"
)
// Supported returns if hvsocks are supported on your platform
func Supported() bool {
return false
}
func Dial(raddr Addr) (Conn, error) {
return nil, fmt.Errorf("Dial() not implemented on %s", runtime.GOOS)
}
func Listen(addr Addr) (net.Listener, error) {
return nil, fmt.Errorf("Listen() not implemented on %s", runtime.GOOS)
}

View File

@ -0,0 +1,277 @@
package hvsock
// On Linux we have to deal with two different implementations. The
// "legacy" implementation never made it into the kernel, but several
// kernels, including the LinuxKit one carried patches for it for
// quite a while. The legacy version defined a new address family
// while the new version sits on top of the existing VMware/virtio
// socket implementation.
//
// We try to determine at init if we are on a kernel with the legacy
// implementation or the new version and set "legacyMode" accordingly.
//
// We can't just reuse the vsock implementation as we still need to
// emulated CloseRead()/CloseWrite() as not all Windows builds support
// it.
/*
#include <sys/socket.h>
struct sockaddr_hv {
unsigned short shv_family;
unsigned short reserved;
unsigned char shv_vm_id[16];
unsigned char shv_service_id[16];
};
int bind_sockaddr_hv(int fd, const struct sockaddr_hv *sa_hv) {
return bind(fd, (const struct sockaddr*)sa_hv, sizeof(*sa_hv));
}
int connect_sockaddr_hv(int fd, const struct sockaddr_hv *sa_hv) {
return connect(fd, (const struct sockaddr*)sa_hv, sizeof(*sa_hv));
}
int accept_hv(int fd, struct sockaddr_hv *sa_hv, socklen_t *sa_hv_len) {
return accept(fd, (struct sockaddr *)sa_hv, sa_hv_len);
}
int getsockname_hv(int fd, struct sockaddr_hv *sa_hv, socklen_t *sa_hv_len) {
return getsockname(fd, (struct sockaddr *)sa_hv, sa_hv_len);
}
*/
import "C"
import (
"fmt"
"net"
"os"
"syscall"
"time"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)
const (
hvsockAF = 43 //SHV_PROTO_RAW
hvsockRaw = 1 // SHV_PROTO_RAW
)
// Supported returns if hvsocks are supported on your platform
func Supported() bool {
var sa C.struct_sockaddr_hv
var sa_len C.socklen_t
// Try opening a hvsockAF socket. If it works we are on older, i.e. 4.9.x kernels.
// 4.11 defines AF_SMC as 43 but it doesn't support protocol 1 so the
// socket() call should fail.
fd, err := syscall.Socket(hvsockAF, syscall.SOCK_STREAM, hvsockRaw)
if err != nil {
return false
}
// 4.16 defines SMCPROTO_SMC6 as 1 but its socket name size doesn't match
// size of sockaddr_hv so corresponding check should fail.
sa_len = C.sizeof_struct_sockaddr_hv
ret, _ := C.getsockname_hv(C.int(fd), &sa, &sa_len)
syscall.Close(fd)
if ret < 0 || sa_len != C.sizeof_struct_sockaddr_hv {
return false
}
return true
}
// Dial a Hyper-V socket address
func Dial(raddr Addr) (Conn, error) {
fd, err := syscall.Socket(hvsockAF, syscall.SOCK_STREAM, hvsockRaw)
if err != nil {
return nil, err
}
sa := C.struct_sockaddr_hv{}
sa.shv_family = hvsockAF
sa.reserved = 0
for i := 0; i < 16; i++ {
sa.shv_vm_id[i] = C.uchar(raddr.VMID[i])
}
for i := 0; i < 16; i++ {
sa.shv_service_id[i] = C.uchar(raddr.ServiceID[i])
}
// Retry connect in a loop if EINTR is encountered.
for {
if ret, errno := C.connect_sockaddr_hv(C.int(fd), &sa); ret != 0 {
if errno == syscall.EINTR {
continue
}
return nil, fmt.Errorf("connect(%s) failed with %d, errno=%d", raddr, ret, errno)
}
break
}
return newHVsockConn(uintptr(fd), &Addr{VMID: GUIDZero, ServiceID: GUIDZero}, &raddr), nil
}
// Listen returns a net.Listener which can accept connections on the given port
func Listen(addr Addr) (net.Listener, error) {
fd, err := syscall.Socket(hvsockAF, syscall.SOCK_STREAM, hvsockRaw)
if err != nil {
return nil, err
}
sa := C.struct_sockaddr_hv{}
sa.shv_family = hvsockAF
sa.reserved = 0
for i := 0; i < 16; i++ {
sa.shv_vm_id[i] = C.uchar(addr.VMID[i])
}
for i := 0; i < 16; i++ {
sa.shv_service_id[i] = C.uchar(addr.ServiceID[i])
}
if ret, errno := C.bind_sockaddr_hv(C.int(fd), &sa); ret != 0 {
return nil, fmt.Errorf("listen(%s) failed with %d, errno=%d", addr, ret, errno)
}
err = syscall.Listen(fd, syscall.SOMAXCONN)
if err != nil {
return nil, errors.Wrapf(err, "listen(%s) failed", addr)
}
return &hvsockListener{fd, addr}, nil
}
//
// Hyper-v sockets Listener implementation
//
type hvsockListener struct {
fd int
local Addr
}
// Accept accepts an incoming call and returns the new connection.
func (v *hvsockListener) Accept() (net.Conn, error) {
var acceptSA C.struct_sockaddr_hv
var acceptSALen C.socklen_t
acceptSALen = C.sizeof_struct_sockaddr_hv
fd, err := C.accept_hv(C.int(v.fd), &acceptSA, &acceptSALen)
if err != nil {
return nil, errors.Wrapf(err, "accept(%s) failed", v.local)
}
remote := &Addr{VMID: guidFromC(acceptSA.shv_vm_id), ServiceID: guidFromC(acceptSA.shv_service_id)}
return newHVsockConn(uintptr(fd), &v.local, remote), nil
}
// Close closes the listening connection
func (v *hvsockListener) Close() error {
// Note this won't cause the Accept to unblock.
return unix.Close(v.fd)
}
// Addr returns the address the Listener is listening on
func (v *hvsockListener) Addr() net.Addr {
return v.local
}
//
// Hyper-V socket connection implementation
//
// hvsockConn represents a connection over a Hyper-V socket
type hvsockConn struct {
hvsock *os.File
fd uintptr
local *Addr
remote *Addr
}
func newHVsockConn(fd uintptr, local, remote *Addr) *hvsockConn {
hvsock := os.NewFile(fd, fmt.Sprintf("hvsock:%d", fd))
return &hvsockConn{hvsock: hvsock, fd: fd, local: local, remote: remote}
}
// LocalAddr returns the local address of a connection
func (v *hvsockConn) LocalAddr() net.Addr {
return v.local
}
// RemoteAddr returns the remote address of a connection
func (v *hvsockConn) RemoteAddr() net.Addr {
return v.remote
}
// Close closes the connection
func (v *hvsockConn) Close() error {
return v.hvsock.Close()
}
// CloseRead shuts down the reading side of a hvsock connection
func (v *hvsockConn) CloseRead() error {
return syscall.Shutdown(int(v.fd), syscall.SHUT_RD)
}
// CloseWrite shuts down the writing side of a hvsock connection
func (v *hvsockConn) CloseWrite() error {
return syscall.Shutdown(int(v.fd), syscall.SHUT_WR)
}
// Read reads data from the connection
func (v *hvsockConn) Read(buf []byte) (int, error) {
return v.hvsock.Read(buf)
}
// Write writes data over the connection
// TODO(rn): replace with a straight call to v.hvsock.Write() once 4.9.x support is deprecated
func (v *hvsockConn) Write(buf []byte) (int, error) {
written := 0
toWrite := len(buf)
for toWrite > 0 {
thisBatch := min(toWrite, maxMsgSize)
n, err := v.hvsock.Write(buf[written : written+thisBatch])
if err != nil {
return written, err
}
if n != thisBatch {
return written, fmt.Errorf("short write %d != %d", n, thisBatch)
}
toWrite -= n
written += n
}
return written, nil
}
// SetDeadline sets the read and write deadlines associated with the connection
func (v *hvsockConn) SetDeadline(t time.Time) error {
return nil // FIXME
}
// SetReadDeadline sets the deadline for future Read calls.
func (v *hvsockConn) SetReadDeadline(t time.Time) error {
return nil // FIXME
}
// SetWriteDeadline sets the deadline for future Write calls
func (v *hvsockConn) SetWriteDeadline(t time.Time) error {
return nil // FIXME
}
// File duplicates the underlying socket descriptor and returns it.
func (v *hvsockConn) File() (*os.File, error) {
// This is equivalent to dup(2) but creates the new fd with CLOEXEC already set.
r0, _, e1 := syscall.Syscall(syscall.SYS_FCNTL, uintptr(v.hvsock.Fd()), syscall.F_DUPFD_CLOEXEC, 0)
if e1 != 0 {
return nil, os.NewSyscallError("fcntl", e1)
}
return os.NewFile(r0, v.hvsock.Name()), nil
}
func guidFromC(cg [16]C.uchar) GUID {
var g GUID
for i := 0; i < 16; i++ {
g[i] = byte(cg[i])
}
return g
}

View File

@ -0,0 +1,496 @@
package hvsock
import (
"fmt"
"io"
"log"
"net"
"runtime"
"sync"
"sync/atomic"
"syscall"
"time"
"unsafe"
"github.com/pkg/errors"
)
// Make sure Winsock2 is initialised
func init() {
e := syscall.WSAStartup(uint32(0x202), &wsaData)
if e != nil {
log.Fatal("WSAStartup", e)
}
}
const (
hvsockAF = 34 // AF_HYPERV
hvsockRaw = 1 // SHV_PROTO_RAW
)
var (
// ErrTimeout is an error returned on timeout
ErrTimeout = &timeoutError{}
wsaData syscall.WSAData
)
// Supported returns if hvsocks are supported on your platform
func Supported() bool {
return true
}
// Dial a Hyper-V socket address
func Dial(raddr Addr) (Conn, error) {
fd, err := syscall.Socket(hvsockAF, syscall.SOCK_STREAM, hvsockRaw)
if err != nil {
return nil, err
}
var sa rawSockaddrHyperv
ptr, n, err := raddr.sockaddr(&sa)
if err != nil {
return nil, err
}
if err := sys_connect(fd, ptr, n); err != nil {
return nil, errors.Wrapf(err, "connect(%s) failed", raddr)
}
return newHVsockConn(fd, Addr{VMID: GUIDZero, ServiceID: GUIDZero}, raddr)
}
// Listen returns a net.Listener which can accept connections on the given port
func Listen(addr Addr) (net.Listener, error) {
fd, err := syscall.Socket(hvsockAF, syscall.SOCK_STREAM, hvsockRaw)
if err != nil {
return nil, err
}
var sa rawSockaddrHyperv
ptr, n, err := addr.sockaddr(&sa)
if err != nil {
return nil, err
}
if err := sys_bind(fd, ptr, n); err != nil {
return nil, fmt.Errorf("bind(%s) failed with %v", addr, err)
}
err = syscall.Listen(fd, syscall.SOMAXCONN)
if err != nil {
return nil, errors.Wrapf(err, "listen(%s) failed", addr)
}
return &hvsockListener{fd, addr}, nil
}
//
// Hyper-v sockets Listener implementation
//
type hvsockListener struct {
fd syscall.Handle
local Addr
}
// Accept accepts an incoming call and returns the new connection
func (v *hvsockListener) Accept() (net.Conn, error) {
var sa rawSockaddrHyperv
var n = int32(unsafe.Sizeof(sa))
fd, err := sys_accept(v.fd, &sa, &n)
if err != nil {
return nil, err
}
// Extract an Addr from sa
raddr := Addr{}
for i := 0; i < len(raddr.VMID); i++ {
raddr.VMID[i] = sa.VMID[i]
}
for i := 0; i < len(raddr.ServiceID); i++ {
raddr.ServiceID[i] = sa.ServiceID[i]
}
return newHVsockConn(fd, v.local, raddr)
}
// Close closes the listening connection
func (v *hvsockListener) Close() error {
return syscall.Close(v.fd)
}
// Addr returns the address the Listener is listening on
func (v *hvsockListener) Addr() net.Addr {
return v.local
}
//
// Hyper-V socket connection implementation
//
// hvsockConn represent a Hyper-V connection. Complex mostly due to asynch send()/recv() syscalls.
type hvsockConn struct {
fd syscall.Handle
local Addr
remote Addr
wg sync.WaitGroup
wgLock sync.RWMutex
closing atomicBool
readDeadline deadlineHandler
writeDeadline deadlineHandler
}
func newHVsockConn(h syscall.Handle, local Addr, remote Addr) (*hvsockConn, error) {
ioInitOnce.Do(initIo)
v := &hvsockConn{fd: h, local: local, remote: remote}
_, err := createIoCompletionPort(h, ioCompletionPort, 0, 0xffffffff)
if err != nil {
return nil, err
}
err = setFileCompletionNotificationModes(h,
cFILE_SKIP_COMPLETION_PORT_ON_SUCCESS|cFILE_SKIP_SET_EVENT_ON_HANDLE)
if err != nil {
return nil, err
}
v.readDeadline.channel = make(timeoutChan)
v.writeDeadline.channel = make(timeoutChan)
return v, nil
}
// LocalAddr returns the local address of a connection
func (v *hvsockConn) LocalAddr() net.Addr {
return v.local
}
// RemoteAddr returns the remote address of a connection
func (v *hvsockConn) RemoteAddr() net.Addr {
return v.remote
}
// Close closes the connection
func (v *hvsockConn) Close() error {
v.close()
return nil
}
// CloseRead shuts down the reading side of a hvsock connection
func (v *hvsockConn) CloseRead() error {
return syscall.Shutdown(v.fd, syscall.SHUT_RD)
}
// CloseWrite shuts down the writing side of a hvsock connection
func (v *hvsockConn) CloseWrite() error {
return syscall.Shutdown(v.fd, syscall.SHUT_WR)
}
// Read reads data from the connection
func (v *hvsockConn) Read(buf []byte) (int, error) {
var b syscall.WSABuf
var f uint32
b.Len = uint32(len(buf))
b.Buf = &buf[0]
c, err := v.prepareIo()
if err != nil {
return 0, err
}
defer v.wg.Done()
if v.readDeadline.timedout.isSet() {
return 0, ErrTimeout
}
var bytes uint32
err = syscall.WSARecv(v.fd, &b, 1, &bytes, &f, &c.o, nil)
n, err := v.asyncIo(c, &v.readDeadline, bytes, err)
runtime.KeepAlive(buf)
// Handle EOF conditions.
if err == nil && n == 0 && len(buf) != 0 {
return 0, io.EOF
} else if err == syscall.ERROR_BROKEN_PIPE {
return 0, io.EOF
} else {
return n, err
}
}
// Write writes data over the connection
// TODO(rn): Remove once 4.9.x support is deprecated
func (v *hvsockConn) Write(buf []byte) (int, error) {
written := 0
toWrite := len(buf)
for toWrite > 0 {
thisBatch := min(toWrite, maxMsgSize)
n, err := v.write(buf[written : written+thisBatch])
if err != nil {
return written, err
}
if n != thisBatch {
return written, fmt.Errorf("short write %d != %d", n, thisBatch)
}
toWrite -= n
written += n
}
return written, nil
}
func (v *hvsockConn) write(buf []byte) (int, error) {
var b syscall.WSABuf
var f uint32
if len(buf) == 0 {
return 0, nil
}
f = 0
b.Len = uint32(len(buf))
b.Buf = &buf[0]
c, err := v.prepareIo()
if err != nil {
return 0, err
}
defer v.wg.Done()
if v.writeDeadline.timedout.isSet() {
return 0, ErrTimeout
}
var bytes uint32
err = syscall.WSASend(v.fd, &b, 1, &bytes, f, &c.o, nil)
n, err := v.asyncIo(c, &v.writeDeadline, bytes, err)
runtime.KeepAlive(buf)
return n, err
}
// SetReadDeadline implementation for Hyper-V sockets
func (v *hvsockConn) SetReadDeadline(deadline time.Time) error {
return v.readDeadline.set(deadline)
}
// SetWriteDeadline implementation for Hyper-V sockets
func (v *hvsockConn) SetWriteDeadline(deadline time.Time) error {
return v.writeDeadline.set(deadline)
}
// SetDeadline implementation for Hyper-V sockets
func (v *hvsockConn) SetDeadline(deadline time.Time) error {
if err := v.SetReadDeadline(deadline); err != nil {
return err
}
return v.SetWriteDeadline(deadline)
}
// Helper functions for conversion to sockaddr
// struck sockaddr equivalent
type rawSockaddrHyperv struct {
Family uint16
Reserved uint16
VMID GUID
ServiceID GUID
}
// Utility function to build a struct sockaddr for syscalls.
func (a Addr) sockaddr(sa *rawSockaddrHyperv) (unsafe.Pointer, int32, error) {
sa.Family = hvsockAF
sa.Reserved = 0
for i := 0; i < len(sa.VMID); i++ {
sa.VMID[i] = a.VMID[i]
}
for i := 0; i < len(sa.ServiceID); i++ {
sa.ServiceID[i] = a.ServiceID[i]
}
return unsafe.Pointer(sa), int32(unsafe.Sizeof(*sa)), nil
}
// Help for read/write timeouts
type deadlineHandler struct {
setLock sync.Mutex
channel timeoutChan
channelLock sync.RWMutex
timer *time.Timer
timedout atomicBool
}
// The code below here is adjusted from:
// https://github.com/Microsoft/go-winio/blob/master/file.go
type atomicBool int32
func (b *atomicBool) isSet() bool { return atomic.LoadInt32((*int32)(b)) != 0 }
func (b *atomicBool) setFalse() { atomic.StoreInt32((*int32)(b), 0) }
func (b *atomicBool) setTrue() { atomic.StoreInt32((*int32)(b), 1) }
func (b *atomicBool) swap(new bool) bool {
var newInt int32
if new {
newInt = 1
}
return atomic.SwapInt32((*int32)(b), newInt) == 1
}
type timeoutError struct{}
func (e *timeoutError) Error() string { return "i/o timeout" }
func (e *timeoutError) Timeout() bool { return true }
func (e *timeoutError) Temporary() bool { return true }
type timeoutChan chan struct{}
var ioInitOnce sync.Once
var ioCompletionPort syscall.Handle
// ioResult contains the result of an asynchronous IO operation
type ioResult struct {
bytes uint32
err error
}
type ioOperation struct {
o syscall.Overlapped
ch chan ioResult
}
func initIo() {
h, err := createIoCompletionPort(syscall.InvalidHandle, 0, 0, 0xffffffff)
if err != nil {
panic(err)
}
ioCompletionPort = h
go ioCompletionProcessor(h)
}
func (v *hvsockConn) close() {
v.wgLock.Lock()
if !v.closing.swap(true) {
v.wgLock.Unlock()
// cancel all IO and wait for it to complete
cancelIoEx(v.fd, nil)
v.wg.Wait()
// at this point, no new IO can start
syscall.Close(v.fd)
v.fd = 0
} else {
v.wgLock.Unlock()
}
}
// prepareIo prepares for a new IO operation
func (v *hvsockConn) prepareIo() (*ioOperation, error) {
v.wgLock.RLock()
if v.closing.isSet() {
v.wgLock.RUnlock()
return nil, fmt.Errorf("HvSocket has already been closed")
}
v.wg.Add(1)
v.wgLock.RUnlock()
c := &ioOperation{}
c.ch = make(chan ioResult)
return c, nil
}
// ioCompletionProcessor processes completed async IOs forever
func ioCompletionProcessor(h syscall.Handle) {
// Set the timer resolution to 1. This fixes a performance regression in golang 1.6.
timeBeginPeriod(1)
for {
var bytes uint32
var key uintptr
var op *ioOperation
err := getQueuedCompletionStatus(h, &bytes, &key, &op, syscall.INFINITE)
if op == nil {
panic(err)
}
op.ch <- ioResult{bytes, err}
}
}
// asyncIo processes the return value from Recv or Send, blocking until
// the operation has actually completed.
func (v *hvsockConn) asyncIo(c *ioOperation, d *deadlineHandler, bytes uint32, err error) (int, error) {
if err != syscall.ERROR_IO_PENDING {
return int(bytes), err
}
if v.closing.isSet() {
cancelIoEx(v.fd, &c.o)
}
var timeout timeoutChan
if d != nil {
d.channelLock.Lock()
timeout = d.channel
d.channelLock.Unlock()
}
var r ioResult
select {
case r = <-c.ch:
err = r.err
if err == syscall.ERROR_OPERATION_ABORTED {
if v.closing.isSet() {
err = fmt.Errorf("HvSocket has already been closed")
}
}
case <-timeout:
cancelIoEx(v.fd, &c.o)
r = <-c.ch
err = r.err
if err == syscall.ERROR_OPERATION_ABORTED {
err = ErrTimeout
}
}
// runtime.KeepAlive is needed, as c is passed via native
// code to ioCompletionProcessor, c must remain alive
// until the channel read is complete.
runtime.KeepAlive(c)
return int(r.bytes), err
}
func (d *deadlineHandler) set(deadline time.Time) error {
d.setLock.Lock()
defer d.setLock.Unlock()
if d.timer != nil {
if !d.timer.Stop() {
<-d.channel
}
d.timer = nil
}
d.timedout.setFalse()
select {
case <-d.channel:
d.channelLock.Lock()
d.channel = make(chan struct{})
d.channelLock.Unlock()
default:
}
if deadline.IsZero() {
return nil
}
timeoutIO := func() {
d.timedout.setTrue()
close(d.channel)
}
now := time.Now()
duration := deadline.Sub(now)
if deadline.After(now) {
// Deadline is in the future, set a timer to wait
d.timer = time.AfterFunc(duration, timeoutIO)
} else {
// Deadline is in the past. Cancel all pending IO now.
timeoutIO()
}
return nil
}

View File

@ -0,0 +1,171 @@
package hvsock
/*
Most of this code was derived from: https://github.com/Microsoft/go-winio
which has the following license:
The MIT License (MIT)
Copyright (c) 2015 Microsoft
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
import (
"syscall"
"unsafe"
)
var (
modws2_32 = syscall.NewLazyDLL("ws2_32.dll")
modwinmm = syscall.NewLazyDLL("winmm.dll")
modkernel32 = syscall.NewLazyDLL("kernel32.dll")
procConnect = modws2_32.NewProc("connect")
procBind = modws2_32.NewProc("bind")
procAccept = modws2_32.NewProc("accept")
procCancelIoEx = modkernel32.NewProc("CancelIoEx")
procCreateIoCompletionPort = modkernel32.NewProc("CreateIoCompletionPort")
procGetQueuedCompletionStatus = modkernel32.NewProc("GetQueuedCompletionStatus")
procSetFileCompletionNotificationModes = modkernel32.NewProc("SetFileCompletionNotificationModes")
proctimeBeginPeriod = modwinmm.NewProc("timeBeginPeriod")
)
// Do the interface allocations only once for common
// Errno values.
const (
errnoERROR_IO_PENDING = 997
socketError = uintptr(^uint32(0))
cFILE_SKIP_COMPLETION_PORT_ON_SUCCESS = 1
cFILE_SKIP_SET_EVENT_ON_HANDLE = 2
)
var (
errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING)
)
// errnoErr returns common boxed Errno values, to prevent
// allocations at runtime.
func errnoErr(e syscall.Errno) error {
switch e {
case 0:
return nil
case errnoERROR_IO_PENDING:
return errERROR_IO_PENDING
}
// TODO: add more here, after collecting data on the common
// error values see on Windows. (perhaps when running
// all.bat?)
return e
}
func sys_connect(s syscall.Handle, name unsafe.Pointer, namelen int32) (err error) {
r1, _, e1 := syscall.Syscall(procConnect.Addr(), 3, uintptr(s), uintptr(name), uintptr(namelen))
if r1 == socketError {
if e1 != 0 {
err = errnoErr(e1)
} else {
err = syscall.EINVAL
}
}
return
}
func sys_bind(s syscall.Handle, name unsafe.Pointer, namelen int32) (err error) {
r1, _, e1 := syscall.Syscall(procBind.Addr(), 3, uintptr(s), uintptr(name), uintptr(namelen))
if r1 == socketError {
if e1 != 0 {
err = errnoErr(e1)
} else {
err = syscall.EINVAL
}
}
return
}
func sys_accept(s syscall.Handle, rsa *rawSockaddrHyperv, addrlen *int32) (handle syscall.Handle, err error) {
r1, _, e1 := syscall.Syscall(procAccept.Addr(), 3, uintptr(s), uintptr(unsafe.Pointer(rsa)), uintptr(unsafe.Pointer(addrlen)))
handle = syscall.Handle(r1)
if r1 == socketError {
if e1 != 0 {
err = errnoErr(e1)
} else {
err = syscall.EINVAL
}
}
return
}
func cancelIoEx(file syscall.Handle, o *syscall.Overlapped) (err error) {
r1, _, e1 := syscall.Syscall(procCancelIoEx.Addr(), 2, uintptr(file), uintptr(unsafe.Pointer(o)), 0)
if r1 == 0 {
if e1 != 0 {
err = errnoErr(e1)
} else {
err = syscall.EINVAL
}
}
return
}
func createIoCompletionPort(file syscall.Handle, port syscall.Handle, key uintptr, threadCount uint32) (newport syscall.Handle, err error) {
r0, _, e1 := syscall.Syscall6(procCreateIoCompletionPort.Addr(), 4, uintptr(file), uintptr(port), uintptr(key), uintptr(threadCount), 0, 0)
newport = syscall.Handle(r0)
if newport == 0 {
if e1 != 0 {
err = errnoErr(e1)
} else {
err = syscall.EINVAL
}
}
return
}
func getQueuedCompletionStatus(port syscall.Handle, bytes *uint32, key *uintptr, o **ioOperation, timeout uint32) (err error) {
r1, _, e1 := syscall.Syscall6(procGetQueuedCompletionStatus.Addr(), 5, uintptr(port), uintptr(unsafe.Pointer(bytes)), uintptr(unsafe.Pointer(key)), uintptr(unsafe.Pointer(o)), uintptr(timeout), 0)
if r1 == 0 {
if e1 != 0 {
err = errnoErr(e1)
} else {
err = syscall.EINVAL
}
}
return
}
func setFileCompletionNotificationModes(h syscall.Handle, flags uint8) (err error) {
r1, _, e1 := syscall.Syscall(procSetFileCompletionNotificationModes.Addr(), 2, uintptr(h), uintptr(flags), 0)
if r1 == 0 {
if e1 != 0 {
err = errnoErr(e1)
} else {
err = syscall.EINVAL
}
}
return
}
func timeBeginPeriod(period uint32) (n int32) {
r0, _, _ := syscall.Syscall(proctimeBeginPeriod.Addr(), 1, uintptr(period), 0, 0)
n = int32(r0)
return
}