Don't return an error from dial if the balancer returns no initial servers (#1112)

This modifies the WithBlock behavior somewhat: Dial now blocks until there is at
least one valid connection.  Previously, connections were made serially until
all had completed successfully, and any error was returned to the caller.
Errors from connecting to a backend are now returned only when no balancer is
used; with a balancer, an error is returned only if the balancer itself fails
to start.

Fixes #976
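
To illustrate the new semantics, here is a minimal sketch of the two dial paths this change affects (the target string and resolver are placeholders; WithBlock, WithInsecure, WithBalancer, and RoundRobin are the real gRPC options and balancer involved):

	// Blocking dial through a balancer: Dial now returns once the balancer
	// delivers at least one address and a connection to it succeeds, instead
	// of connecting to every address serially.
	conn, err := grpc.Dial("service", grpc.WithInsecure(), grpc.WithBlock(),
		grpc.WithBalancer(grpc.RoundRobin(resolver)))

	// Non-blocking dial through a balancer that initially reports no servers:
	// previously this failed with errNoAddr; it now succeeds immediately and
	// connects in the background as addresses arrive.
	conn, err = grpc.Dial("service", grpc.WithInsecure(),
		grpc.WithBalancer(grpc.RoundRobin(resolver)))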
dfawley authored on 2017-03-21 11:35:53 -07:00, committed by GitHub
parent cdee119ee2, commit c5a5dbc500
3 changed files with 101 additions and 64 deletions


@@ -80,7 +80,6 @@ var (
	errConnClosing = errors.New("grpc: the connection is closing")
	// errConnUnavailable indicates that the connection is unavailable.
	errConnUnavailable = errors.New("grpc: the connection is unavailable")
	errNoAddr = errors.New("grpc: there is no address available to dial")
	// minimum time to give a connection to complete
	minConnectTimeout = 20 * time.Second
)
@@ -359,17 +358,13 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
		}
		cc.authority = target[:colonPos]
	}
	var ok bool
	waitC := make(chan error, 1)
	go func() {
		var addrs []Address
		defer close(waitC)
		if cc.dopts.balancer == nil && cc.sc.LB != nil {
			cc.dopts.balancer = cc.sc.LB
		}
		if cc.dopts.balancer == nil {
			// Connect to target directly if balancer is nil.
			addrs = append(addrs, Address{Addr: target})
		} else {
		if cc.dopts.balancer != nil {
			var credsClone credentials.TransportCredentials
			if creds != nil {
				credsClone = creds.Clone()
@@ -382,24 +377,22 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
				return
			}
			ch := cc.dopts.balancer.Notify()
			if ch == nil {
				// There is no name resolver installed.
				addrs = append(addrs, Address{Addr: target})
			} else {
				addrs, ok = <-ch
				if !ok || len(addrs) == 0 {
					waitC <- errNoAddr
					return
			if ch != nil {
				if cc.dopts.block {
					doneChan := make(chan struct{})
					go cc.lbWatcher(doneChan)
					<-doneChan
				} else {
					go cc.lbWatcher(nil)
				}
				return
			}
		}
		for _, a := range addrs {
			if err := cc.resetAddrConn(a, false, nil); err != nil {
				waitC <- err
				return
			}
		}
		close(waitC)
		// No balancer, or no resolver within the balancer. Connect directly.
		if err := cc.resetAddrConn(Address{Addr: target}, cc.dopts.block, nil); err != nil {
			waitC <- err
			return
		}
	}()
	select {
	case <-ctx.Done():
@@ -410,15 +403,10 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
		}
	}
	// If balancer is nil or balancer.Notify() is nil, ok will be false here.
	// The lbWatcher goroutine will not be created.
	if ok {
		go cc.lbWatcher()
	}
	if cc.dopts.scChan != nil {
		go cc.scWatcher()
	}
	return cc, nil
}
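
The hunks above lean on a standard Go pattern: the dial goroutine reports its first error on a buffered channel and closes the channel on success, while the caller selects between that channel and ctx.Done(). A self-contained sketch of the pattern (connect is a hypothetical stand-in for the real connection logic):

	func dialSketch(ctx context.Context, connect func() error) error {
		waitC := make(chan error, 1)
		go func() {
			defer close(waitC) // a receive from the closed channel yields nil: success
			if err := connect(); err != nil {
				waitC <- err // buffered send, so it never blocks if the caller gave up
			}
		}()
		select {
		case <-ctx.Done():
			return ctx.Err() // the goroutine is left to finish on its own
		case err := <-waitC:
			return err
		}
	}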
@@ -469,7 +457,10 @@ type ClientConn struct {
	conns map[Address]*addrConn
}

func (cc *ClientConn) lbWatcher() {
// lbWatcher watches the Notify channel of the balancer in cc and manages
// connections accordingly. If doneChan is not nil, it is closed after the
// first successful connection is made.
func (cc *ClientConn) lbWatcher(doneChan chan struct{}) {
	for addrs := range cc.dopts.balancer.Notify() {
		var (
			add []Address // Addresses that need connections set up.
@@ -496,7 +487,15 @@ func (cc *ClientConn) lbWatcher() {
		}
		cc.mu.Unlock()
		for _, a := range add {
			cc.resetAddrConn(a, true, nil)
			if doneChan != nil {
				err := cc.resetAddrConn(a, true, nil)
				if err == nil {
					close(doneChan)
					doneChan = nil
				}
			} else {
				cc.resetAddrConn(a, false, nil)
			}
		}
		for _, c := range del {
			c.tearDown(errConnDrain)
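
The doneChan handling above is the close-once idiom: the first successful connection closes the channel to release the blocked dialer, and the variable is set to nil so later iterations cannot close it again. In isolation (notify and connect are hypothetical stand-ins for the balancer channel and resetAddrConn):

	func watchSketch(notify <-chan []string, connect func(addr string, block bool) error, doneChan chan struct{}) {
		for addrs := range notify {
			for _, a := range addrs {
				if doneChan != nil {
					// Connect synchronously until one attempt succeeds.
					if err := connect(a, true); err == nil {
						close(doneChan) // release the blocked dialer exactly once
						doneChan = nil  // a second close would panic; drop the reference
					}
				} else {
					connect(a, false) // after the first success, connect without blocking
				}
			}
		}
	}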
@@ -525,7 +524,7 @@ func (cc *ClientConn) scWatcher() {
// resetAddrConn creates an addrConn for addr and adds it to cc.conns.
// If there is an old addrConn for addr, it will be torn down, using tearDownErr as the reason.
// If tearDownErr is nil, errConnDrain will be used instead.
func (cc *ClientConn) resetAddrConn(addr Address, skipWait bool, tearDownErr error) error {
func (cc *ClientConn) resetAddrConn(addr Address, block bool, tearDownErr error) error {
	ac := &addrConn{
		cc:   cc,
		addr: addr,
@@ -575,8 +574,7 @@ func (cc *ClientConn) resetAddrConn(addr Address, skipWait bool, tearDownErr err
			stale.tearDown(tearDownErr)
		}
	}
	// skipWait may overwrite the decision in ac.dopts.block.
	if ac.dopts.block && !skipWait {
	if block {
		if err := ac.resetTransport(false); err != nil {
			if err != errConnClosing {
				// Tear down ac and delete it from cc.conns.
@@ -877,9 +875,9 @@ func (ac *addrConn) transportMonitor() {
			// In both cases, a new ac is created.
			select {
			case <-t.Error():
				ac.cc.resetAddrConn(ac.addr, true, errNetworkIO)
				ac.cc.resetAddrConn(ac.addr, false, errNetworkIO)
			default:
				ac.cc.resetAddrConn(ac.addr, true, errConnDrain)
				ac.cc.resetAddrConn(ac.addr, false, errConnDrain)
			}
			return
		case <-t.Error():
@@ -888,7 +886,7 @@ func (ac *addrConn) transportMonitor() {
				t.Close()
				return
			case <-t.GoAway():
				ac.cc.resetAddrConn(ac.addr, true, errNetworkIO)
				ac.cc.resetAddrConn(ac.addr, false, errNetworkIO)
				return
			default:
			}


@@ -253,3 +253,44 @@ func TestDialWithBlockErrorOnNonTemporaryErrorDialer(t *testing.T) {
		t.Fatalf("Dial(%q) = %v, want %v", "", err, context.DeadlineExceeded)
	}
}

// emptyBalancer returns an empty set of servers.
type emptyBalancer struct {
	ch chan []Address
}

func newEmptyBalancer() Balancer {
	return &emptyBalancer{ch: make(chan []Address, 1)}
}
func (b *emptyBalancer) Start(_ string, _ BalancerConfig) error {
	b.ch <- nil
	return nil
}
func (b *emptyBalancer) Up(_ Address) func(error) {
	return nil
}
func (b *emptyBalancer) Get(_ context.Context, _ BalancerGetOptions) (Address, func(), error) {
	return Address{}, nil, nil
}
func (b *emptyBalancer) Notify() <-chan []Address {
	return b.ch
}
func (b *emptyBalancer) Close() error {
	close(b.ch)
	return nil
}

func TestNonblockingDialWithEmptyBalancer(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	dialDone := make(chan struct{})
	go func() {
		conn, err := DialContext(ctx, "Non-Existent.Server:80", WithInsecure(), WithBalancer(newEmptyBalancer()))
		if err != nil {
			t.Fatalf("unexpected error dialing connection: %v", err)
		}
		conn.Close()
		close(dialDone)
	}()
	<-dialDone
	cancel()
}
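
For contrast, a hypothetical blocking variant (not part of this commit): with WithBlock, the same empty balancer leaves DialContext waiting on doneChan, so the dial returns only when the context expires:

	func TestBlockingDialWithEmptyBalancer(t *testing.T) {
		ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
		defer cancel()
		// The balancer never reports a usable address, so no connection is
		// ever made and the blocked dial ends with the context's error.
		_, err := DialContext(ctx, "Non-Existent.Server:80",
			WithInsecure(), WithBlock(), WithBalancer(newEmptyBalancer()))
		if err != context.DeadlineExceeded {
			t.Fatalf("DialContext(_, _) = _, %v, want _, %v", err, context.DeadlineExceeded)
		}
	}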


@@ -332,25 +332,19 @@ func TestDropRequest(t *testing.T) {
	if err != nil {
		t.Fatalf("Failed to generate the port number %v", err)
	}
	var bes []*lbpb.Server
	be := &lbpb.Server{
		IpAddress:        []byte(beAddr1[0]),
		Port:             int32(bePort1),
		LoadBalanceToken: lbToken,
		DropRequest:      true,
	}
	bes = append(bes, be)
	be = &lbpb.Server{
		IpAddress:        []byte(beAddr2[0]),
		Port:             int32(bePort2),
		LoadBalanceToken: lbToken,
		DropRequest:      false,
	}
	bes = append(bes, be)
	sl := &lbpb.ServerList{
		Servers: bes,
	}
	sls := []*lbpb.ServerList{sl}
	sls := []*lbpb.ServerList{{
		Servers: []*lbpb.Server{{
			IpAddress:        []byte(beAddr1[0]),
			Port:             int32(bePort1),
			LoadBalanceToken: lbToken,
			DropRequest:      true,
		}, {
			IpAddress:        []byte(beAddr2[0]),
			Port:             int32(bePort2),
			LoadBalanceToken: lbToken,
			DropRequest:      false,
		}},
	}}
	intervals := []time.Duration{0}
	ls := newRemoteBalancer(sls, intervals)
	lbpb.RegisterLoadBalancerServer(lb, ls)
@@ -371,20 +365,24 @@ func TestDropRequest(t *testing.T) {
	if err != nil {
		t.Fatalf("Failed to dial to the backend %v", err)
	}
	// The 1st fail-fast RPC should fail because the 1st backend has DropRequest set to true.
	helloC := hwpb.NewGreeterClient(cc)
	if _, err := helloC.SayHello(context.Background(), &hwpb.HelloRequest{Name: "grpc"}); grpc.Code(err) != codes.Unavailable {
		t.Fatalf("%v.SayHello(_, _) = _, %v, want _, %s", helloC, err, codes.Unavailable)
	}
	// The 2nd fail-fast RPC should succeed since it chooses the non-drop-request backend according
	// to the round robin policy.
	if _, err := helloC.SayHello(context.Background(), &hwpb.HelloRequest{Name: "grpc"}); err != nil {
		t.Fatalf("%v.SayHello(_, _) = _, %v, want _, <nil>", helloC, err)
	}
	// The 3rd non-fail-fast RPC should succeed.
	// The 1st, non-fail-fast RPC should succeed. This ensures both server
	// connections are made, because the first one has DropRequest set to true.
	if _, err := helloC.SayHello(context.Background(), &hwpb.HelloRequest{Name: "grpc"}, grpc.FailFast(false)); err != nil {
		t.Fatalf("%v.SayHello(_, _) = _, %v, want _, <nil>", helloC, err)
	}
	for i := 0; i < 3; i++ {
		// Odd fail-fast RPCs should fail, because the 1st backend has DropRequest
		// set to true.
		if _, err := helloC.SayHello(context.Background(), &hwpb.HelloRequest{Name: "grpc"}); grpc.Code(err) != codes.Unavailable {
			t.Fatalf("%v.SayHello(_, _) = _, %v, want _, %s", helloC, err, codes.Unavailable)
		}
		// Even fail-fast RPCs should succeed since they choose the
		// non-drop-request backend according to the round robin policy.
		if _, err := helloC.SayHello(context.Background(), &hwpb.HelloRequest{Name: "grpc"}); err != nil {
			t.Fatalf("%v.SayHello(_, _) = _, %v, want _, <nil>", helloC, err)
		}
	}
	cc.Close()
}
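
The loop encodes the round robin expectation: with two backends where only the first drops requests, picks alternate between them, so fail-fast RPCs alternate between codes.Unavailable and success. A minimal model of that arithmetic (hypothetical, not part of the test):

	// dropExpected models the test's round robin expectation: backends are
	// visited in order [drop, keep], so the 1st, 3rd, 5th, ... fail-fast RPCs
	// (i = 0, 2, 4, ... when 0-indexed) land on the DropRequest backend.
	func dropExpected(i int) bool {
		return i%2 == 0
	}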