internal: fix client send preface problems (#2380)
internal: fix client send preface problems This CL fixes three problems: - In clientconn_state_transitions_test.go, sometimes tests would flake because there's not enough buffer to send client side settings, causing the connection to unpredictably enter TRANSIENT FAILURE. Each time we set up a server to send SETTINGS, we should also set up the server to read. This allows the client to successfully send its SETTINGS, unflaking the test. - In clientconn.go, we incorrectly transitioned into TRANSIENT FAILURE when creating an http2client returned an error. This should be handled in the outer resetTransport main reset loop. The reason this became a problem is that the outer resetTransport has very specific conditions around when to transition into TRANSIENT FAILURE that the egregious transition did not have. So, it could transition into TRANSIENT FAILURE after failing to dial, even if it was trying to connect to a non-final address in the list of addresses. - In clientconn.go, we incorrectly stay in CONNECTING after `createTransport` when a server sends its connection preface but the client is not able to send its connection preface. This CL causes the addrconn to correctly enter TRANSIENT FAILURE when `createTransport` fails, even if a server preface was received. It does so by making ac.successfulHandshake to consider both server preface received as well as client preface sent.
This commit is contained in:
@ -1059,6 +1059,10 @@ func (ac *addrConn) createTransport(backoffNum int, addr resolver.Address, copts
|
||||
prefaceReceived := make(chan struct{})
|
||||
onCloseCalled := make(chan struct{})
|
||||
|
||||
var prefaceMu sync.Mutex
|
||||
var serverPrefaceReceived bool
|
||||
var clientPrefaceWrote bool
|
||||
|
||||
onGoAway := func(r transport.GoAwayReason) {
|
||||
ac.mu.Lock()
|
||||
ac.adjustParams(r)
|
||||
@ -1100,11 +1104,18 @@ func (ac *addrConn) createTransport(backoffNum int, addr resolver.Address, copts
|
||||
|
||||
// TODO(deklerk): optimization; does anyone else actually use this lock? maybe we can just remove it for this scope
|
||||
ac.mu.Lock()
|
||||
ac.successfulHandshake = true
|
||||
ac.backoffDeadline = time.Time{}
|
||||
ac.connectDeadline = time.Time{}
|
||||
ac.addrIdx = 0
|
||||
ac.backoffIdx = 0
|
||||
|
||||
prefaceMu.Lock()
|
||||
serverPrefaceReceived = true
|
||||
if clientPrefaceWrote {
|
||||
ac.successfulHandshake = true
|
||||
ac.backoffDeadline = time.Time{}
|
||||
ac.connectDeadline = time.Time{}
|
||||
ac.addrIdx = 0
|
||||
ac.backoffIdx = 0
|
||||
}
|
||||
prefaceMu.Unlock()
|
||||
|
||||
ac.mu.Unlock()
|
||||
}
|
||||
|
||||
@ -1117,6 +1128,13 @@ func (ac *addrConn) createTransport(backoffNum int, addr resolver.Address, copts
|
||||
newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, target, copts, onPrefaceReceipt, onGoAway, onClose)
|
||||
|
||||
if err == nil {
|
||||
prefaceMu.Lock()
|
||||
clientPrefaceWrote = true
|
||||
if serverPrefaceReceived {
|
||||
ac.successfulHandshake = true
|
||||
}
|
||||
prefaceMu.Unlock()
|
||||
|
||||
if ac.dopts.waitForHandshake {
|
||||
select {
|
||||
case <-prefaceTimer.C:
|
||||
@ -1160,8 +1178,6 @@ func (ac *addrConn) createTransport(backoffNum int, addr resolver.Address, copts
|
||||
|
||||
return errConnClosing
|
||||
}
|
||||
ac.updateConnectivityState(connectivity.TransientFailure)
|
||||
ac.cc.handleSubConnStateChange(ac.acbw, ac.state)
|
||||
ac.mu.Unlock()
|
||||
grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v. Err :%v. Reconnecting...", addr, err)
|
||||
|
||||
|
Reference in New Issue
Block a user