From e35bf029142567fc91597ffcafe5666a1e5649b6 Mon Sep 17 00:00:00 2001 From: iamqizhao Date: Tue, 28 Jul 2015 11:12:07 -0700 Subject: [PATCH 1/2] revise reconnect backoff strategy --- clientconn.go | 16 +++++++++++++++- rpc_util.go | 25 +++++++++++++------------ transport/http2_client.go | 2 +- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/clientconn.go b/clientconn.go index bc6d6f5b..37f78de4 100644 --- a/clientconn.go +++ b/clientconn.go @@ -55,6 +55,8 @@ var ( // ErrClientConnTimeout indicates that the connection could not be // established or re-established within the specified timeout. ErrClientConnTimeout = errors.New("grpc: timed out trying to connect") + // minimum time to give a connection to complete + minConnectTimeout = 20 * time.Second ) // dialOptions configure a Dial call. dialOptions are set by the DialOption @@ -211,9 +213,21 @@ func (cc *ClientConn) resetTransport(closeTransport bool) error { return ErrClientConnTimeout } } + sleepTime := backoff(retries) + timeout := sleepTime + if timeout < minConnectTimeout { + timeout = minConnectTimeout + } + if copts.Timeout > timeout { + copts.Timeout = timeout + } + connectTime := time.Now() newTransport, err := transport.NewClientTransport(cc.target, &copts) if err != nil { - sleepTime := backoff(retries) + sleepTime -= time.Since(connectTime) + if sleepTime < 0 { + sleepTime = 0 + } // Fail early before falling into sleep. 
if cc.dopts.copts.Timeout > 0 && cc.dopts.copts.Timeout < sleepTime+time.Since(start) { cc.Close() diff --git a/rpc_util.go b/rpc_util.go index 49512603..a0f0b48b 100644 --- a/rpc_util.go +++ b/rpc_util.go @@ -277,28 +277,29 @@ func convertCode(err error) codes.Code { const ( // how long to wait after the first failure before retrying baseDelay = 1.0 * time.Second - // upper bound on backoff delay - maxDelay = 120 * time.Second - backoffFactor = 2.0 // backoff increases by this factor on each retry - backoffRange = 0.4 // backoff is randomized downwards by this factor + // upper bound of backoff delay + maxDelay = 120 * time.Second + // backoff increases by this factor on each retry + backoffFactor = 1.6 + // backoff is randomized up or down by this factor + backoffJitter = 0.2 ) -// backoff returns a value in [0, maxDelay] that increases exponentially with -// retries, starting from baseDelay. -func backoff(retries int) time.Duration { +func backoff(retries int) (t time.Duration) { + if retries == 0 { + return baseDelay + } backoff, max := float64(baseDelay), float64(maxDelay) for backoff < max && retries > 0 { - backoff = backoff * backoffFactor + backoff *= backoffFactor retries-- } if backoff > max { backoff = max } - // Randomize backoff delays so that if a cluster of requests start at - // the same time, they won't operate in lockstep. We just subtract up - // to 40% so that we obey maxDelay. - backoff -= backoff * backoffRange * rand.Float64() + // Randomize backoff delays so that requests started together won't operate in lockstep. + backoff *= 1 + backoffJitter*(rand.Float64()*2-1) if backoff < 0 { return 0 } diff --git a/transport/http2_client.go b/transport/http2_client.go index 6cac6e82..f956b1ea 100644 --- a/transport/http2_client.go +++ b/transport/http2_client.go @@ -555,7 +555,7 @@ func (t *http2Client) handleData(f *http2.DataFrame) { // the read direction is closed, and set the status appropriately. 
if f.FrameHeader.Flags.Has(http2.FlagDataEndStream) { s.mu.Lock() - if (s.state == streamWriteDone) { + if s.state == streamWriteDone { s.state = streamDone } else { s.state = streamReadDone From bce3d5e24b1ac6d7d983a01dbe3aa969604743f3 Mon Sep 17 00:00:00 2001 From: iamqizhao Date: Tue, 28 Jul 2015 11:24:31 -0700 Subject: [PATCH 2/2] small fix --- clientconn.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clientconn.go b/clientconn.go index 37f78de4..d9f7077b 100644 --- a/clientconn.go +++ b/clientconn.go @@ -218,7 +218,7 @@ func (cc *ClientConn) resetTransport(closeTransport bool) error { if timeout < minConnectTimeout { timeout = minConnectTimeout } - if copts.Timeout > timeout { + if copts.Timeout == 0 || copts.Timeout > timeout { copts.Timeout = timeout } connectTime := time.Now()