Merge pull request #864 from tamird/retry-timeout
transport: robustly detect temporary errors
This commit is contained in:
@ -413,8 +413,7 @@ func newTest(t *testing.T, e env) *test {
|
|||||||
// call to te.tearDown to clean up.
|
// call to te.tearDown to clean up.
|
||||||
func (te *test) startServer(ts testpb.TestServiceServer) {
|
func (te *test) startServer(ts testpb.TestServiceServer) {
|
||||||
te.testServer = ts
|
te.testServer = ts
|
||||||
e := te.e
|
te.t.Logf("Running test in %s environment...", te.e.name)
|
||||||
te.t.Logf("Running test in %s environment...", e.name)
|
|
||||||
sopts := []grpc.ServerOption{grpc.MaxConcurrentStreams(te.maxStream)}
|
sopts := []grpc.ServerOption{grpc.MaxConcurrentStreams(te.maxStream)}
|
||||||
if te.maxMsgSize > 0 {
|
if te.maxMsgSize > 0 {
|
||||||
sopts = append(sopts, grpc.MaxMsgSize(te.maxMsgSize))
|
sopts = append(sopts, grpc.MaxMsgSize(te.maxMsgSize))
|
||||||
@ -432,12 +431,12 @@ func (te *test) startServer(ts testpb.TestServiceServer) {
|
|||||||
sopts = append(sopts, grpc.StreamInterceptor(te.streamInt))
|
sopts = append(sopts, grpc.StreamInterceptor(te.streamInt))
|
||||||
}
|
}
|
||||||
la := "localhost:0"
|
la := "localhost:0"
|
||||||
switch e.network {
|
switch te.e.network {
|
||||||
case "unix":
|
case "unix":
|
||||||
la = "/tmp/testsock" + fmt.Sprintf("%d", time.Now())
|
la = "/tmp/testsock" + fmt.Sprintf("%d", time.Now())
|
||||||
syscall.Unlink(la)
|
syscall.Unlink(la)
|
||||||
}
|
}
|
||||||
lis, err := net.Listen(e.network, la)
|
lis, err := net.Listen(te.e.network, la)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
te.t.Fatalf("Failed to listen: %v", err)
|
te.t.Fatalf("Failed to listen: %v", err)
|
||||||
}
|
}
|
||||||
@ -455,7 +454,7 @@ func (te *test) startServer(ts testpb.TestServiceServer) {
|
|||||||
}
|
}
|
||||||
s := grpc.NewServer(sopts...)
|
s := grpc.NewServer(sopts...)
|
||||||
te.srv = s
|
te.srv = s
|
||||||
if e.httpHandler {
|
if te.e.httpHandler {
|
||||||
internal.TestingUseHandlerImpl(s)
|
internal.TestingUseHandlerImpl(s)
|
||||||
}
|
}
|
||||||
if te.healthServer != nil {
|
if te.healthServer != nil {
|
||||||
@ -465,7 +464,7 @@ func (te *test) startServer(ts testpb.TestServiceServer) {
|
|||||||
testpb.RegisterTestServiceServer(s, te.testServer)
|
testpb.RegisterTestServiceServer(s, te.testServer)
|
||||||
}
|
}
|
||||||
addr := la
|
addr := la
|
||||||
switch e.network {
|
switch te.e.network {
|
||||||
case "unix":
|
case "unix":
|
||||||
default:
|
default:
|
||||||
_, port, err := net.SplitHostPort(lis.Addr().String())
|
_, port, err := net.SplitHostPort(lis.Addr().String())
|
||||||
@ -566,7 +565,7 @@ func testTimeoutOnDeadServer(t *testing.T, e env) {
|
|||||||
te.srv.Stop()
|
te.srv.Stop()
|
||||||
ctx, _ := context.WithTimeout(context.Background(), time.Millisecond)
|
ctx, _ := context.WithTimeout(context.Background(), time.Millisecond)
|
||||||
if _, err := tc.EmptyCall(ctx, &testpb.Empty{}, grpc.FailFast(false)); grpc.Code(err) != codes.DeadlineExceeded {
|
if _, err := tc.EmptyCall(ctx, &testpb.Empty{}, grpc.FailFast(false)); grpc.Code(err) != codes.DeadlineExceeded {
|
||||||
t.Fatalf("TestService/EmptyCall(%v, _) = _, %v, want _, error code: %d", ctx, err, codes.DeadlineExceeded)
|
t.Fatalf("TestService/EmptyCall(%v, _) = _, %v, want _, error code: %s", ctx, err, codes.DeadlineExceeded)
|
||||||
}
|
}
|
||||||
awaitNewConnLogOutput()
|
awaitNewConnLogOutput()
|
||||||
}
|
}
|
||||||
@ -849,10 +848,10 @@ func testFailFast(t *testing.T, e env) {
|
|||||||
}
|
}
|
||||||
// The client keeps reconnecting and ongoing fail-fast RPCs should fail with code.Unavailable.
|
// The client keeps reconnecting and ongoing fail-fast RPCs should fail with code.Unavailable.
|
||||||
if _, err := tc.EmptyCall(context.Background(), &testpb.Empty{}); grpc.Code(err) != codes.Unavailable {
|
if _, err := tc.EmptyCall(context.Background(), &testpb.Empty{}); grpc.Code(err) != codes.Unavailable {
|
||||||
t.Fatalf("TestService/EmptyCall(_, _, _) = _, %v, want _, error code: %d", err, codes.Unavailable)
|
t.Fatalf("TestService/EmptyCall(_, _, _) = _, %v, want _, error code: %s", err, codes.Unavailable)
|
||||||
}
|
}
|
||||||
if _, err := tc.StreamingInputCall(context.Background()); grpc.Code(err) != codes.Unavailable {
|
if _, err := tc.StreamingInputCall(context.Background()); grpc.Code(err) != codes.Unavailable {
|
||||||
t.Fatalf("TestService/StreamingInputCall(_) = _, %v, want _, error code: %d", err, codes.Unavailable)
|
t.Fatalf("TestService/StreamingInputCall(_) = _, %v, want _, error code: %s", err, codes.Unavailable)
|
||||||
}
|
}
|
||||||
|
|
||||||
awaitNewConnLogOutput()
|
awaitNewConnLogOutput()
|
||||||
@ -1112,7 +1111,7 @@ func testExceedMsgLimit(t *testing.T, e env) {
|
|||||||
Payload: payload,
|
Payload: payload,
|
||||||
}
|
}
|
||||||
if _, err := tc.UnaryCall(context.Background(), req); err == nil || grpc.Code(err) != codes.Internal {
|
if _, err := tc.UnaryCall(context.Background(), req); err == nil || grpc.Code(err) != codes.Internal {
|
||||||
t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %d", err, codes.Internal)
|
t.Fatalf("TestService/UnaryCall(_, _) = _, %v, want _, error code: %s", err, codes.Internal)
|
||||||
}
|
}
|
||||||
|
|
||||||
stream, err := tc.FullDuplexCall(te.ctx)
|
stream, err := tc.FullDuplexCall(te.ctx)
|
||||||
@ -1139,7 +1138,7 @@ func testExceedMsgLimit(t *testing.T, e env) {
|
|||||||
t.Fatalf("%v.Send(%v) = %v, want <nil>", stream, sreq, err)
|
t.Fatalf("%v.Send(%v) = %v, want <nil>", stream, sreq, err)
|
||||||
}
|
}
|
||||||
if _, err := stream.Recv(); err == nil || grpc.Code(err) != codes.Internal {
|
if _, err := stream.Recv(); err == nil || grpc.Code(err) != codes.Internal {
|
||||||
t.Fatalf("%v.Recv() = _, %v, want _, error code: %d", stream, err, codes.Internal)
|
t.Fatalf("%v.Recv() = _, %v, want _, error code: %s", stream, err, codes.Internal)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1331,7 +1330,7 @@ func testRPCTimeout(t *testing.T, e env) {
|
|||||||
for i := -1; i <= 10; i++ {
|
for i := -1; i <= 10; i++ {
|
||||||
ctx, _ := context.WithTimeout(context.Background(), time.Duration(i)*time.Millisecond)
|
ctx, _ := context.WithTimeout(context.Background(), time.Duration(i)*time.Millisecond)
|
||||||
if _, err := tc.UnaryCall(ctx, req); grpc.Code(err) != codes.DeadlineExceeded {
|
if _, err := tc.UnaryCall(ctx, req); grpc.Code(err) != codes.DeadlineExceeded {
|
||||||
t.Fatalf("TestService/UnaryCallv(_, _) = _, %v; want <nil>, error code: %d", err, codes.DeadlineExceeded)
|
t.Fatalf("TestService/UnaryCallv(_, _) = _, %v; want <nil>, error code: %s", err, codes.DeadlineExceeded)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1368,7 +1367,7 @@ func testCancel(t *testing.T, e env) {
|
|||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
time.AfterFunc(1*time.Millisecond, cancel)
|
time.AfterFunc(1*time.Millisecond, cancel)
|
||||||
if r, err := tc.UnaryCall(ctx, req); grpc.Code(err) != codes.Canceled {
|
if r, err := tc.UnaryCall(ctx, req); grpc.Code(err) != codes.Canceled {
|
||||||
t.Fatalf("TestService/UnaryCall(_, _) = %v, %v; want _, error code: %d", r, err, codes.Canceled)
|
t.Fatalf("TestService/UnaryCall(_, _) = %v, %v; want _, error code: %s", r, err, codes.Canceled)
|
||||||
}
|
}
|
||||||
awaitNewConnLogOutput()
|
awaitNewConnLogOutput()
|
||||||
}
|
}
|
||||||
@ -2305,7 +2304,7 @@ func (c clientAlwaysFailCred) Info() credentials.ProtocolInfo {
|
|||||||
|
|
||||||
func TestDialWithBlockErrorOnBadCertificates(t *testing.T) {
|
func TestDialWithBlockErrorOnBadCertificates(t *testing.T) {
|
||||||
te := newTest(t, env{name: "bad-cred", network: "tcp", security: "clientAlwaysFailCred", balancer: true})
|
te := newTest(t, env{name: "bad-cred", network: "tcp", security: "clientAlwaysFailCred", balancer: true})
|
||||||
te.startServer(&testServer{security: "clientAlwaysFailCred"})
|
te.startServer(&testServer{security: te.e.security})
|
||||||
defer te.tearDown()
|
defer te.tearDown()
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@ -2321,7 +2320,7 @@ func TestDialWithBlockErrorOnBadCertificates(t *testing.T) {
|
|||||||
|
|
||||||
func TestFailFastRPCErrorOnBadCertificates(t *testing.T) {
|
func TestFailFastRPCErrorOnBadCertificates(t *testing.T) {
|
||||||
te := newTest(t, env{name: "bad-cred", network: "tcp", security: "clientAlwaysFailCred", balancer: true})
|
te := newTest(t, env{name: "bad-cred", network: "tcp", security: "clientAlwaysFailCred", balancer: true})
|
||||||
te.startServer(&testServer{security: "clientAlwaysFailCred"})
|
te.startServer(&testServer{security: te.e.security})
|
||||||
defer te.tearDown()
|
defer te.tearDown()
|
||||||
|
|
||||||
cc := te.clientConn()
|
cc := te.clientConn()
|
||||||
@ -2333,7 +2332,7 @@ func TestFailFastRPCErrorOnBadCertificates(t *testing.T) {
|
|||||||
|
|
||||||
func TestFailFastRPCWithNoBalancerErrorOnBadCertificates(t *testing.T) {
|
func TestFailFastRPCWithNoBalancerErrorOnBadCertificates(t *testing.T) {
|
||||||
te := newTest(t, env{name: "bad-cred", network: "tcp", security: "clientAlwaysFailCred", balancer: false})
|
te := newTest(t, env{name: "bad-cred", network: "tcp", security: "clientAlwaysFailCred", balancer: false})
|
||||||
te.startServer(&testServer{security: "clientAlwaysFailCred"})
|
te.startServer(&testServer{security: te.e.security})
|
||||||
defer te.tearDown()
|
defer te.tearDown()
|
||||||
|
|
||||||
cc := te.clientConn()
|
cc := te.clientConn()
|
||||||
@ -2345,7 +2344,7 @@ func TestFailFastRPCWithNoBalancerErrorOnBadCertificates(t *testing.T) {
|
|||||||
|
|
||||||
func TestNonFailFastRPCWithNoBalancerErrorOnBadCertificates(t *testing.T) {
|
func TestNonFailFastRPCWithNoBalancerErrorOnBadCertificates(t *testing.T) {
|
||||||
te := newTest(t, env{name: "bad-cred", network: "tcp", security: "clientAlwaysFailCred", balancer: false})
|
te := newTest(t, env{name: "bad-cred", network: "tcp", security: "clientAlwaysFailCred", balancer: false})
|
||||||
te.startServer(&testServer{security: "clientAlwaysFailCred"})
|
te.startServer(&testServer{security: te.e.security})
|
||||||
defer te.tearDown()
|
defer te.tearDown()
|
||||||
|
|
||||||
cc := te.clientConn()
|
cc := te.clientConn()
|
||||||
@ -2376,7 +2375,7 @@ func (c *clientTimeoutCreds) Info() credentials.ProtocolInfo {
|
|||||||
func TestNonFailFastRPCSucceedOnTimeoutCreds(t *testing.T) {
|
func TestNonFailFastRPCSucceedOnTimeoutCreds(t *testing.T) {
|
||||||
te := newTest(t, env{name: "timeout-cred", network: "tcp", security: "clientTimeoutCreds", balancer: false})
|
te := newTest(t, env{name: "timeout-cred", network: "tcp", security: "clientTimeoutCreds", balancer: false})
|
||||||
te.userAgent = testAppUA
|
te.userAgent = testAppUA
|
||||||
te.startServer(&testServer{security: "clientTimeoutCreds"})
|
te.startServer(&testServer{security: te.e.security})
|
||||||
defer te.tearDown()
|
defer te.tearDown()
|
||||||
|
|
||||||
cc := te.clientConn()
|
cc := te.clientConn()
|
||||||
|
@ -114,14 +114,42 @@ func dial(fn func(context.Context, string) (net.Conn, error), ctx context.Contex
|
|||||||
return dialContext(ctx, "tcp", addr)
|
return dialContext(ctx, "tcp", addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isTemporary(err error) bool {
|
||||||
|
switch err {
|
||||||
|
case io.EOF:
|
||||||
|
// Connection closures may be resolved upon retry, and are thus
|
||||||
|
// treated as temporary.
|
||||||
|
return true
|
||||||
|
case context.DeadlineExceeded:
|
||||||
|
// In Go 1.7, context.DeadlineExceeded implements Timeout(), and this
|
||||||
|
// special case is not needed. Until then, we need to keep this
|
||||||
|
// clause.
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
switch err := err.(type) {
|
||||||
|
case interface {
|
||||||
|
Temporary() bool
|
||||||
|
}:
|
||||||
|
return err.Temporary()
|
||||||
|
case interface {
|
||||||
|
Timeout() bool
|
||||||
|
}:
|
||||||
|
// Timeouts may be resolved upon retry, and are thus treated as
|
||||||
|
// temporary.
|
||||||
|
return err.Timeout()
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// newHTTP2Client constructs a connected ClientTransport to addr based on HTTP2
|
// newHTTP2Client constructs a connected ClientTransport to addr based on HTTP2
|
||||||
// and starts to receive messages on it. Non-nil error returns if construction
|
// and starts to receive messages on it. Non-nil error returns if construction
|
||||||
// fails.
|
// fails.
|
||||||
func newHTTP2Client(ctx context.Context, addr string, opts ConnectOptions) (_ ClientTransport, err error) {
|
func newHTTP2Client(ctx context.Context, addr string, opts ConnectOptions) (_ ClientTransport, err error) {
|
||||||
scheme := "http"
|
scheme := "http"
|
||||||
conn, connErr := dial(opts.Dialer, ctx, addr)
|
conn, err := dial(opts.Dialer, ctx, addr)
|
||||||
if connErr != nil {
|
if err != nil {
|
||||||
return nil, ConnectionErrorf(true, connErr, "transport: %v", connErr)
|
return nil, ConnectionErrorf(true, err, "transport: %v", err)
|
||||||
}
|
}
|
||||||
// Any further errors will close the underlying connection
|
// Any further errors will close the underlying connection
|
||||||
defer func(conn net.Conn) {
|
defer func(conn net.Conn) {
|
||||||
@ -132,17 +160,13 @@ func newHTTP2Client(ctx context.Context, addr string, opts ConnectOptions) (_ Cl
|
|||||||
var authInfo credentials.AuthInfo
|
var authInfo credentials.AuthInfo
|
||||||
if creds := opts.TransportCredentials; creds != nil {
|
if creds := opts.TransportCredentials; creds != nil {
|
||||||
scheme = "https"
|
scheme = "https"
|
||||||
conn, authInfo, connErr = creds.ClientHandshake(ctx, addr, conn)
|
conn, authInfo, err = creds.ClientHandshake(ctx, addr, conn)
|
||||||
|
if err != nil {
|
||||||
|
// Credentials handshake errors are typically considered permanent
|
||||||
|
// to avoid retrying on e.g. bad certificates.
|
||||||
|
temp := isTemporary(err)
|
||||||
|
return nil, ConnectionErrorf(temp, err, "transport: %v", err)
|
||||||
}
|
}
|
||||||
if connErr != nil {
|
|
||||||
// Credentials handshake error is not a temporary error (unless the error
|
|
||||||
// was the connection closing or deadline exceeded).
|
|
||||||
var temp bool
|
|
||||||
switch connErr {
|
|
||||||
case io.EOF, context.DeadlineExceeded:
|
|
||||||
temp = true
|
|
||||||
}
|
|
||||||
return nil, ConnectionErrorf(temp, connErr, "transport: %v", connErr)
|
|
||||||
}
|
}
|
||||||
ua := primaryUA
|
ua := primaryUA
|
||||||
if opts.UserAgent != "" {
|
if opts.UserAgent != "" {
|
||||||
|
Reference in New Issue
Block a user