Support fail-fast mode and make it the default
This commit is contained in:
		
							
								
								
									
										22
									
								
								balancer.go
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								balancer.go
									
									
									
									
									
								
							| @ -94,10 +94,10 @@ type Balancer interface { | ||||
| 	// instead of blocking. | ||||
| 	// | ||||
| 	// The function returns put which is called once the rpc has completed or failed. | ||||
| 	// put can collect and report RPC stats to a remote load balancer. gRPC internals | ||||
| 	// will try to call this again if err is non-nil (unless err is ErrClientConnClosing). | ||||
| 	// put can collect and report RPC stats to a remote load balancer. | ||||
| 	// | ||||
| 	// TODO: Add other non-recoverable errors? | ||||
| 	// This function should only return the errors Balancer cannot recover by itself. | ||||
| 	// gRPC internals will fail the RPC if an error is returned. | ||||
| 	Get(ctx context.Context, opts BalancerGetOptions) (addr Address, put func(), err error) | ||||
| 	// Notify returns a channel that is used by gRPC internals to watch the addresses | ||||
| 	// gRPC needs to connect. The addresses might be from a name resolver or remote | ||||
| @ -298,8 +298,20 @@ func (rr *roundRobin) Get(ctx context.Context, opts BalancerGetOptions) (addr Ad | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	// There is no address available. Wait on rr.waitCh. | ||||
| 	// TODO(zhaoq): Handle the case when opts.BlockingWait is false. | ||||
| 	// There is no address available. | ||||
| 	if !opts.BlockingWait { | ||||
| 		if len(rr.addrs) == 0 { | ||||
| 			rr.mu.Unlock() | ||||
| 			err = fmt.Errorf("there is no address available") | ||||
| 			return | ||||
| 		} | ||||
| 		// Returns the next addr on rr.addrs for failfast RPCs. | ||||
| 		addr = rr.addrs[rr.next].addr | ||||
| 		rr.next++ | ||||
| 		rr.mu.Unlock() | ||||
| 		return | ||||
| 	} | ||||
| 	// Wait on rr.waitCh for non-failfast RPCs. | ||||
| 	if rr.waitCh == nil { | ||||
| 		ch = make(chan struct{}) | ||||
| 		rr.waitCh = ch | ||||
|  | ||||
| @ -239,7 +239,7 @@ func TestCloseWithPendingRPC(t *testing.T) { | ||||
| 		t.Fatalf("Failed to create ClientConn: %v", err) | ||||
| 	} | ||||
| 	var reply string | ||||
| 	if err := Invoke(context.Background(), "/foo/bar", &expectedRequest, &reply, cc); err != nil { | ||||
| 	if err := Invoke(context.Background(), "/foo/bar", &expectedRequest, &reply, cc, FailFast(false)); err != nil { | ||||
| 		t.Fatalf("grpc.Invoke(_, _, _, _, _) = %v, want %s", err, servers[0].port) | ||||
| 	} | ||||
| 	// Remove the server. | ||||
| @ -251,7 +251,7 @@ func TestCloseWithPendingRPC(t *testing.T) { | ||||
| 	// Loop until the above update applies. | ||||
| 	for { | ||||
| 		ctx, _ := context.WithTimeout(context.Background(), 10*time.Millisecond) | ||||
| 		if err := Invoke(ctx, "/foo/bar", &expectedRequest, &reply, cc); Code(err) == codes.DeadlineExceeded { | ||||
| 		if err := Invoke(ctx, "/foo/bar", &expectedRequest, &reply, cc, FailFast(false)); Code(err) == codes.DeadlineExceeded { | ||||
| 			break | ||||
| 		} | ||||
| 		time.Sleep(10 * time.Millisecond) | ||||
| @ -262,7 +262,7 @@ func TestCloseWithPendingRPC(t *testing.T) { | ||||
| 	go func() { | ||||
| 		defer wg.Done() | ||||
| 		var reply string | ||||
| 		if err := Invoke(context.Background(), "/foo/bar", &expectedRequest, &reply, cc); err == nil { | ||||
| 		if err := Invoke(context.Background(), "/foo/bar", &expectedRequest, &reply, cc, FailFast(false)); err == nil { | ||||
| 			t.Errorf("grpc.Invoke(_, _, _, _, _) = %v, want not nil", err) | ||||
| 		} | ||||
| 	}() | ||||
| @ -270,7 +270,7 @@ func TestCloseWithPendingRPC(t *testing.T) { | ||||
| 		defer wg.Done() | ||||
| 		var reply string | ||||
| 		time.Sleep(5 * time.Millisecond) | ||||
| 		if err := Invoke(context.Background(), "/foo/bar", &expectedRequest, &reply, cc); err == nil { | ||||
| 		if err := Invoke(context.Background(), "/foo/bar", &expectedRequest, &reply, cc, FailFast(false)); err == nil { | ||||
| 			t.Errorf("grpc.Invoke(_, _, _, _, _) = %v, want not nil", err) | ||||
| 		} | ||||
| 	}() | ||||
| @ -295,7 +295,7 @@ func TestGetOnWaitChannel(t *testing.T) { | ||||
| 	for { | ||||
| 		var reply string | ||||
| 		ctx, _ := context.WithTimeout(context.Background(), 10*time.Millisecond) | ||||
| 		if err := Invoke(ctx, "/foo/bar", &expectedRequest, &reply, cc); Code(err) == codes.DeadlineExceeded { | ||||
| 		if err := Invoke(ctx, "/foo/bar", &expectedRequest, &reply, cc, FailFast(false)); Code(err) == codes.DeadlineExceeded { | ||||
| 			break | ||||
| 		} | ||||
| 		time.Sleep(10 * time.Millisecond) | ||||
| @ -305,7 +305,7 @@ func TestGetOnWaitChannel(t *testing.T) { | ||||
| 	go func() { | ||||
| 		defer wg.Done() | ||||
| 		var reply string | ||||
| 		if err := Invoke(context.Background(), "/foo/bar", &expectedRequest, &reply, cc); err != nil { | ||||
| 		if err := Invoke(context.Background(), "/foo/bar", &expectedRequest, &reply, cc, FailFast(false)); err != nil { | ||||
| 			t.Errorf("grpc.Invoke(_, _, _, _, _) = %v, want <nil>", err) | ||||
| 		} | ||||
| 	}() | ||||
|  | ||||
							
								
								
									
										10
									
								
								call.go
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								call.go
									
									
									
									
									
								
							| @ -35,6 +35,7 @@ package grpc | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	//"fmt" | ||||
| 	"io" | ||||
| 	"time" | ||||
|  | ||||
| @ -101,7 +102,7 @@ func sendRequest(ctx context.Context, codec Codec, compressor Compressor, callHd | ||||
| // Invoke is called by generated code. Also users can call Invoke directly when it | ||||
| // is really needed in their use cases. | ||||
| func Invoke(ctx context.Context, method string, args, reply interface{}, cc *ClientConn, opts ...CallOption) (err error) { | ||||
| 	var c callInfo | ||||
| 	c := defaultCallInfo | ||||
| 	for _, o := range opts { | ||||
| 		if err := o.before(&c); err != nil { | ||||
| 			return toRPCErr(err) | ||||
| @ -165,9 +166,12 @@ func Invoke(ctx context.Context, method string, args, reply interface{}, cc *Cli | ||||
| 				if c.failFast { | ||||
| 					return toRPCErr(err) | ||||
| 				} | ||||
| 				continue | ||||
| 			} | ||||
| 			// All the remaining cases are treated as retryable. | ||||
| 			continue | ||||
| 			// ALl the other errors are treated as Internal errors. | ||||
| 			return Errorf(codes.Internal, "%v", err) | ||||
| 			// All the remaining cases are treated as fatal. | ||||
| 			//panic(fmt.Sprintf("ClientConn.getTransport got an unsupported error: %v", err)) | ||||
| 		} | ||||
| 		if c.traceInfo.tr != nil { | ||||
| 			c.traceInfo.tr.LazyLog(&payload{sent: true, msg: args}, true) | ||||
|  | ||||
| @ -424,7 +424,6 @@ func (cc *ClientConn) newAddrConn(addr Address, skipWait bool) error { | ||||
| } | ||||
|  | ||||
| func (cc *ClientConn) getTransport(ctx context.Context, opts BalancerGetOptions) (transport.ClientTransport, func(), error) { | ||||
| 	// TODO(zhaoq): Implement fail-fast logic. | ||||
| 	addr, put, err := cc.balancer.Get(ctx, opts) | ||||
| 	if err != nil { | ||||
| 		return nil, nil, err | ||||
| @ -442,7 +441,7 @@ func (cc *ClientConn) getTransport(ctx context.Context, opts BalancerGetOptions) | ||||
| 		} | ||||
| 		return nil, nil, transport.StreamErrorf(codes.Internal, "grpc: failed to find the transport to send the rpc") | ||||
| 	} | ||||
| 	t, err := ac.wait(ctx) | ||||
| 	t, err := ac.wait(ctx, !opts.BlockingWait) | ||||
| 	if err != nil { | ||||
| 		if put != nil { | ||||
| 			put() | ||||
| @ -649,8 +648,9 @@ func (ac *addrConn) transportMonitor() { | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // wait blocks until i) the new transport is up or ii) ctx is done or iii) ac is closed. | ||||
| func (ac *addrConn) wait(ctx context.Context) (transport.ClientTransport, error) { | ||||
| // wait blocks until i) the new transport is up or ii) ctx is done or iii) ac is closed or | ||||
| // iv) transport is in TransientFailure and the RPC is fail-fast. | ||||
| func (ac *addrConn) wait(ctx context.Context, failFast bool) (transport.ClientTransport, error) { | ||||
| 	for { | ||||
| 		ac.mu.Lock() | ||||
| 		switch { | ||||
| @ -662,6 +662,10 @@ func (ac *addrConn) wait(ctx context.Context) (transport.ClientTransport, error) | ||||
| 			ac.mu.Unlock() | ||||
| 			return ct, nil | ||||
| 		default: | ||||
| 			if ac.state == TransientFailure && failFast { | ||||
| 				ac.mu.Unlock() | ||||
| 				return nil, transport.StreamErrorf(codes.Canceled, "grpc: RPC failed fast due to transport failure") | ||||
| 			} | ||||
| 			ready := ac.ready | ||||
| 			if ready == nil { | ||||
| 				ready = make(chan struct{}) | ||||
| @ -673,6 +677,13 @@ func (ac *addrConn) wait(ctx context.Context) (transport.ClientTransport, error) | ||||
| 				return nil, transport.ContextErr(ctx.Err()) | ||||
| 			// Wait until the new transport is ready or failed. | ||||
| 			case <-ready: | ||||
| 				ac.mu.Lock() | ||||
| 				if ac.state == TransientFailure && failFast { | ||||
| 					ac.mu.Unlock() | ||||
| 					return nil, transport.StreamErrorf(codes.Canceled, "grpc: RPC failed fast due to transport failure") | ||||
| 				} | ||||
| 				ac.mu.Unlock() | ||||
|  | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
							
								
								
									
										14
									
								
								rpc_util.go
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								rpc_util.go
									
									
									
									
									
								
							| @ -141,6 +141,8 @@ type callInfo struct { | ||||
| 	traceInfo traceInfo // in trace.go | ||||
| } | ||||
|  | ||||
| var defaultCallInfo = callInfo{failFast: true} | ||||
|  | ||||
| // CallOption configures a Call before it starts or extracts information from | ||||
| // a Call after it completes. | ||||
| type CallOption interface { | ||||
| @ -179,6 +181,18 @@ func Trailer(md *metadata.MD) CallOption { | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // FailFast configures the action to take when an RPC is attempted on broken | ||||
| // connections or unreachable servers. If failfast is true, the RPC will fail | ||||
| // immediately.  Otherwise, the RPC client will block the call until a | ||||
| // connection is available (or the call is canceled or times out) and will retry | ||||
| // the call if it fails due to a transient error. | ||||
| func FailFast(failFast bool) CallOption { | ||||
| 	return beforeCall(func(c *callInfo) error { | ||||
| 		c.failFast = failFast | ||||
| 		return nil | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // The format of the payload: compressed or not? | ||||
| type payloadFormat uint8 | ||||
|  | ||||
|  | ||||
							
								
								
									
										22
									
								
								stream.go
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								stream.go
									
									
									
									
									
								
							| @ -105,9 +105,14 @@ func NewClientStream(ctx context.Context, desc *StreamDesc, cc *ClientConn, meth | ||||
| 		err error | ||||
| 		put func() | ||||
| 	) | ||||
| 	// TODO(zhaoq): CallOption is omitted. Add support when it is needed. | ||||
| 	c := defaultCallInfo | ||||
| 	for _, o := range opts { | ||||
| 		if err := o.before(&c); err != nil { | ||||
| 			return nil, toRPCErr(err) | ||||
| 		} | ||||
| 	} | ||||
| 	gopts := BalancerGetOptions{ | ||||
| 		BlockingWait: false, | ||||
| 		BlockingWait: !c.failFast, | ||||
| 	} | ||||
| 	t, put, err = cc.getTransport(ctx, gopts) | ||||
| 	if err != nil { | ||||
| @ -122,6 +127,8 @@ func NewClientStream(ctx context.Context, desc *StreamDesc, cc *ClientConn, meth | ||||
| 		callHdr.SendCompress = cc.dopts.cp.Type() | ||||
| 	} | ||||
| 	cs := &clientStream{ | ||||
| 		opts:    opts, | ||||
| 		c:       c, | ||||
| 		desc:    desc, | ||||
| 		put:     put, | ||||
| 		codec:   cc.dopts.codec, | ||||
| @ -167,6 +174,8 @@ func NewClientStream(ctx context.Context, desc *StreamDesc, cc *ClientConn, meth | ||||
|  | ||||
| // clientStream implements a client side Stream. | ||||
| type clientStream struct { | ||||
| 	opts  []CallOption | ||||
| 	c     callInfo | ||||
| 	t     transport.ClientTransport | ||||
| 	s     *transport.Stream | ||||
| 	p     *parser | ||||
| @ -312,15 +321,18 @@ func (cs *clientStream) closeTransportStream(err error) { | ||||
| } | ||||
|  | ||||
| func (cs *clientStream) finish(err error) { | ||||
| 	if !cs.tracing { | ||||
| 		return | ||||
| 	} | ||||
| 	cs.mu.Lock() | ||||
| 	defer cs.mu.Unlock() | ||||
| 	for _, o := range cs.opts { | ||||
| 		o.after(&cs.c) | ||||
| 	} | ||||
| 	if cs.put != nil { | ||||
| 		cs.put() | ||||
| 		cs.put = nil | ||||
| 	} | ||||
| 	if !cs.tracing { | ||||
| 		return | ||||
| 	} | ||||
| 	if cs.trInfo.tr != nil { | ||||
| 		if err == nil || err == io.EOF { | ||||
| 			cs.trInfo.tr.LazyPrintf("RPC: [OK]") | ||||
|  | ||||
| @ -550,7 +550,7 @@ func testTimeoutOnDeadServer(t *testing.T, e env) { | ||||
|  | ||||
| 	cc := te.clientConn() | ||||
| 	tc := testpb.NewTestServiceClient(cc) | ||||
| 	if _, err := tc.EmptyCall(context.Background(), &testpb.Empty{}); err != nil { | ||||
| 	if _, err := tc.EmptyCall(context.Background(), &testpb.Empty{}, grpc.FailFast(false)); err != nil { | ||||
| 		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) | ||||
| 	} | ||||
| 	te.srv.Stop() | ||||
| @ -558,12 +558,54 @@ func testTimeoutOnDeadServer(t *testing.T, e env) { | ||||
| 	// notification in time the failure path of the 1st invoke of | ||||
| 	// ClientConn.wait hits the deadline exceeded error. | ||||
| 	ctx, _ := context.WithTimeout(context.Background(), -1) | ||||
| 	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}); grpc.Code(err) != codes.DeadlineExceeded { | ||||
| 	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}, grpc.FailFast(false)); grpc.Code(err) != codes.DeadlineExceeded { | ||||
| 		t.Fatalf("TestService/EmptyCall(%v, _) = _, %v, want _, error code: %d", ctx, err, codes.DeadlineExceeded) | ||||
| 	} | ||||
| 	awaitNewConnLogOutput() | ||||
| } | ||||
|  | ||||
| func TestFailFast(t *testing.T) { | ||||
| 	defer leakCheck(t)() | ||||
| 	for _, e := range listTestEnv() { | ||||
| 		testFailFast(t, e) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func testFailFast(t *testing.T, e env) { | ||||
| 	te := newTest(t, e) | ||||
| 	te.userAgent = testAppUA | ||||
| 	te.declareLogNoise( | ||||
| 		"transport: http2Client.notifyError got notified that the client transport was broken EOF", | ||||
| 		"grpc: Conn.transportMonitor exits due to: grpc: the client connection is closing", | ||||
| 		"grpc: Conn.resetTransport failed to create client transport: connection error", | ||||
| 		"grpc: Conn.resetTransport failed to create client transport: connection error: desc = \"transport: dial unix", | ||||
| 	) | ||||
| 	te.startServer() | ||||
| 	defer te.tearDown() | ||||
|  | ||||
| 	cc := te.clientConn() | ||||
| 	tc := testpb.NewTestServiceClient(cc) | ||||
| 	if _, err := tc.EmptyCall(context.Background(), &testpb.Empty{}); err != nil { | ||||
| 		t.Fatalf("TestService/EmptyCall(_, _) = _, %v, want _, <nil>", err) | ||||
| 	} | ||||
| 	// Stop the server and tear down all the exisiting connections. | ||||
| 	te.srv.Stop() | ||||
| 	// Issue an RPC to make sure the server teardown is propagated to the client already. | ||||
| 	ctx, _ := context.WithTimeout(context.Background(), time.Millisecond) | ||||
| 	if _, err := tc.EmptyCall(ctx, &testpb.Empty{}, grpc.FailFast(false)); grpc.Code(err) != codes.DeadlineExceeded { | ||||
| 		t.Fatalf("TestService/EmptyCall(%v, _) = _, %v, want _, error code: %d", ctx, err, codes.DeadlineExceeded) | ||||
| 	} | ||||
| 	// The client keeps reconnecting and ongoing fail-fast RPCs should fail with code.Canceled. | ||||
| 	if _, err := tc.EmptyCall(context.Background(), &testpb.Empty{}); grpc.Code(err) != codes.Canceled { | ||||
| 		t.Fatalf("TestService/EmptyCall(_, _, _) = _, %v, want _, error code: %d", err, codes.Canceled) | ||||
| 	} | ||||
| 	if _, err := tc.StreamingInputCall(ctx); grpc.Code(err) != codes.Canceled { | ||||
| 		t.Fatalf("TestService/StreamingInputCall(_) = _, %v, want _, error code: %d", err, codes.Canceled) | ||||
| 	} | ||||
|  | ||||
| 	awaitNewConnLogOutput() | ||||
| } | ||||
|  | ||||
| func healthCheck(d time.Duration, cc *grpc.ClientConn, serviceName string) (*healthpb.HealthCheckResponse, error) { | ||||
| 	ctx, _ := context.WithTimeout(context.Background(), d) | ||||
| 	hc := healthpb.NewHealthClient(cc) | ||||
| @ -879,7 +921,7 @@ func performOneRPC(t *testing.T, tc testpb.TestServiceClient, wg *sync.WaitGroup | ||||
| 		ResponseSize: proto.Int32(respSize), | ||||
| 		Payload:      payload, | ||||
| 	} | ||||
| 	reply, err := tc.UnaryCall(context.Background(), req) | ||||
| 	reply, err := tc.UnaryCall(context.Background(), req, grpc.FailFast(false)) | ||||
| 	if err != nil { | ||||
| 		t.Errorf("TestService/UnaryCall(_, _) = _, %v, want _, <nil>", err) | ||||
| 		return | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	 iamqizhao
					iamqizhao