grpclb: do not send redundant empty load reports to remote LB (#3249)

Small refactor of tests:

- Add a channel to remoteBalancer on which to send received stats messages
- Change runAndGetStats to runAndCheckStats to allow for faster successful test
  runs (no longer need to sleep for one whole second per test).
- Remove the redundant leak check from runAndCheckStats; otherwise, excess load
  report messages left unread on the channel would result in an infinite loop.
- Add String method to rpcStats to avoid data race between merging and printing.
This commit is contained in:
Doug Fawley
2019-12-12 14:10:47 -08:00
committed by GitHub
parent df18b5433b
commit 085c980048
3 changed files with 103 additions and 44 deletions

View File

@ -49,6 +49,14 @@ func newRPCStats() *rpcStats {
}
}
// isZeroStats reports whether stats carries no load information at all:
// every counter is zero and there are no per-category drop entries.
func isZeroStats(stats *lbpb.ClientStats) bool {
	if len(stats.CallsFinishedWithDrop) != 0 {
		return false
	}
	return stats.NumCallsStarted == 0 &&
		stats.NumCallsFinished == 0 &&
		stats.NumCallsFinishedWithClientFailedToSend == 0 &&
		stats.NumCallsFinishedKnownReceived == 0
}
// toClientStats converts rpcStats to lbpb.ClientStats, and clears rpcStats.
func (s *rpcStats) toClientStats() *lbpb.ClientStats {
stats := &lbpb.ClientStats{

View File

@ -286,6 +286,7 @@ func (ccw *remoteBalancerCCWrapper) readServerList(s *balanceLoadClientStream) e
func (ccw *remoteBalancerCCWrapper) sendLoadReport(s *balanceLoadClientStream, interval time.Duration) {
ticker := time.NewTicker(interval)
defer ticker.Stop()
lastZero := false
for {
select {
case <-ticker.C:
@ -293,6 +294,12 @@ func (ccw *remoteBalancerCCWrapper) sendLoadReport(s *balanceLoadClientStream, i
return
}
stats := ccw.lb.clientStats.toClientStats()
zero := isZeroStats(stats)
if zero && lastZero {
// Quash redundant empty load reports.
continue
}
lastZero = zero
t := time.Now()
stats.Timestamp = &timestamppb.Timestamp{
Seconds: t.Unix(),

View File

@ -166,18 +166,31 @@ func (s *rpcStats) equal(o *rpcStats) bool {
return mapsEqual(s.numCallsDropped, o.numCallsDropped)
}
// String implements fmt.Stringer. The counters are read atomically and
// s.mu is held while reading the drop map, so formatting the stats does
// not race with concurrent merges.
func (s *rpcStats) String() string {
	s.mu.Lock()
	defer s.mu.Unlock()
	started := atomic.LoadInt64(&s.numCallsStarted)
	finished := atomic.LoadInt64(&s.numCallsFinished)
	failedToSend := atomic.LoadInt64(&s.numCallsFinishedWithClientFailedToSend)
	knownReceived := atomic.LoadInt64(&s.numCallsFinishedKnownReceived)
	return fmt.Sprintf("Started: %v, Finished: %v, FinishedWithClientFailedToSend: %v, FinishedKnownReceived: %v, Dropped: %v",
		started, finished, failedToSend, knownReceived, s.numCallsDropped)
}
// remoteBalancer is a fake grpclb balancer server used by the tests.
type remoteBalancer struct {
	// sls carries server lists for the balancer to send to the client
	// (buffered, size 1 — see newRemoteBalancer).
	sls chan *lbpb.ServerList
	// statsDura is presumably the load-reporting interval communicated to
	// the client; its use is not visible in this view — TODO confirm.
	statsDura time.Duration
	// done signals shutdown of the fake balancer.
	done chan struct{}
	// stats accumulates all client load reports merged together.
	stats *rpcStats
	// statsChan, when non-nil, receives each ClientStats message as it
	// arrives on the BalanceLoad stream, letting tests observe individual
	// load reports instead of only the merged totals.
	statsChan chan *lbpb.ClientStats
}
func newRemoteBalancer(intervals []time.Duration) *remoteBalancer {
func newRemoteBalancer(intervals []time.Duration, statsChan chan *lbpb.ClientStats) *remoteBalancer {
return &remoteBalancer{
sls: make(chan *lbpb.ServerList, 1),
done: make(chan struct{}),
stats: newRPCStats(),
sls: make(chan *lbpb.ServerList, 1),
done: make(chan struct{}),
stats: newRPCStats(),
statsChan: statsChan,
}
}
@ -218,6 +231,9 @@ func (b *remoteBalancer) BalanceLoad(stream lbgrpc.LoadBalancer_BalanceLoadServe
return
}
b.stats.merge(req.GetClientStats())
if b.statsChan != nil && req.GetClientStats() != nil {
b.statsChan <- req.GetClientStats()
}
}
}()
for {
@ -295,7 +311,7 @@ type testServers struct {
beListeners []net.Listener
}
func newLoadBalancer(numberOfBackends int) (tss *testServers, cleanup func(), err error) {
func newLoadBalancer(numberOfBackends int, statsChan chan *lbpb.ClientStats) (tss *testServers, cleanup func(), err error) {
var (
beListeners []net.Listener
ls *remoteBalancer
@ -328,7 +344,7 @@ func newLoadBalancer(numberOfBackends int) (tss *testServers, cleanup func(), er
sn: lbServerName,
}
lb = grpc.NewServer(grpc.Creds(lbCreds))
ls = newRemoteBalancer(nil)
ls = newRemoteBalancer(nil, statsChan)
lbgrpc.RegisterLoadBalancerServer(lb, ls)
go func() {
lb.Serve(lbLis)
@ -361,7 +377,7 @@ func TestGRPCLB(t *testing.T) {
r, cleanup := manual.GenerateAndRegisterManualResolver()
defer cleanup()
tss, cleanup, err := newLoadBalancer(1)
tss, cleanup, err := newLoadBalancer(1, nil)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
@ -407,7 +423,7 @@ func TestGRPCLBWeighted(t *testing.T) {
r, cleanup := manual.GenerateAndRegisterManualResolver()
defer cleanup()
tss, cleanup, err := newLoadBalancer(2)
tss, cleanup, err := newLoadBalancer(2, nil)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
@ -475,7 +491,7 @@ func TestDropRequest(t *testing.T) {
r, cleanup := manual.GenerateAndRegisterManualResolver()
defer cleanup()
tss, cleanup, err := newLoadBalancer(2)
tss, cleanup, err := newLoadBalancer(2, nil)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
@ -633,7 +649,7 @@ func TestBalancerDisconnects(t *testing.T) {
lbs []*grpc.Server
)
for i := 0; i < 2; i++ {
tss, cleanup, err := newLoadBalancer(1)
tss, cleanup, err := newLoadBalancer(1, nil)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
@ -708,7 +724,7 @@ func TestFallback(t *testing.T) {
r, cleanup := manual.GenerateAndRegisterManualResolver()
defer cleanup()
tss, cleanup, err := newLoadBalancer(1)
tss, cleanup, err := newLoadBalancer(1, nil)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
@ -843,7 +859,7 @@ func TestFallBackWithNoServerAddress(t *testing.T) {
}
defer cleanup()
tss, cleanup, err := newLoadBalancer(1)
tss, cleanup, err := newLoadBalancer(1, nil)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
@ -957,7 +973,7 @@ func TestGRPCLBPickFirst(t *testing.T) {
r, cleanup := manual.GenerateAndRegisterManualResolver()
defer cleanup()
tss, cleanup, err := newLoadBalancer(3)
tss, cleanup, err := newLoadBalancer(3, nil)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
@ -1108,13 +1124,11 @@ func checkStats(stats, expected *rpcStats) error {
return nil
}
func runAndGetStats(t *testing.T, drop bool, runRPCs func(*grpc.ClientConn)) *rpcStats {
defer leakcheck.Check(t)
func runAndCheckStats(t *testing.T, drop bool, statsChan chan *lbpb.ClientStats, runRPCs func(*grpc.ClientConn), statsWant *rpcStats) error {
r, cleanup := manual.GenerateAndRegisterManualResolver()
defer cleanup()
tss, cleanup, err := newLoadBalancer(1)
tss, cleanup, err := newLoadBalancer(1, statsChan)
if err != nil {
t.Fatalf("failed to create new load balancer: %v", err)
}
@ -1152,9 +1166,14 @@ func runAndGetStats(t *testing.T, drop bool, runRPCs func(*grpc.ClientConn)) *rp
}}})
runRPCs(cc)
time.Sleep(1 * time.Second)
stats := tss.ls.stats
return stats
end := time.Now().Add(time.Second)
for time.Now().Before(end) {
if err := checkStats(tss.ls.stats, statsWant); err == nil {
time.Sleep(200 * time.Millisecond) // sleep for two intervals to make sure no new stats are reported.
break
}
}
return checkStats(tss.ls.stats, statsWant)
}
const (
@ -1164,7 +1183,7 @@ const (
func TestGRPCLBStatsUnarySuccess(t *testing.T) {
defer leakcheck.Check(t)
stats := runAndGetStats(t, false, func(cc *grpc.ClientConn) {
if err := runAndCheckStats(t, false, nil, func(cc *grpc.ClientConn) {
testC := testpb.NewTestServiceClient(cc)
// The first non-failfast RPC succeeds, all connections are up.
if _, err := testC.EmptyCall(context.Background(), &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
@ -1173,9 +1192,7 @@ func TestGRPCLBStatsUnarySuccess(t *testing.T) {
for i := 0; i < countRPC-1; i++ {
testC.EmptyCall(context.Background(), &testpb.Empty{})
}
})
if err := checkStats(stats, &rpcStats{
}, &rpcStats{
numCallsStarted: int64(countRPC),
numCallsFinished: int64(countRPC),
numCallsFinishedKnownReceived: int64(countRPC),
@ -1186,7 +1203,7 @@ func TestGRPCLBStatsUnarySuccess(t *testing.T) {
func TestGRPCLBStatsUnaryDrop(t *testing.T) {
defer leakcheck.Check(t)
stats := runAndGetStats(t, true, func(cc *grpc.ClientConn) {
if err := runAndCheckStats(t, true, nil, func(cc *grpc.ClientConn) {
testC := testpb.NewTestServiceClient(cc)
// The first non-failfast RPC succeeds, all connections are up.
if _, err := testC.EmptyCall(context.Background(), &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
@ -1195,9 +1212,7 @@ func TestGRPCLBStatsUnaryDrop(t *testing.T) {
for i := 0; i < countRPC-1; i++ {
testC.EmptyCall(context.Background(), &testpb.Empty{})
}
})
if err := checkStats(stats, &rpcStats{
}, &rpcStats{
numCallsStarted: int64(countRPC),
numCallsFinished: int64(countRPC),
numCallsFinishedKnownReceived: int64(countRPC) / 2,
@ -1209,7 +1224,7 @@ func TestGRPCLBStatsUnaryDrop(t *testing.T) {
func TestGRPCLBStatsUnaryFailedToSend(t *testing.T) {
defer leakcheck.Check(t)
stats := runAndGetStats(t, false, func(cc *grpc.ClientConn) {
if err := runAndCheckStats(t, false, nil, func(cc *grpc.ClientConn) {
testC := testpb.NewTestServiceClient(cc)
// The first non-failfast RPC succeeds, all connections are up.
if _, err := testC.EmptyCall(context.Background(), &testpb.Empty{}, grpc.WaitForReady(true)); err != nil {
@ -1218,9 +1233,7 @@ func TestGRPCLBStatsUnaryFailedToSend(t *testing.T) {
for i := 0; i < countRPC-1; i++ {
cc.Invoke(context.Background(), failtosendURI, &testpb.Empty{}, nil)
}
})
if err := checkStats(stats, &rpcStats{
}, &rpcStats{
numCallsStarted: int64(countRPC)*2 - 1,
numCallsFinished: int64(countRPC)*2 - 1,
numCallsFinishedWithClientFailedToSend: int64(countRPC-1) * 2,
@ -1232,7 +1245,7 @@ func TestGRPCLBStatsUnaryFailedToSend(t *testing.T) {
func TestGRPCLBStatsStreamingSuccess(t *testing.T) {
defer leakcheck.Check(t)
stats := runAndGetStats(t, false, func(cc *grpc.ClientConn) {
if err := runAndCheckStats(t, false, nil, func(cc *grpc.ClientConn) {
testC := testpb.NewTestServiceClient(cc)
// The first non-failfast RPC succeeds, all connections are up.
stream, err := testC.FullDuplexCall(context.Background(), grpc.WaitForReady(true))
@ -1255,9 +1268,7 @@ func TestGRPCLBStatsStreamingSuccess(t *testing.T) {
}
}
}
})
if err := checkStats(stats, &rpcStats{
}, &rpcStats{
numCallsStarted: int64(countRPC),
numCallsFinished: int64(countRPC),
numCallsFinishedKnownReceived: int64(countRPC),
@ -1268,7 +1279,7 @@ func TestGRPCLBStatsStreamingSuccess(t *testing.T) {
func TestGRPCLBStatsStreamingDrop(t *testing.T) {
defer leakcheck.Check(t)
stats := runAndGetStats(t, true, func(cc *grpc.ClientConn) {
if err := runAndCheckStats(t, true, nil, func(cc *grpc.ClientConn) {
testC := testpb.NewTestServiceClient(cc)
// The first non-failfast RPC succeeds, all connections are up.
stream, err := testC.FullDuplexCall(context.Background(), grpc.WaitForReady(true))
@ -1291,9 +1302,7 @@ func TestGRPCLBStatsStreamingDrop(t *testing.T) {
}
}
}
})
if err := checkStats(stats, &rpcStats{
}, &rpcStats{
numCallsStarted: int64(countRPC),
numCallsFinished: int64(countRPC),
numCallsFinishedKnownReceived: int64(countRPC) / 2,
@ -1305,7 +1314,7 @@ func TestGRPCLBStatsStreamingDrop(t *testing.T) {
func TestGRPCLBStatsStreamingFailedToSend(t *testing.T) {
defer leakcheck.Check(t)
stats := runAndGetStats(t, false, func(cc *grpc.ClientConn) {
if err := runAndCheckStats(t, false, nil, func(cc *grpc.ClientConn) {
testC := testpb.NewTestServiceClient(cc)
// The first non-failfast RPC succeeds, all connections are up.
stream, err := testC.FullDuplexCall(context.Background(), grpc.WaitForReady(true))
@ -1320,9 +1329,7 @@ func TestGRPCLBStatsStreamingFailedToSend(t *testing.T) {
for i := 0; i < countRPC-1; i++ {
cc.NewStream(context.Background(), &grpc.StreamDesc{}, failtosendURI)
}
})
if err := checkStats(stats, &rpcStats{
}, &rpcStats{
numCallsStarted: int64(countRPC)*2 - 1,
numCallsFinished: int64(countRPC)*2 - 1,
numCallsFinishedWithClientFailedToSend: int64(countRPC-1) * 2,
@ -1331,3 +1338,40 @@ func TestGRPCLBStatsStreamingFailedToSend(t *testing.T) {
t.Fatal(err)
}
}
// TestGRPCLBStatsQuashEmpty verifies that the grpclb client suppresses
// redundant all-zero load reports: with no RPCs issued, exactly one initial
// (zero) report should arrive, and no further reports should follow.
func TestGRPCLBStatsQuashEmpty(t *testing.T) {
	defer leakcheck.Check(t)
	ch := make(chan *lbpb.ClientStats)
	defer close(ch)
	if err := runAndCheckStats(t, false, ch, func(cc *grpc.ClientConn) {
		// Perform no RPCs; wait for load reports to start, which should be
		// zero, then expect no other load report within 5x the update
		// interval.
		select {
		case st := <-ch:
			// The first report must carry no load, since no RPCs were made.
			if !isZeroStats(st) {
				t.Errorf("got stats %v; want all zero", st)
			}
		case <-time.After(5 * time.Second):
			t.Errorf("did not get initial stats report after 5 seconds")
			return
		}
		select {
		case st := <-ch:
			// Any second report means the quashing logic failed.
			t.Errorf("got unexpected stats report: %v", st)
		case <-time.After(500 * time.Millisecond):
			// Success.
		}
		// Keep draining ch so later sends by the balancer (if any) cannot
		// block its stream-handling goroutine.
		go func() {
			for range ch { // Drain statsChan until it is closed.
			}
		}()
	}, &rpcStats{
		// No RPCs were issued, so all expected counters are zero.
		numCallsStarted:               0,
		numCallsFinished:              0,
		numCallsFinishedKnownReceived: 0,
	}); err != nil {
		t.Fatal(err)
	}
}