grpclb: fix deadlock in grpclb connection cache (#3017)
Before the fix, if the timer to remove a SubConn fires at the same time NewSubConn cancels the timer, it caused a mutex leak and deadlock.
This commit is contained in:
@ -173,13 +173,13 @@ func (ccc *lbCacheClientConn) RemoveSubConn(sc balancer.SubConn) {
|
|||||||
|
|
||||||
timer := time.AfterFunc(ccc.timeout, func() {
|
timer := time.AfterFunc(ccc.timeout, func() {
|
||||||
ccc.mu.Lock()
|
ccc.mu.Lock()
|
||||||
|
defer ccc.mu.Unlock()
|
||||||
if entry.abortDeleting {
|
if entry.abortDeleting {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
ccc.cc.RemoveSubConn(sc)
|
ccc.cc.RemoveSubConn(sc)
|
||||||
delete(ccc.subConnToAddr, sc)
|
delete(ccc.subConnToAddr, sc)
|
||||||
delete(ccc.subConnCache, addr)
|
delete(ccc.subConnCache, addr)
|
||||||
ccc.mu.Unlock()
|
|
||||||
})
|
})
|
||||||
entry.cancel = func() {
|
entry.cancel = func() {
|
||||||
if !timer.Stop() {
|
if !timer.Stop() {
|
||||||
|
@ -217,3 +217,46 @@ func TestLBCacheClientConnReuse(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test that if the timer to remove a SubConn fires at the same time NewSubConn
|
||||||
|
// cancels the timer, it doesn't cause deadlock.
|
||||||
|
func TestLBCache_RemoveTimer_New_Race(t *testing.T) {
|
||||||
|
mcc := newMockClientConn()
|
||||||
|
if err := checkMockCC(mcc, 0); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ccc := newLBCacheClientConn(mcc)
|
||||||
|
ccc.timeout = time.Nanosecond
|
||||||
|
if err := checkCacheCC(ccc, 0, 0); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sc, _ := ccc.NewSubConn([]resolver.Address{{Addr: "address1"}}, balancer.NewSubConnOptions{})
|
||||||
|
// One subconn in MockCC.
|
||||||
|
if err := checkMockCC(mcc, 1); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// No subconn being deleted, and one in CacheCC.
|
||||||
|
if err := checkCacheCC(ccc, 0, 1); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
for i := 0; i < 1000; i++ {
|
||||||
|
// Remove starts a timer with 1 ns timeout, the NewSubConn will race
|
||||||
|
// with with the timer.
|
||||||
|
ccc.RemoveSubConn(sc)
|
||||||
|
sc, _ = ccc.NewSubConn([]resolver.Address{{Addr: "address1"}}, balancer.NewSubConnOptions{})
|
||||||
|
}
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-time.After(time.Second):
|
||||||
|
t.Fatalf("Test didn't finish within 1 second. Deadlock")
|
||||||
|
case <-done:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user