mirror of https://github.com/ipfs/kubo.git
synced 2025-06-26 15:42:21 +08:00
Merge pull request #877 from jbenet/race-fix
query: fixed race condition
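The change swaps the query runner's ctxgroup.ContextGroup for a goprocess.Process: the runner owns a process, workers are spawned with proc.Go, per-call contexts are derived with ctxproc.WithProcessClosing, and a goroutine closes the process as soon as the caller's context is done. The code below is a minimal standalone sketch of that lifecycle pattern, not kubo code: it assumes the upstream github.com/jbenet/goprocess import paths rather than the Godeps-vendored paths in the diff, and run() is a made-up stand-in for dhtQueryRunner.Run.

// Minimal sketch of the goprocess lifecycle pattern this commit adopts
// (assumptions: upstream import paths; run() is a hypothetical stand-in).
package main

import (
	"context"
	"fmt"
	"time"

	process "github.com/jbenet/goprocess"
	ctxproc "github.com/jbenet/goprocess/context"
)

func run(ctx context.Context) error {
	proc := process.WithParent(process.Background())

	// Tie the process lifetime to the caller's context, as the new Run does.
	go func() {
		<-ctx.Done()
		proc.Close()
	}()

	done := make(chan struct{})
	proc.Go(func(p process.Process) {
		defer close(done)
		// Per-worker context, cancelled once the process starts closing.
		workCtx := ctxproc.WithProcessClosing(context.Background(), p)
		select {
		case <-time.After(50 * time.Millisecond):
			fmt.Println("worker finished")
		case <-workCtx.Done():
			fmt.Println("worker cancelled:", workCtx.Err())
		}
	})

	// Wait for the work, or for the process to be torn down by the context.
	select {
	case <-done:
	case <-proc.Closing():
	}
	proc.Close() // safe to call again; waits for all children to exit.
	return ctx.Err()
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	fmt.Println("run:", run(ctx))
}

With a much shorter timeout, the worker prints "worker cancelled: context canceled" and run returns context.DeadlineExceeded, which mirrors the two errors the test below now accepts.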
@@ -52,7 +52,7 @@ func TestGetFailures(t *testing.T) {
 			err = merr[0]
 		}
 
-		if err != context.DeadlineExceeded {
+		if err != context.DeadlineExceeded && err != context.Canceled {
 			t.Fatal("Got different error than we expected", err)
 		}
 	} else {
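The test hunk above widens the accepted error: once the runner is driven by a process that the caller's context closes, cancellation can surface as context.Canceled rather than context.DeadlineExceeded. A tiny standalone illustration (again assuming the upstream goprocess import paths, not the vendored ones in the diff):

package main

import (
	"context"
	"fmt"

	process "github.com/jbenet/goprocess"
	ctxproc "github.com/jbenet/goprocess/context"
)

func main() {
	proc := process.WithParent(process.Background())
	// A context tied to the process: closing the process cancels it.
	ctx := ctxproc.WithProcessClosing(context.Background(), proc)
	proc.Close()
	<-ctx.Done()
	fmt.Println(ctx.Err()) // prints "context canceled", not a deadline error
}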
@@ -12,7 +12,8 @@ import (
 	pset "github.com/jbenet/go-ipfs/util/peerset"
 	todoctr "github.com/jbenet/go-ipfs/util/todocounter"
 
-	ctxgroup "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-ctxgroup"
+	process "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/goprocess"
+	ctxproc "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/goprocess/context"
 	context "github.com/jbenet/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"
 )
 
@@ -52,11 +53,17 @@ type queryFunc func(context.Context, peer.ID) (*dhtQueryResult, error)
 
 // Run runs the query at hand. pass in a list of peers to use first.
 func (q *dhtQuery) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	default:
+	}
+
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
-	runner := newQueryRunner(ctx, q)
-	return runner.Run(peers)
+	runner := newQueryRunner(q)
+	return runner.Run(ctx, peers)
 }
 
 type dhtQueryRunner struct {
@@ -71,22 +78,24 @@ type dhtQueryRunner struct {
 	rateLimit chan struct{} // processing semaphore
 	log       eventlog.EventLogger
 
-	cg ctxgroup.ContextGroup
+	proc process.Process
 	sync.RWMutex
 }
 
-func newQueryRunner(ctx context.Context, q *dhtQuery) *dhtQueryRunner {
+func newQueryRunner(q *dhtQuery) *dhtQueryRunner {
+	proc := process.WithParent(process.Background())
+	ctx := ctxproc.WithProcessClosing(context.Background(), proc)
 	return &dhtQueryRunner{
 		query:          q,
 		peersToQuery:   queue.NewChanQueue(ctx, queue.NewXORDistancePQ(q.key)),
 		peersRemaining: todoctr.NewSyncCounter(),
 		peersSeen:      pset.New(),
 		rateLimit:      make(chan struct{}, q.concurrency),
-		cg:             ctxgroup.WithContext(ctx),
+		proc:           proc,
 	}
 }
 
-func (r *dhtQueryRunner) Run(peers []peer.ID) (*dhtQueryResult, error) {
+func (r *dhtQueryRunner) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
 	r.log = log
 
 	if len(peers) == 0 {
@@ -101,22 +110,30 @@ func (r *dhtQueryRunner) Run(peers []peer.ID) (*dhtQueryResult, error) {
 
 	// add all the peers we got first.
 	for _, p := range peers {
-		r.addPeerToQuery(r.cg.Context(), p)
+		r.addPeerToQuery(p)
 	}
 
 	// go do this thing.
-	// do it as a child func to make sure Run exits
+	// do it as a child proc to make sure Run exits
 	// ONLY AFTER spawn workers has exited.
-	r.cg.AddChildFunc(r.spawnWorkers)
+	r.proc.Go(r.spawnWorkers)
 
 	// so workers are working.
 
 	// wait until they're done.
 	err := routing.ErrNotFound
 
+	// now, if the context finishes, close the proc.
+	// we have to do it here because the logic before is setup, which
+	// should run without closing the proc.
+	go func() {
+		<-ctx.Done()
+		r.proc.Close()
+	}()
+
 	select {
 	case <-r.peersRemaining.Done():
-		r.cg.Close()
+		r.proc.Close()
 		r.RLock()
 		defer r.RUnlock()
 
@@ -128,12 +145,10 @@ func (r *dhtQueryRunner) Run(peers []peer.ID) (*dhtQueryResult, error) {
 			err = r.errs[0]
 		}
 
-	case <-r.cg.Closed():
-		log.Debug("r.cg.Closed()")
-
+	case <-r.proc.Closed():
 		r.RLock()
 		defer r.RUnlock()
-		err = r.cg.Context().Err() // collect the error.
+		err = context.DeadlineExceeded
 	}
 
 	if r.result != nil && r.result.success {
@@ -143,7 +158,7 @@ func (r *dhtQueryRunner) Run(peers []peer.ID) (*dhtQueryResult, error) {
 	return nil, err
 }
 
-func (r *dhtQueryRunner) addPeerToQuery(ctx context.Context, next peer.ID) {
+func (r *dhtQueryRunner) addPeerToQuery(next peer.ID) {
 	// if new peer is ourselves...
 	if next == r.query.dht.self {
 		r.log.Debug("addPeerToQuery skip self")
@@ -157,18 +172,18 @@ func (r *dhtQueryRunner) addPeerToQuery(ctx context.Context, next peer.ID) {
 	r.peersRemaining.Increment(1)
 	select {
 	case r.peersToQuery.EnqChan <- next:
-	case <-ctx.Done():
+	case <-r.proc.Closing():
 	}
 }
 
-func (r *dhtQueryRunner) spawnWorkers(parent ctxgroup.ContextGroup) {
+func (r *dhtQueryRunner) spawnWorkers(proc process.Process) {
 	for {
 
 		select {
 		case <-r.peersRemaining.Done():
 			return
 
-		case <-r.cg.Closing():
+		case <-r.proc.Closing():
 			return
 
 		case p, more := <-r.peersToQuery.DeqChan:
@@ -178,24 +193,27 @@ func (r *dhtQueryRunner) spawnWorkers(parent ctxgroup.ContextGroup) {
 
 			// do it as a child func to make sure Run exits
 			// ONLY AFTER spawn workers has exited.
-			parent.AddChildFunc(func(cg ctxgroup.ContextGroup) {
-				r.queryPeer(cg, p)
+			proc.Go(func(proc process.Process) {
+				r.queryPeer(proc, p)
 			})
 		}
 	}
 }
 
-func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
+func (r *dhtQueryRunner) queryPeer(proc process.Process, p peer.ID) {
 	// make sure we rate limit concurrency.
 	select {
 	case <-r.rateLimit:
-	case <-cg.Closing():
+	case <-proc.Closing():
 		r.peersRemaining.Decrement(1)
 		return
 	}
 
 	// ok let's do this!
 
+	// create a context from our proc.
+	ctx := ctxproc.WithProcessClosing(context.Background(), proc)
+
 	// make sure we do this when we exit
 	defer func() {
 		// signal we're done proccessing peer p
@@ -212,10 +230,11 @@ func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
 		r.rateLimit <- struct{}{}
 
 		pi := peer.PeerInfo{ID: p}
-		if err := r.query.dht.host.Connect(cg.Context(), pi); err != nil {
+
+		if err := r.query.dht.host.Connect(ctx, pi); err != nil {
 			log.Debugf("Error connecting: %s", err)
 
-			notif.PublishQueryEvent(cg.Context(), &notif.QueryEvent{
+			notif.PublishQueryEvent(ctx, &notif.QueryEvent{
 				Type:  notif.QueryError,
 				Extra: err.Error(),
 			})
@@ -231,7 +250,7 @@ func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
 	}
 
 	// finally, run the query against this peer
-	res, err := r.query.qfunc(cg.Context(), p)
+	res, err := r.query.qfunc(ctx, p)
 
 	if err != nil {
 		log.Debugf("ERROR worker for: %v %v", p, err)
@@ -244,7 +263,7 @@ func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
 		r.Lock()
 		r.result = res
 		r.Unlock()
-		go r.cg.Close() // signal to everyone that we're done.
+		go r.proc.Close() // signal to everyone that we're done.
 		// must be async, as we're one of the children, and Close blocks.
 
 	} else if len(res.closerPeers) > 0 {
@@ -257,7 +276,7 @@ func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
 
 			// add their addresses to the dialer's peerstore
 			r.query.dht.peerstore.AddAddrs(next.ID, next.Addrs, peer.TempAddrTTL)
-			r.addPeerToQuery(cg.Context(), next.ID)
+			r.addPeerToQuery(next.ID)
 			log.Debugf("PEERS CLOSER -- worker for: %v added %v (%v)", p, next.ID, next.Addrs)
 		}
 	} else {