
Merge pull request #877 from jbenet/race-fix

query: fixed race condition
Author: Juan Batiz-Benet
Date:   2015-03-07 04:19:24 -08:00
2 changed files with 48 additions and 29 deletions
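For context on the change below: the race fix replaces go-ctxgroup with goprocess throughout the DHT query runner. Here is a minimal, standalone sketch of the goprocess teardown pattern the patch relies on, assuming only the public goprocess API (WithParent, Background, Go, Closing, Close, Closed); the worker body and timings are illustrative, not from the patch.

package main

import (
	"fmt"
	"time"

	goprocess "github.com/jbenet/goprocess"
)

func main() {
	proc := goprocess.WithParent(goprocess.Background())

	// Go spawns a child; Close will not return until it has exited.
	proc.Go(func(worker goprocess.Process) {
		for {
			select {
			case <-worker.Closing(): // the process tree is shutting down
				fmt.Println("worker: closing")
				return
			case <-time.After(100 * time.Millisecond):
				fmt.Println("worker: tick")
			}
		}
	})

	time.Sleep(250 * time.Millisecond)
	proc.Close() // blocks until the worker above has returned
	<-proc.Closed()
}

The key property is that Close blocks until every child spawned with Go has returned, which is what lets Run guarantee its workers have exited before it does.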

routing/dht/ext_test.go

@@ -52,7 +52,7 @@ func TestGetFailures(t *testing.T) {
 			err = merr[0]
 		}
 
-		if err != context.DeadlineExceeded {
+		if err != context.DeadlineExceeded && err != context.Canceled {
			t.Fatal("Got different error than we expected", err)
 		}
 	} else {
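The test is loosened because the runner now cancels its internal context explicitly, so a failed lookup can surface context.Canceled as well as context.DeadlineExceeded. A standalone sketch of the two error values, using the same golang.org/x/net/context package the tree vendors:

package main

import (
	"fmt"
	"time"

	context "golang.org/x/net/context"
)

func main() {
	// A deadline that expires reports DeadlineExceeded...
	d, cancelD := context.WithTimeout(context.Background(), time.Millisecond)
	defer cancelD()
	<-d.Done()
	fmt.Println(d.Err() == context.DeadlineExceeded) // true

	// ...while an explicit cancel reports Canceled.
	c, cancelC := context.WithCancel(context.Background())
	cancelC()
	<-c.Done()
	fmt.Println(c.Err() == context.Canceled) // true
}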

routing/dht/query.go

@@ -12,7 +12,8 @@ import (
 	pset "github.com/jbenet/go-ipfs/util/peerset"
 	todoctr "github.com/jbenet/go-ipfs/util/todocounter"
 
-	ctxgroup "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-ctxgroup"
+	process "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/goprocess"
+	ctxproc "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/goprocess/context"
 	context "github.com/jbenet/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"
 )
@@ -52,11 +53,17 @@ type queryFunc func(context.Context, peer.ID) (*dhtQueryResult, error)
 
 // Run runs the query at hand. pass in a list of peers to use first.
 func (q *dhtQuery) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	default:
+	}
+
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
-	runner := newQueryRunner(ctx, q)
-	return runner.Run(peers)
+	runner := newQueryRunner(q)
+	return runner.Run(ctx, peers)
 }
 
 type dhtQueryRunner struct {
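The new prologue in Run is the standard non-blocking context guard: bail out before doing any setup if the caller's context is already cancelled or expired. Extracted as a hypothetical helper (errIfDone is not part of the patch), it looks like this:

package main

import (
	"fmt"

	context "golang.org/x/net/context"
)

// errIfDone reports ctx.Err() if ctx is already cancelled or expired;
// the default case keeps the select from blocking.
func errIfDone(ctx context.Context) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
		return nil
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	fmt.Println(errIfDone(ctx)) // <nil>
	cancel()
	fmt.Println(errIfDone(ctx)) // context canceled
}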
@@ -71,22 +78,24 @@ type dhtQueryRunner struct {
 	rateLimit chan struct{} // processing semaphore
 	log       eventlog.EventLogger
 
-	cg ctxgroup.ContextGroup
+	proc process.Process
 	sync.RWMutex
 }
 
-func newQueryRunner(ctx context.Context, q *dhtQuery) *dhtQueryRunner {
+func newQueryRunner(q *dhtQuery) *dhtQueryRunner {
+	proc := process.WithParent(process.Background())
+	ctx := ctxproc.WithProcessClosing(context.Background(), proc)
 	return &dhtQueryRunner{
 		query:          q,
 		peersToQuery:   queue.NewChanQueue(ctx, queue.NewXORDistancePQ(q.key)),
 		peersRemaining: todoctr.NewSyncCounter(),
 		peersSeen:      pset.New(),
 		rateLimit:      make(chan struct{}, q.concurrency),
-		cg:             ctxgroup.WithContext(ctx),
+		proc:           proc,
 	}
 }
 
-func (r *dhtQueryRunner) Run(peers []peer.ID) (*dhtQueryResult, error) {
+func (r *dhtQueryRunner) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
 	r.log = log
 
 	if len(peers) == 0 {
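newQueryRunner now owns its process and derives a context from it via goprocess/context, so context-based helpers such as the ChanQueue unblock when the process closes, rather than when the caller's context does. A self-contained sketch of that bridge, with import paths shortened to the upstream repos (the vendored Godeps paths in the diff point at the same packages):

package main

import (
	"fmt"

	goprocess "github.com/jbenet/goprocess"
	goprocessctx "github.com/jbenet/goprocess/context"
	context "golang.org/x/net/context"
)

func main() {
	proc := goprocess.WithParent(goprocess.Background())

	// Derive a context that is cancelled once proc begins closing.
	ctx := goprocessctx.WithProcessClosing(context.Background(), proc)

	go proc.Close()
	<-ctx.Done()
	fmt.Println("context ended with:", ctx.Err())
}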
@@ -101,22 +110,30 @@ func (r *dhtQueryRunner) Run(peers []peer.ID) (*dhtQueryResult, error) {
 
 	// add all the peers we got first.
 	for _, p := range peers {
-		r.addPeerToQuery(r.cg.Context(), p)
+		r.addPeerToQuery(p)
 	}
 
 	// go do this thing.
-	// do it as a child func to make sure Run exits
+	// do it as a child proc to make sure Run exits
 	// ONLY AFTER spawn workers has exited.
-	r.cg.AddChildFunc(r.spawnWorkers)
+	r.proc.Go(r.spawnWorkers)
 
 	// so workers are working.
 
 	// wait until they're done.
 	err := routing.ErrNotFound
 
+	// now, if the context finishes, close the proc.
+	// we have to do it here because the logic before is setup, which
+	// should run without closing the proc.
+	go func() {
+		<-ctx.Done()
+		r.proc.Close()
+	}()
+
 	select {
 	case <-r.peersRemaining.Done():
-		r.cg.Close()
+		r.proc.Close()
 		r.RLock()
 		defer r.RUnlock()
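The added goroutine is the other half of the bridge: the caller's context now drives process shutdown instead of serving as the runner's root. A condensed sketch of the wiring, assuming goprocess's documented behavior that Close is safe to call from more than one path:

package main

import (
	"fmt"
	"time"

	goprocess "github.com/jbenet/goprocess"
	context "golang.org/x/net/context"
)

func main() {
	proc := goprocess.WithParent(goprocess.Background())
	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	// Same wiring as Run: when the caller's context finishes, close the
	// process; the "work finished" path may also call Close on its own.
	go func() {
		<-ctx.Done()
		proc.Close()
	}()

	<-proc.Closed()
	fmt.Println("process closed after context ended")
}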
@@ -128,12 +145,10 @@ func (r *dhtQueryRunner) Run(peers []peer.ID) (*dhtQueryResult, error) {
 			err = r.errs[0]
 		}
 
-	case <-r.cg.Closed():
-		log.Debug("r.cg.Closed()")
-
+	case <-r.proc.Closed():
 		r.RLock()
 		defer r.RUnlock()
-		err = r.cg.Context().Err() // collect the error.
+		err = context.DeadlineExceeded
 	}
 
 	if r.result != nil && r.result.success {
@@ -143,7 +158,7 @@ func (r *dhtQueryRunner) Run(peers []peer.ID) (*dhtQueryResult, error) {
 	return nil, err
 }
 
-func (r *dhtQueryRunner) addPeerToQuery(ctx context.Context, next peer.ID) {
+func (r *dhtQueryRunner) addPeerToQuery(next peer.ID) {
 	// if new peer is ourselves...
 	if next == r.query.dht.self {
 		r.log.Debug("addPeerToQuery skip self")
@@ -157,18 +172,18 @@ func (r *dhtQueryRunner) addPeerToQuery(ctx context.Context, next peer.ID) {
 	r.peersRemaining.Increment(1)
 	select {
 	case r.peersToQuery.EnqChan <- next:
-	case <-ctx.Done():
+	case <-r.proc.Closing():
 	}
 }
 
-func (r *dhtQueryRunner) spawnWorkers(parent ctxgroup.ContextGroup) {
+func (r *dhtQueryRunner) spawnWorkers(proc process.Process) {
 	for {
 		select {
 		case <-r.peersRemaining.Done():
 			return
 
-		case <-r.cg.Closing():
+		case <-r.proc.Closing():
 			return
 
 		case p, more := <-r.peersToQuery.DeqChan:
@@ -178,24 +193,27 @@ func (r *dhtQueryRunner) spawnWorkers(parent ctxgroup.ContextGroup) {
 			// do it as a child func to make sure Run exits
 			// ONLY AFTER spawn workers has exited.
-			parent.AddChildFunc(func(cg ctxgroup.ContextGroup) {
-				r.queryPeer(cg, p)
+			proc.Go(func(proc process.Process) {
+				r.queryPeer(proc, p)
 			})
 		}
 	}
 }
 
-func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
+func (r *dhtQueryRunner) queryPeer(proc process.Process, p peer.ID) {
 	// make sure we rate limit concurrency.
 	select {
 	case <-r.rateLimit:
-	case <-cg.Closing():
+	case <-proc.Closing():
 		r.peersRemaining.Decrement(1)
 		return
 	}
 
 	// ok let's do this!
+
+	// create a context from our proc.
+	ctx := ctxproc.WithProcessClosing(context.Background(), proc)
+
 	// make sure we do this when we exit
 	defer func() {
 		// signal we're done proccessing peer p
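queryPeer's first select draws from the rateLimit channel, a channel-as-semaphore that (outside this hunk) Run seeds with one token per allowed worker; the deferred send visible in the next hunk returns the token. An isolated sketch of that pattern, with a made-up concurrency value and worker body:

package main

import (
	"fmt"
	"sync"
)

func main() {
	const concurrency = 3
	rateLimit := make(chan struct{}, concurrency)
	for i := 0; i < concurrency; i++ {
		rateLimit <- struct{}{} // seed one token per allowed worker
	}

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			<-rateLimit                                // take a token; blocks at the limit
			defer func() { rateLimit <- struct{}{} }() // give it back on exit
			fmt.Println("worker", n, "running")
		}(i)
	}
	wg.Wait()
}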
@@ -212,10 +230,11 @@ func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
 		r.rateLimit <- struct{}{}
 
 	pi := peer.PeerInfo{ID: p}
-	if err := r.query.dht.host.Connect(cg.Context(), pi); err != nil {
+
+	if err := r.query.dht.host.Connect(ctx, pi); err != nil {
 		log.Debugf("Error connecting: %s", err)
-		notif.PublishQueryEvent(cg.Context(), &notif.QueryEvent{
+		notif.PublishQueryEvent(ctx, &notif.QueryEvent{
 			Type:  notif.QueryError,
 			Extra: err.Error(),
 		})
@@ -231,7 +250,7 @@ func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
 	}
 
 	// finally, run the query against this peer
-	res, err := r.query.qfunc(cg.Context(), p)
+	res, err := r.query.qfunc(ctx, p)
 	if err != nil {
 		log.Debugf("ERROR worker for: %v %v", p, err)
@@ -244,7 +263,7 @@ func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
 		r.Lock()
 		r.result = res
 		r.Unlock()
-		go r.cg.Close() // signal to everyone that we're done.
+		go r.proc.Close() // signal to everyone that we're done.
 		// must be async, as we're one of the children, and Close blocks.
 	} else if len(res.closerPeers) > 0 {
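The comment on the success path is worth unpacking: Close waits for every child spawned with Go, so a child that called it synchronously would deadlock waiting on itself. A minimal reproduction of why the call must be `go proc.Close()` (the surrounding program is illustrative):

package main

import (
	"fmt"

	goprocess "github.com/jbenet/goprocess"
)

func main() {
	proc := goprocess.WithParent(goprocess.Background())

	proc.Go(func(self goprocess.Process) {
		// Calling proc.Close() directly here would deadlock: Close waits
		// for this child to return. The async close lets the child exit,
		// after which Close can complete.
		go proc.Close()
		<-self.Closing()
	})

	<-proc.Closed()
	fmt.Println("closed cleanly")
}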
@ -257,7 +276,7 @@ func (r *dhtQueryRunner) queryPeer(cg ctxgroup.ContextGroup, p peer.ID) {
// add their addresses to the dialer's peerstore // add their addresses to the dialer's peerstore
r.query.dht.peerstore.AddAddrs(next.ID, next.Addrs, peer.TempAddrTTL) r.query.dht.peerstore.AddAddrs(next.ID, next.Addrs, peer.TempAddrTTL)
r.addPeerToQuery(cg.Context(), next.ID) r.addPeerToQuery(next.ID)
log.Debugf("PEERS CLOSER -- worker for: %v added %v (%v)", p, next.ID, next.Addrs) log.Debugf("PEERS CLOSER -- worker for: %v added %v (%v)", p, next.ID, next.Addrs)
} }
} else { } else {