Mirror of https://github.com/ipfs/kubo.git (synced 2025-06-22 21:21:43 +08:00)
@@ -18,6 +18,8 @@ const (
 	QueryError
 	Provider
 	Value
+	AddingPeer
+	DialingPeer
 )
 
 type QueryEvent struct {
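
Note: the two new event types, AddingPeer and DialingPeer, give API consumers finer-grained progress reporting for DHT queries. A minimal consumer-side sketch of how they might be observed, assuming the notif package pairs PublishQueryEvent with a RegisterForQueryEvents helper (that helper's name and signature are an assumption here, not part of this diff):

	// Sketch only: `notif` is the notifications package imported by query.go;
	// RegisterForQueryEvents is assumed to return a derived context plus a
	// channel of *QueryEvent values published via PublishQueryEvent.
	ctx, events := notif.RegisterForQueryEvents(ctx)
	go func() {
		for ev := range events {
			switch ev.Type {
			case notif.AddingPeer:
				fmt.Println("adding peer to query:", ev.ID)
			case notif.DialingPeer:
				fmt.Println("dialing peer:", ev.ID)
			case notif.QueryError:
				fmt.Println("query error from", ev.ID, "-", ev.Extra)
			}
		}
	}()
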
@@ -420,18 +420,18 @@ func TestDialBackoffClears(t *testing.T) {
 	}
 	s1.peers.AddAddrs(s2.local, ifaceAddrs1, peer.PermanentAddrTTL)
 
 	before = time.Now()
 	if _, err := s1.Dial(ctx, s2.local); err == nil {
 		t.Fatal("should have failed to dial backed off peer")
 	}
 
 	time.Sleep(baseBackoffTime)
 
 	if c, err := s1.Dial(ctx, s2.local); err != nil {
 		t.Fatal(err)
 	} else {
 		c.Close()
 		t.Log("correctly connected")
 	}
 	duration = time.Now().Sub(before)
 
 	if duration >= dt {
 		// t.Error("took too long", duration, dt)
 	}
 
 	if s1.backf.Backoff(s2.local) {
 		t.Error("s2 should no longer be on backoff")
@@ -145,44 +145,71 @@ func (ds *dialsync) Unlock(dst peer.ID) {
 //     dialbackoff.Clear(p)
 //   }
 //
 
 type dialbackoff struct {
-	entries map[peer.ID]struct{}
+	entries map[peer.ID]*backoffPeer
 	lock    sync.RWMutex
 }
 
+type backoffPeer struct {
+	tries int
+	until time.Time
+}
+
 func (db *dialbackoff) init() {
 	if db.entries == nil {
-		db.entries = make(map[peer.ID]struct{})
+		db.entries = make(map[peer.ID]*backoffPeer)
 	}
 }
 
 // Backoff returns whether the client should backoff from dialing
-// peeer p
-func (db *dialbackoff) Backoff(p peer.ID) bool {
+// peer p
+func (db *dialbackoff) Backoff(p peer.ID) (backoff bool) {
 	db.lock.Lock()
+	defer db.lock.Unlock()
 	db.init()
-	_, found := db.entries[p]
-	db.lock.Unlock()
-	return found
+	bp, found := db.entries[p]
+	if found && time.Now().Before(bp.until) {
+		return true
+	}
+
+	return false
 }
 
+const baseBackoffTime = time.Second * 5
+const maxBackoffTime = time.Minute * 5
+
 // AddBackoff lets other nodes know that we've entered backoff with
 // peer p, so dialers should not wait unnecessarily. We still will
 // attempt to dial with one goroutine, in case we get through.
 func (db *dialbackoff) AddBackoff(p peer.ID) {
 	db.lock.Lock()
+	defer db.lock.Unlock()
 	db.init()
-	db.entries[p] = struct{}{}
-	db.lock.Unlock()
+	bp, ok := db.entries[p]
+	if !ok {
+		db.entries[p] = &backoffPeer{
+			tries: 1,
+			until: time.Now().Add(baseBackoffTime),
+		}
+		return
+	}
+
+	expTimeAdd := time.Second * time.Duration(bp.tries*bp.tries)
+	if expTimeAdd > maxBackoffTime {
+		expTimeAdd = maxBackoffTime
+	}
+	bp.until = time.Now().Add(baseBackoffTime + expTimeAdd)
+	bp.tries++
 }
 
 // Clear removes a backoff record. Clients should call this after a
 // successful Dial.
 func (db *dialbackoff) Clear(p peer.ID) {
 	db.lock.Lock()
+	defer db.lock.Unlock()
 	db.init()
 	delete(db.entries, p)
-	db.lock.Unlock()
 }
 
 // Dial connects to a peer.
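
Note: the backoff window produced by AddBackoff grows quadratically with consecutive failures and is capped. A standalone sketch that reproduces the schedule, using the constants and formula from the hunk above (illustrative only):

	package main

	import (
		"fmt"
		"time"
	)

	const baseBackoffTime = time.Second * 5
	const maxBackoffTime = time.Minute * 5

	func main() {
		// After the first failed dial the window is baseBackoffTime; every
		// further failure adds tries*tries seconds, capped at maxBackoffTime.
		fmt.Println("failure 1:", baseBackoffTime)
		tries := 1
		for n := 2; n <= 6; n++ {
			expTimeAdd := time.Second * time.Duration(tries*tries)
			if expTimeAdd > maxBackoffTime {
				expTimeAdd = maxBackoffTime
			}
			fmt.Printf("failure %d: %v\n", n, baseBackoffTime+expTimeAdd)
			tries++
		}
		// Prints 5s, 6s, 9s, 14s, 21s, 30s, ... bounded above by 5m5s.
	}
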
@@ -225,14 +252,20 @@ func (s *Swarm) gatedDialAttempt(ctx context.Context, p peer.ID) (*Conn, error)
 	// check if there's an ongoing dial to this peer
 	if ok, wait := s.dsync.Lock(p); ok {
+		defer s.dsync.Unlock(p)
 
 		// if this peer has been backed off, lets get out of here
 		if s.backf.Backoff(p) {
 			log.Event(ctx, "swarmDialBackoff", logdial)
 			return nil, ErrDialBackoff
 		}
 
 		// ok, we have been charged to dial! let's do it.
 		// if it succeeds, dial will add the conn to the swarm itself.
 
 		defer log.EventBegin(ctx, "swarmDialAttemptStart", logdial).Done()
 		ctxT, cancel := context.WithTimeout(ctx, s.dialT)
 		conn, err := s.dial(ctxT, p)
 		cancel()
-		s.dsync.Unlock(p)
 		log.Debugf("dial end %s", conn)
 		if err != nil {
 			log.Event(ctx, "swarmDialBackoffAdd", logdial)
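
Note: with this change a dial to a peer inside its backoff window fails fast with ErrDialBackoff (the test hunk above relies on this). A hedged sketch of caller-side handling; the surrounding variables (s, ctx, p) are stand-ins for a *Swarm, a context, and a peer.ID, and it is assumed here that Swarm.Dial surfaces ErrDialBackoff as returned by gatedDialAttempt:

	conn, err := s.Dial(ctx, p)
	switch {
	case err == nil:
		defer conn.Close()
		// use the connection
	case err == ErrDialBackoff:
		// the peer recently failed to dial; retry later rather than hammering it
		// (the window starts at baseBackoffTime and grows each time AddBackoff fires)
	default:
		// a real dial attempt was made and failed
	}
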
@@ -157,5 +157,8 @@ func (s *Swarm) connHandler(c *ps.Conn) *Conn {
 		return nil
 	}
 
+	// if a peer dials us, remove from dial backoff.
+	s.backf.Clear(sc.RemotePeer())
+
 	return sc
 }
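
Note: this hunk closes the loop on backoff bookkeeping: a backoff is recorded when an outbound dial fails (gatedDialAttempt above), and Clear is called after a successful dial or, here, when the peer dials us. A compressed sketch of that lifecycle against the dialbackoff type defined earlier (p stands for any peer.ID; sketch only, the type is internal to the swarm package):

	var db dialbackoff

	db.AddBackoff(p)  // outbound dial to p failed: back off for baseBackoffTime
	_ = db.Backoff(p) // true while the window is open; Dial bails with ErrDialBackoff
	db.AddBackoff(p)  // another failure: the window grows by tries*tries seconds

	db.Clear(p)       // p connected to us (or a later dial succeeded): forget the backoff
	_ = db.Backoff(p) // false again; the next Dial proceeds normally
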
@@ -79,6 +79,8 @@ type dhtQueryRunner struct {
 	rateLimit chan struct{} // processing semaphore
 	log       logging.EventLogger
 
+	runCtx context.Context
+
 	proc process.Process
 	sync.RWMutex
 }
@@ -98,6 +100,7 @@ func newQueryRunner(q *dhtQuery) *dhtQueryRunner {
 
 func (r *dhtQueryRunner) Run(ctx context.Context, peers []peer.ID) (*dhtQueryResult, error) {
 	r.log = log
+	r.runCtx = ctx
 
 	if len(peers) == 0 {
 		log.Warning("Running query with no peers!")
@@ -167,6 +170,11 @@ func (r *dhtQueryRunner) addPeerToQuery(next peer.ID) {
 		return
 	}
 
+	notif.PublishQueryEvent(r.runCtx, &notif.QueryEvent{
+		Type: notif.AddingPeer,
+		ID:   next,
+	})
+
 	r.peersRemaining.Increment(1)
 	select {
 	case r.peersToQuery.EnqChan <- next:
@@ -221,7 +229,12 @@ func (r *dhtQueryRunner) queryPeer(proc process.Process, p peer.ID) {
 	// make sure we're connected to the peer.
 	// FIXME abstract away into the network layer
 	if conns := r.query.dht.host.Network().ConnsToPeer(p); len(conns) == 0 {
-		log.Infof("not connected. dialing.")
+		log.Error("not connected. dialing.")
+
+		notif.PublishQueryEvent(r.runCtx, &notif.QueryEvent{
+			Type: notif.DialingPeer,
+			ID:   p,
+		})
 		// while we dial, we do not take up a rate limit. this is to allow
 		// forward progress during potentially very high latency dials.
 		r.rateLimit <- struct{}{}
@@ -231,9 +244,10 @@ func (r *dhtQueryRunner) queryPeer(proc process.Process, p peer.ID) {
 		if err := r.query.dht.host.Connect(ctx, pi); err != nil {
 			log.Debugf("Error connecting: %s", err)
 
-			notif.PublishQueryEvent(ctx, &notif.QueryEvent{
+			notif.PublishQueryEvent(r.runCtx, &notif.QueryEvent{
 				Type:  notif.QueryError,
 				Extra: err.Error(),
 				ID:    p,
 			})
 
 			r.Lock()