mirror of
https://github.com/ipfs/kubo.git
synced 2025-06-26 07:28:20 +08:00
Merge pull request #1802 from ipfs/fix/too-many-fd
rate limit concurrent peer dials
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
package swarm
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
@ -49,7 +50,8 @@ func TestSimultOpenMany(t *testing.T) {
|
||||
|
||||
addrs := 20
|
||||
rounds := 10
|
||||
if ci.IsRunning() {
|
||||
if ci.IsRunning() || runtime.GOOS == "darwin" {
|
||||
// osx has a limit of 256 file descriptors
|
||||
addrs = 10
|
||||
rounds = 5
|
||||
}
|
||||
|
@ -64,6 +64,9 @@ type Swarm struct {
|
||||
// filters for addresses that shouldn't be dialed
|
||||
Filters *filter.Filters
|
||||
|
||||
// file descriptor rate limiter
|
||||
fdRateLimit chan struct{}
|
||||
|
||||
proc goprocess.Process
|
||||
ctx context.Context
|
||||
bwc metrics.Reporter
|
||||
@ -86,6 +89,7 @@ func NewSwarm(ctx context.Context, listenAddrs []ma.Multiaddr,
|
||||
dialT: DialTimeout,
|
||||
notifs: make(map[inet.Notifiee]ps.Notifiee),
|
||||
bwc: bwc,
|
||||
fdRateLimit: make(chan struct{}, concurrentFdDials),
|
||||
Filters: filter.NewFilters(),
|
||||
}
|
||||
|
||||
|
@ -44,6 +44,9 @@ var (
|
||||
// add loop back in Dial(.)
|
||||
const dialAttempts = 1
|
||||
|
||||
// number of concurrent outbound dials over transports that consume file descriptors
|
||||
const concurrentFdDials = 160
|
||||
|
||||
// DialTimeout is the amount of time each dial attempt has. We can think about making
|
||||
// this larger down the road, or putting more granular timeouts (i.e. within each
|
||||
// subcomponent of Dial)
|
||||
@ -115,6 +118,7 @@ func (ds *dialsync) Unlock(dst peer.ID) {
|
||||
if !found {
|
||||
panic("called dialDone with no ongoing dials to peer: " + dst.Pretty())
|
||||
}
|
||||
|
||||
delete(ds.ongoing, dst) // remove ongoing dial
|
||||
close(wait) // release everyone else
|
||||
ds.lock.Unlock()
|
||||
@ -398,7 +402,7 @@ func (s *Swarm) dialAddrs(ctx context.Context, d *conn.Dialer, p peer.ID, remote
|
||||
// to end early.
|
||||
go func() {
|
||||
// rate limiting just in case. at most 8 addrs at once.
|
||||
limiter := ratelimit.NewRateLimiter(process.Background(), 10)
|
||||
limiter := ratelimit.NewRateLimiter(process.Background(), 8)
|
||||
limiter.Go(func(worker process.Process) {
|
||||
// permute addrs so we try different sets first each time.
|
||||
for _, i := range rand.Perm(len(remoteAddrs)) {
|
||||
@ -411,9 +415,27 @@ func (s *Swarm) dialAddrs(ctx context.Context, d *conn.Dialer, p peer.ID, remote
|
||||
}
|
||||
|
||||
workerAddr := remoteAddrs[i] // shadow variable to avoid race
|
||||
|
||||
// we have to do the waiting concurrently because there are addrs
|
||||
// that SHOULD NOT be rate limited (utp), nor blocked by other
|
||||
// rate limited addrs (tcp).
|
||||
//
|
||||
// (and we need to call `limiter.Go`, instead of `go` as required
|
||||
// by goproc/limiter semantics. note: limiter.Go is not LimitedGo.)
|
||||
limiter.Go(func(p process.Process) {
|
||||
|
||||
// returns whatever ratelimiting is acceptable for workerAddr.
|
||||
// may not rate limit at all.
|
||||
rl := s.addrDialRateLimit(workerAddr)
|
||||
rl <- struct{}{}
|
||||
|
||||
limiter.LimitedGo(func(worker process.Process) {
|
||||
dialSingleAddr(workerAddr)
|
||||
})
|
||||
|
||||
<-rl
|
||||
})
|
||||
|
||||
}
|
||||
})
|
||||
|
||||
@ -491,3 +513,23 @@ func dialConnSetup(ctx context.Context, s *Swarm, connC conn.Conn) (*Conn, error
|
||||
|
||||
return swarmC, err
|
||||
}
|
||||
|
||||
// addrDialRateLimit returns a ratelimiting channel for dialing transport
|
||||
// addrs like a. for example, tcp is fd-ratelimited. utp is not ratelimited.
|
||||
func (s *Swarm) addrDialRateLimit(a ma.Multiaddr) chan struct{} {
|
||||
if isFDCostlyTransport(a) {
|
||||
return s.fdRateLimit
|
||||
}
|
||||
|
||||
// do not rate limit it at all
|
||||
return make(chan struct{}, 1)
|
||||
}
|
||||
|
||||
func isFDCostlyTransport(a ma.Multiaddr) bool {
|
||||
return isTCPMultiaddr(a)
|
||||
}
|
||||
|
||||
func isTCPMultiaddr(a ma.Multiaddr) bool {
|
||||
p := a.Protocols()
|
||||
return len(p) == 2 && (p[0].Name == "ip4" || p[0].Name == "ip6") && p[1].Name == "tcp"
|
||||
}
|
||||
|
Reference in New Issue
Block a user