mirror of
https://github.com/ipfs/kubo.git
synced 2025-07-01 10:49:24 +08:00
blocks/blockstore: Add bloom filter
Replace write_cache with bloom_cache Improve ARC caching Fix small issue in case of AllKeysChan fails deps: Update go-datastore blocks/blockstore: Invalidate ARC cache before deletin block deps: Update go-datastore License: MIT Signed-off-by: Jakub Sztandera <kubuxu@protonmail.ch>
This commit is contained in:
175
blocks/blockstore/bloom_cache.go
Normal file
175
blocks/blockstore/bloom_cache.go
Normal file
@ -0,0 +1,175 @@
|
||||
package blockstore
|
||||
|
||||
import (
|
||||
"github.com/ipfs/go-ipfs/blocks"
|
||||
key "github.com/ipfs/go-ipfs/blocks/key"
|
||||
lru "gx/ipfs/QmVYxfoJQiZijTgPNHCHgHELvQpbsJNTg6Crmc3dQkj3yy/golang-lru"
|
||||
bloom "gx/ipfs/QmWQ2SJisXwcCLsUXLwYCKSfyExXjFRW2WbBH5sqCUnwX5/bbloom"
|
||||
context "gx/ipfs/QmZy2y8t9zQH2a1b8q2ZSLKp17ATuJoCNxxyMFG5qFExpt/go-net/context"
|
||||
ds "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore"
|
||||
)
|
||||
|
||||
// BloomCached returns Blockstore that caches Has requests using Bloom filter
|
||||
// Size is size of bloom filter in bytes
|
||||
func BloomCached(bs Blockstore, bloomSize, lruSize int) (*bloomcache, error) {
|
||||
bl, err := bloom.New(float64(bloomSize), float64(7))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arc, err := lru.NewARC(lruSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
bc := &bloomcache{blockstore: bs, bloom: bl, arc: arc}
|
||||
bc.Invalidate()
|
||||
go bc.Rebuild()
|
||||
|
||||
return bc, nil
|
||||
}
|
||||
|
||||
type bloomcache struct {
|
||||
bloom *bloom.Bloom
|
||||
active bool
|
||||
|
||||
arc *lru.ARCCache
|
||||
// This chan is only used for testing to wait for bloom to enable
|
||||
rebuildChan chan struct{}
|
||||
blockstore Blockstore
|
||||
|
||||
// Statistics
|
||||
hits uint64
|
||||
misses uint64
|
||||
}
|
||||
|
||||
func (b *bloomcache) Invalidate() {
|
||||
b.rebuildChan = make(chan struct{})
|
||||
b.active = false
|
||||
}
|
||||
|
||||
func (b *bloomcache) BloomActive() bool {
|
||||
return b.active
|
||||
}
|
||||
|
||||
func (b *bloomcache) Rebuild() {
|
||||
ctx := context.TODO()
|
||||
evt := log.EventBegin(ctx, "bloomcache.Rebuild")
|
||||
defer evt.Done()
|
||||
|
||||
ch, err := b.blockstore.AllKeysChan(ctx)
|
||||
if err != nil {
|
||||
log.Errorf("AllKeysChan failed in bloomcache rebuild with: %v", err)
|
||||
return
|
||||
}
|
||||
for key := range ch {
|
||||
b.bloom.AddTS([]byte(key)) // Use binary key, the more compact the better
|
||||
}
|
||||
close(b.rebuildChan)
|
||||
b.active = true
|
||||
}
|
||||
|
||||
func (b *bloomcache) DeleteBlock(k key.Key) error {
|
||||
if has, ok := b.hasCached(k); ok && !has {
|
||||
return ErrNotFound
|
||||
}
|
||||
|
||||
b.arc.Remove(k) // Invalidate cache before deleting.
|
||||
err := b.blockstore.DeleteBlock(k)
|
||||
if err == nil {
|
||||
b.arc.Add(k, false)
|
||||
} else if err == ds.ErrNotFound || err == ErrNotFound {
|
||||
b.arc.Add(k, false)
|
||||
return ErrNotFound
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// if ok == false has is inconclusive
|
||||
// if ok == true then has respons to question: is it contained
|
||||
func (b *bloomcache) hasCached(k key.Key) (has bool, ok bool) {
|
||||
if k == "" {
|
||||
return true, true
|
||||
}
|
||||
if b.active {
|
||||
blr := b.bloom.HasTS([]byte(k))
|
||||
if blr == false { // not contained in bloom is only conclusive answer bloom gives
|
||||
return blr, true
|
||||
}
|
||||
}
|
||||
h, ok := b.arc.Get(k)
|
||||
if ok {
|
||||
return h.(bool), ok
|
||||
} else {
|
||||
return false, ok
|
||||
}
|
||||
}
|
||||
|
||||
func (b *bloomcache) Has(k key.Key) (bool, error) {
|
||||
if has, ok := b.hasCached(k); ok {
|
||||
return has, nil
|
||||
}
|
||||
|
||||
res, err := b.blockstore.Has(k)
|
||||
if err == nil {
|
||||
b.arc.Add(k, res)
|
||||
}
|
||||
return res, err
|
||||
}
|
||||
|
||||
func (b *bloomcache) Get(k key.Key) (blocks.Block, error) {
|
||||
if has, ok := b.hasCached(k); ok && !has {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
|
||||
bl, err := b.blockstore.Get(k)
|
||||
if bl == nil && err == ErrNotFound {
|
||||
b.arc.Add(k, false)
|
||||
} else if bl != nil {
|
||||
b.arc.Add(k, true)
|
||||
}
|
||||
return bl, err
|
||||
}
|
||||
|
||||
func (b *bloomcache) Put(bl blocks.Block) error {
|
||||
if has, ok := b.hasCached(bl.Key()); ok && has {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := b.blockstore.Put(bl)
|
||||
if err == nil {
|
||||
b.bloom.AddTS([]byte(bl.Key()))
|
||||
b.arc.Add(bl.Key(), true)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *bloomcache) PutMany(bs []blocks.Block) error {
|
||||
var good []blocks.Block
|
||||
for _, block := range bs {
|
||||
if has, ok := b.hasCached(block.Key()); !ok || (ok && !has) {
|
||||
good = append(good, block)
|
||||
}
|
||||
}
|
||||
err := b.blockstore.PutMany(bs)
|
||||
if err == nil {
|
||||
for _, block := range bs {
|
||||
b.bloom.AddTS([]byte(block.Key()))
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *bloomcache) AllKeysChan(ctx context.Context) (<-chan key.Key, error) {
|
||||
return b.blockstore.AllKeysChan(ctx)
|
||||
}
|
||||
|
||||
func (b *bloomcache) GCLock() Unlocker {
|
||||
return b.blockstore.(GCBlockstore).GCLock()
|
||||
}
|
||||
|
||||
func (b *bloomcache) PinLock() Unlocker {
|
||||
return b.blockstore.(GCBlockstore).PinLock()
|
||||
}
|
||||
|
||||
func (b *bloomcache) GCRequested() bool {
|
||||
return b.blockstore.(GCBlockstore).GCRequested()
|
||||
}
|
@ -1,28 +1,32 @@
|
||||
package blockstore
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"fmt"
|
||||
"github.com/ipfs/go-ipfs/blocks"
|
||||
ds "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore"
|
||||
dsq "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore/query"
|
||||
syncds "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore/sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestReturnsErrorWhenSizeNegative(t *testing.T) {
|
||||
bs := NewBlockstore(syncds.MutexWrap(ds.NewMapDatastore()))
|
||||
_, err := WriteCached(bs, -1)
|
||||
if err != nil {
|
||||
return
|
||||
_, err := BloomCached(bs, 100, -1)
|
||||
if err == nil {
|
||||
t.Fail()
|
||||
}
|
||||
_, err = BloomCached(bs, -1, 100)
|
||||
if err == nil {
|
||||
t.Fail()
|
||||
}
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
func TestRemoveCacheEntryOnDelete(t *testing.T) {
|
||||
b := blocks.NewBlock([]byte("foo"))
|
||||
cd := &callbackDatastore{f: func() {}, ds: ds.NewMapDatastore()}
|
||||
bs := NewBlockstore(syncds.MutexWrap(cd))
|
||||
cachedbs, err := WriteCached(bs, 1)
|
||||
cachedbs, err := BloomCached(bs, 1, 1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -43,7 +47,7 @@ func TestRemoveCacheEntryOnDelete(t *testing.T) {
|
||||
func TestElideDuplicateWrite(t *testing.T) {
|
||||
cd := &callbackDatastore{f: func() {}, ds: ds.NewMapDatastore()}
|
||||
bs := NewBlockstore(syncds.MutexWrap(cd))
|
||||
cachedbs, err := WriteCached(bs, 1)
|
||||
cachedbs, err := BloomCached(bs, 1, 1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -56,6 +60,37 @@ func TestElideDuplicateWrite(t *testing.T) {
|
||||
})
|
||||
cachedbs.Put(b1)
|
||||
}
|
||||
func TestHasIsBloomCached(t *testing.T) {
|
||||
cd := &callbackDatastore{f: func() {}, ds: ds.NewMapDatastore()}
|
||||
bs := NewBlockstore(syncds.MutexWrap(cd))
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
bs.Put(blocks.NewBlock([]byte(fmt.Sprintf("data: %d", i))))
|
||||
}
|
||||
cachedbs, err := BloomCached(bs, 256*1024, 128)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-cachedbs.rebuildChan:
|
||||
case <-time.After(1 * time.Second):
|
||||
t.Fatalf("Timeout wating for rebuild: %d", cachedbs.bloom.ElementsAdded())
|
||||
}
|
||||
|
||||
cacheFails := 0
|
||||
cd.SetFunc(func() {
|
||||
cacheFails++
|
||||
})
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
cachedbs.Has(blocks.NewBlock([]byte(fmt.Sprintf("data: %d", i+2000))).Key())
|
||||
}
|
||||
|
||||
if float64(cacheFails)/float64(1000) > float64(0.05) {
|
||||
t.Fatal("Bloom filter has cache miss rate of more than 5%")
|
||||
}
|
||||
}
|
||||
|
||||
type callbackDatastore struct {
|
||||
f func()
|
@ -1,78 +0,0 @@
|
||||
package blockstore
|
||||
|
||||
import (
|
||||
"github.com/ipfs/go-ipfs/blocks"
|
||||
key "github.com/ipfs/go-ipfs/blocks/key"
|
||||
"gx/ipfs/QmVYxfoJQiZijTgPNHCHgHELvQpbsJNTg6Crmc3dQkj3yy/golang-lru"
|
||||
context "gx/ipfs/QmZy2y8t9zQH2a1b8q2ZSLKp17ATuJoCNxxyMFG5qFExpt/go-net/context"
|
||||
)
|
||||
|
||||
// WriteCached returns a blockstore that caches up to |size| unique writes (bs.Put).
|
||||
func WriteCached(bs Blockstore, size int) (*writecache, error) {
|
||||
c, err := lru.New(size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &writecache{blockstore: bs, cache: c}, nil
|
||||
}
|
||||
|
||||
type writecache struct {
|
||||
cache *lru.Cache // pointer b/c Cache contains a Mutex as value (complicates copying)
|
||||
blockstore Blockstore
|
||||
}
|
||||
|
||||
func (w *writecache) DeleteBlock(k key.Key) error {
|
||||
defer log.EventBegin(context.TODO(), "writecache.BlockRemoved", &k).Done()
|
||||
w.cache.Remove(k)
|
||||
return w.blockstore.DeleteBlock(k)
|
||||
}
|
||||
|
||||
func (w *writecache) Has(k key.Key) (bool, error) {
|
||||
if _, ok := w.cache.Get(k); ok {
|
||||
return true, nil
|
||||
}
|
||||
return w.blockstore.Has(k)
|
||||
}
|
||||
|
||||
func (w *writecache) Get(k key.Key) (blocks.Block, error) {
|
||||
return w.blockstore.Get(k)
|
||||
}
|
||||
|
||||
func (w *writecache) Put(b blocks.Block) error {
|
||||
k := b.Key()
|
||||
if _, ok := w.cache.Get(k); ok {
|
||||
return nil
|
||||
}
|
||||
defer log.EventBegin(context.TODO(), "writecache.BlockAdded", &k).Done()
|
||||
|
||||
w.cache.Add(b.Key(), struct{}{})
|
||||
return w.blockstore.Put(b)
|
||||
}
|
||||
|
||||
func (w *writecache) PutMany(bs []blocks.Block) error {
|
||||
var good []blocks.Block
|
||||
for _, b := range bs {
|
||||
if _, ok := w.cache.Get(b.Key()); !ok {
|
||||
good = append(good, b)
|
||||
k := b.Key()
|
||||
defer log.EventBegin(context.TODO(), "writecache.BlockAdded", &k).Done()
|
||||
}
|
||||
}
|
||||
return w.blockstore.PutMany(good)
|
||||
}
|
||||
|
||||
func (w *writecache) AllKeysChan(ctx context.Context) (<-chan key.Key, error) {
|
||||
return w.blockstore.AllKeysChan(ctx)
|
||||
}
|
||||
|
||||
func (w *writecache) GCLock() Unlocker {
|
||||
return w.blockstore.(GCBlockstore).GCLock()
|
||||
}
|
||||
|
||||
func (w *writecache) PinLock() Unlocker {
|
||||
return w.blockstore.(GCBlockstore).PinLock()
|
||||
}
|
||||
|
||||
func (w *writecache) GCRequested() bool {
|
||||
return w.blockstore.(GCBlockstore).GCRequested()
|
||||
}
|
@ -131,7 +131,7 @@ func setupNode(ctx context.Context, n *IpfsNode, cfg *BuildCfg) error {
|
||||
|
||||
var err error
|
||||
bs := bstore.NewBlockstore(n.Repo.Datastore())
|
||||
n.Blockstore, err = bstore.WriteCached(bs, kSizeBlockstoreWriteCache)
|
||||
n.Blockstore, err = bstore.BloomCached(bs, 256*1024, kSizeBlockstoreWriteCache)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -87,12 +87,13 @@ func (i *Instance) SetBlockstoreLatency(t time.Duration) time.Duration {
|
||||
// just a much better idea.
|
||||
func Session(ctx context.Context, net tn.Network, p testutil.Identity) Instance {
|
||||
bsdelay := delay.Fixed(0)
|
||||
const bloomSize = 512
|
||||
const writeCacheElems = 100
|
||||
|
||||
adapter := net.Adapter(p)
|
||||
dstore := ds_sync.MutexWrap(datastore2.WithDelay(ds.NewMapDatastore(), bsdelay))
|
||||
|
||||
bstore, err := blockstore.WriteCached(blockstore.NewBlockstore(ds_sync.MutexWrap(dstore)), writeCacheElems)
|
||||
bstore, err := blockstore.BloomCached(blockstore.NewBlockstore(ds_sync.MutexWrap(dstore)), bloomSize, writeCacheElems)
|
||||
if err != nil {
|
||||
panic(err.Error()) // FIXME perhaps change signature and return error.
|
||||
}
|
||||
|
@ -177,6 +177,12 @@
|
||||
"hash": "Qmb1DA2A9LS2wR4FFweB4uEDomFsdmnw1VLawLE1yQzudj",
|
||||
"name": "base32",
|
||||
"version": "0.0.0"
|
||||
},
|
||||
{
|
||||
"author": "kubuxu",
|
||||
"hash": "QmWQ2SJisXwcCLsUXLwYCKSfyExXjFRW2WbBH5sqCUnwX5",
|
||||
"name": "bbloom",
|
||||
"version": "0.0.2"
|
||||
}
|
||||
],
|
||||
"gxVersion": "0.4.0",
|
||||
|
Reference in New Issue
Block a user