
blocks/blockstore: Add bloom filter

Replace write_cache with bloom_cache
Improve ARC caching
Fix small issue in case AllKeysChan fails
deps: Update go-datastore
blocks/blockstore: Invalidate ARC cache before deleting block
deps: Update go-datastore

License: MIT
Signed-off-by: Jakub Sztandera <kubuxu@protonmail.ch>
Jakub Sztandera
2016-06-21 21:05:59 +02:00
parent 50a7df6c8e
commit 5d83d89f36
6 changed files with 227 additions and 88 deletions
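
The change replaces the old write cache (an LRU of recently written keys) with a two-level read cache: a bloom filter that can answer "definitely not present" without touching the datastore, backed by an ARC cache of recently confirmed answers. A minimal usage sketch assembled from the calls visible in this diff; the blockstore import path and the example sizes are assumptions (they mirror core/builder.go and the new test), not part of the commit:

package main

import (
    "fmt"

    blocks "github.com/ipfs/go-ipfs/blocks"
    blockstore "github.com/ipfs/go-ipfs/blocks/blockstore"

    ds "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore"
    syncds "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore/sync"
)

func main() {
    // A plain blockstore over an in-memory map datastore, as in the tests below.
    bs := blockstore.NewBlockstore(syncds.MutexWrap(ds.NewMapDatastore()))

    // Wrap it with the new cache: the same 256*1024 bloom size and
    // 128-entry ARC that TestHasIsBloomCached uses.
    cbs, err := blockstore.BloomCached(bs, 256*1024, 128)
    if err != nil {
        panic(err)
    }

    b := blocks.NewBlock([]byte("hello"))
    if err := cbs.Put(b); err != nil { // records the key in the bloom filter and the ARC
        panic(err)
    }
    has, err := cbs.Has(b.Key()) // usually answered from the caches alone
    fmt.Println(has, err)
}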


@@ -0,0 +1,175 @@
package blockstore

import (
    "github.com/ipfs/go-ipfs/blocks"
    key "github.com/ipfs/go-ipfs/blocks/key"

    lru "gx/ipfs/QmVYxfoJQiZijTgPNHCHgHELvQpbsJNTg6Crmc3dQkj3yy/golang-lru"
    bloom "gx/ipfs/QmWQ2SJisXwcCLsUXLwYCKSfyExXjFRW2WbBH5sqCUnwX5/bbloom"
    context "gx/ipfs/QmZy2y8t9zQH2a1b8q2ZSLKp17ATuJoCNxxyMFG5qFExpt/go-net/context"
    ds "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore"
)
// BloomCached returns a Blockstore that caches Has requests using a bloom filter.
// bloomSize is the size of the bloom filter in bytes.
func BloomCached(bs Blockstore, bloomSize, lruSize int) (*bloomcache, error) {
    bl, err := bloom.New(float64(bloomSize), float64(7))
    if err != nil {
        return nil, err
    }
    arc, err := lru.NewARC(lruSize)
    if err != nil {
        return nil, err
    }

    bc := &bloomcache{blockstore: bs, bloom: bl, arc: arc}
    bc.Invalidate()
    go bc.Rebuild()

    return bc, nil
}
type bloomcache struct {
    bloom  *bloom.Bloom
    active bool

    arc *lru.ARCCache
    // This chan is only used for testing, to wait for the bloom filter to become active.
    rebuildChan chan struct{}
    blockstore  Blockstore

    // Statistics
    hits   uint64
    misses uint64
}

func (b *bloomcache) Invalidate() {
    b.rebuildChan = make(chan struct{})
    b.active = false
}

func (b *bloomcache) BloomActive() bool {
    return b.active
}
func (b *bloomcache) Rebuild() {
    ctx := context.TODO()
    evt := log.EventBegin(ctx, "bloomcache.Rebuild")
    defer evt.Done()

    ch, err := b.blockstore.AllKeysChan(ctx)
    if err != nil {
        log.Errorf("AllKeysChan failed in bloomcache rebuild with: %v", err)
        return
    }
    for key := range ch {
        b.bloom.AddTS([]byte(key)) // Use binary key, the more compact the better
    }
    close(b.rebuildChan)
    b.active = true
}

func (b *bloomcache) DeleteBlock(k key.Key) error {
    if has, ok := b.hasCached(k); ok && !has {
        return ErrNotFound
    }

    b.arc.Remove(k) // Invalidate cache before deleting.
    err := b.blockstore.DeleteBlock(k)
    if err == nil {
        b.arc.Add(k, false)
    } else if err == ds.ErrNotFound || err == ErrNotFound {
        b.arc.Add(k, false)
        return ErrNotFound
    }
    return err
}
// hasCached reports whether the block is in the cache.
// If ok == false, has is inconclusive.
// If ok == true, then has answers the question: is the block contained in the blockstore?
func (b *bloomcache) hasCached(k key.Key) (has bool, ok bool) {
    if k == "" {
        return true, true
    }
    if b.active {
        blr := b.bloom.HasTS([]byte(k))
        if !blr { // Not contained in bloom is the only conclusive answer bloom gives.
            return blr, true
        }
    }
    h, ok := b.arc.Get(k)
    if ok {
        return h.(bool), ok
    }
    return false, ok
}
func (b *bloomcache) Has(k key.Key) (bool, error) {
    if has, ok := b.hasCached(k); ok {
        return has, nil
    }

    res, err := b.blockstore.Has(k)
    if err == nil {
        b.arc.Add(k, res)
    }
    return res, err
}

func (b *bloomcache) Get(k key.Key) (blocks.Block, error) {
    if has, ok := b.hasCached(k); ok && !has {
        return nil, ErrNotFound
    }

    bl, err := b.blockstore.Get(k)
    if bl == nil && err == ErrNotFound {
        b.arc.Add(k, false)
    } else if bl != nil {
        b.arc.Add(k, true)
    }
    return bl, err
}

func (b *bloomcache) Put(bl blocks.Block) error {
    if has, ok := b.hasCached(bl.Key()); ok && has {
        return nil
    }

    err := b.blockstore.Put(bl)
    if err == nil {
        b.bloom.AddTS([]byte(bl.Key()))
        b.arc.Add(bl.Key(), true)
    }
    return err
}
func (b *bloomcache) PutMany(bs []blocks.Block) error {
    var good []blocks.Block
    for _, block := range bs {
        if has, ok := b.hasCached(block.Key()); !ok || (ok && !has) {
            good = append(good, block)
        }
    }
    // Only write the blocks that are not already known to be present,
    // and record exactly those keys in the bloom filter.
    err := b.blockstore.PutMany(good)
    if err == nil {
        for _, block := range good {
            b.bloom.AddTS([]byte(block.Key()))
        }
    }
    return err
}

func (b *bloomcache) AllKeysChan(ctx context.Context) (<-chan key.Key, error) {
    return b.blockstore.AllKeysChan(ctx)
}

func (b *bloomcache) GCLock() Unlocker {
    return b.blockstore.(GCBlockstore).GCLock()
}

func (b *bloomcache) PinLock() Unlocker {
    return b.blockstore.(GCBlockstore).PinLock()
}

func (b *bloomcache) GCRequested() bool {
    return b.blockstore.(GCBlockstore).GCRequested()
}
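
The property the wrapper leans on is that a bloom filter's answers are one-sided: a negative is definitive, while a positive may be a false positive and still has to be checked against the ARC cache or the datastore. A small sketch using the same bbloom calls as the file above (the keys and sizes are illustrative only):

package main

import (
    "fmt"

    bloom "gx/ipfs/QmWQ2SJisXwcCLsUXLwYCKSfyExXjFRW2WbBH5sqCUnwX5/bbloom"
)

func main() {
    // Same constructor call as BloomCached: filter size parameter and 7 hash functions.
    bl, err := bloom.New(float64(256*1024), float64(7))
    if err != nil {
        panic(err)
    }
    bl.AddTS([]byte("present-key"))

    // A negative answer is conclusive; Has/Get never consult the datastore in that case.
    fmt.Println(bl.HasTS([]byte("absent-key"))) // almost certainly false

    // A positive answer may be wrong, so hasCached falls through to the ARC
    // cache and, if that also misses, to the backing blockstore.
    fmt.Println(bl.HasTS([]byte("present-key"))) // true
}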


@@ -1,28 +1,32 @@
package blockstore

import (
-   "testing"
+   "fmt"

    "github.com/ipfs/go-ipfs/blocks"
    ds "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore"
    dsq "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore/query"
    syncds "gx/ipfs/QmfQzVugPq1w5shWRcLWSeiHF4a2meBX7yVD8Vw7GWJM9o/go-datastore/sync"
+   "testing"
+   "time"
)

func TestReturnsErrorWhenSizeNegative(t *testing.T) {
    bs := NewBlockstore(syncds.MutexWrap(ds.NewMapDatastore()))
-   _, err := WriteCached(bs, -1)
-   if err != nil {
-       return
+   _, err := BloomCached(bs, 100, -1)
+   if err == nil {
+       t.Fail()
+   }
+   _, err = BloomCached(bs, -1, 100)
+   if err == nil {
+       t.Fail()
    }
-   t.Fail()
}

func TestRemoveCacheEntryOnDelete(t *testing.T) {
    b := blocks.NewBlock([]byte("foo"))
    cd := &callbackDatastore{f: func() {}, ds: ds.NewMapDatastore()}
    bs := NewBlockstore(syncds.MutexWrap(cd))
-   cachedbs, err := WriteCached(bs, 1)
+   cachedbs, err := BloomCached(bs, 1, 1)
    if err != nil {
        t.Fatal(err)
    }
@@ -43,7 +47,7 @@ func TestRemoveCacheEntryOnDelete(t *testing.T) {
func TestElideDuplicateWrite(t *testing.T) {
    cd := &callbackDatastore{f: func() {}, ds: ds.NewMapDatastore()}
    bs := NewBlockstore(syncds.MutexWrap(cd))
-   cachedbs, err := WriteCached(bs, 1)
+   cachedbs, err := BloomCached(bs, 1, 1)
    if err != nil {
        t.Fatal(err)
    }
@@ -56,6 +60,37 @@ func TestElideDuplicateWrite(t *testing.T) {
    })
    cachedbs.Put(b1)
}
+
+func TestHasIsBloomCached(t *testing.T) {
+   cd := &callbackDatastore{f: func() {}, ds: ds.NewMapDatastore()}
+   bs := NewBlockstore(syncds.MutexWrap(cd))
+   for i := 0; i < 1000; i++ {
+       bs.Put(blocks.NewBlock([]byte(fmt.Sprintf("data: %d", i))))
+   }
+   cachedbs, err := BloomCached(bs, 256*1024, 128)
+   if err != nil {
+       t.Fatal(err)
+   }
+
+   select {
+   case <-cachedbs.rebuildChan:
+   case <-time.After(1 * time.Second):
+       t.Fatalf("Timeout waiting for rebuild: %d", cachedbs.bloom.ElementsAdded())
+   }
+
+   cacheFails := 0
+   cd.SetFunc(func() {
+       cacheFails++
+   })
+
+   for i := 0; i < 1000; i++ {
+       cachedbs.Has(blocks.NewBlock([]byte(fmt.Sprintf("data: %d", i+2000))).Key())
+   }
+
+   if float64(cacheFails)/float64(1000) > float64(0.05) {
+       t.Fatal("Bloom filter has cache miss rate of more than 5%")
+   }
+}

type callbackDatastore struct {
    f func()

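The 5% threshold in TestHasIsBloomCached is deliberately generous. The textbook estimate for the false-positive rate of a bloom filter with m bits, k hash functions and n inserted keys is (1 - e^(-k*n/m))^k. The helper below just evaluates that formula; treating the 256*1024 passed to bloom.New as a bit count and 7 as k is an assumption made for illustration, not something this diff specifies about bbloom's internal sizing:

package main

import (
    "fmt"
    "math"
)

// falsePositiveRate returns the standard estimate (1 - e^(-k*n/m))^k for a
// bloom filter with m bits, k hash functions and n inserted elements.
func falsePositiveRate(m, k, n float64) float64 {
    return math.Pow(1-math.Exp(-k*n/m), k)
}

func main() {
    // 1000 keys, as inserted by the test, against the illustrative filter parameters.
    fmt.Printf("%.2e\n", falsePositiveRate(256*1024, 7, 1000))
}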

@@ -1,78 +0,0 @@
package blockstore

import (
    "github.com/ipfs/go-ipfs/blocks"
    key "github.com/ipfs/go-ipfs/blocks/key"

    "gx/ipfs/QmVYxfoJQiZijTgPNHCHgHELvQpbsJNTg6Crmc3dQkj3yy/golang-lru"
    context "gx/ipfs/QmZy2y8t9zQH2a1b8q2ZSLKp17ATuJoCNxxyMFG5qFExpt/go-net/context"
)

// WriteCached returns a blockstore that caches up to |size| unique writes (bs.Put).
func WriteCached(bs Blockstore, size int) (*writecache, error) {
    c, err := lru.New(size)
    if err != nil {
        return nil, err
    }
    return &writecache{blockstore: bs, cache: c}, nil
}

type writecache struct {
    cache      *lru.Cache // pointer b/c Cache contains a Mutex as value (complicates copying)
    blockstore Blockstore
}

func (w *writecache) DeleteBlock(k key.Key) error {
    defer log.EventBegin(context.TODO(), "writecache.BlockRemoved", &k).Done()
    w.cache.Remove(k)
    return w.blockstore.DeleteBlock(k)
}

func (w *writecache) Has(k key.Key) (bool, error) {
    if _, ok := w.cache.Get(k); ok {
        return true, nil
    }
    return w.blockstore.Has(k)
}

func (w *writecache) Get(k key.Key) (blocks.Block, error) {
    return w.blockstore.Get(k)
}

func (w *writecache) Put(b blocks.Block) error {
    k := b.Key()
    if _, ok := w.cache.Get(k); ok {
        return nil
    }
    defer log.EventBegin(context.TODO(), "writecache.BlockAdded", &k).Done()

    w.cache.Add(b.Key(), struct{}{})
    return w.blockstore.Put(b)
}

func (w *writecache) PutMany(bs []blocks.Block) error {
    var good []blocks.Block
    for _, b := range bs {
        if _, ok := w.cache.Get(b.Key()); !ok {
            good = append(good, b)
            k := b.Key()
            defer log.EventBegin(context.TODO(), "writecache.BlockAdded", &k).Done()
        }
    }
    return w.blockstore.PutMany(good)
}

func (w *writecache) AllKeysChan(ctx context.Context) (<-chan key.Key, error) {
    return w.blockstore.AllKeysChan(ctx)
}

func (w *writecache) GCLock() Unlocker {
    return w.blockstore.(GCBlockstore).GCLock()
}

func (w *writecache) PinLock() Unlocker {
    return w.blockstore.(GCBlockstore).PinLock()
}

func (w *writecache) GCRequested() bool {
    return w.blockstore.(GCBlockstore).GCRequested()
}


@@ -131,7 +131,7 @@ func setupNode(ctx context.Context, n *IpfsNode, cfg *BuildCfg) error {
    var err error
    bs := bstore.NewBlockstore(n.Repo.Datastore())
-   n.Blockstore, err = bstore.WriteCached(bs, kSizeBlockstoreWriteCache)
+   n.Blockstore, err = bstore.BloomCached(bs, 256*1024, kSizeBlockstoreWriteCache)
    if err != nil {
        return err
    }


@@ -87,12 +87,13 @@ func (i *Instance) SetBlockstoreLatency(t time.Duration) time.Duration {
// just a much better idea.
func Session(ctx context.Context, net tn.Network, p testutil.Identity) Instance {
    bsdelay := delay.Fixed(0)
+   const bloomSize = 512
    const writeCacheElems = 100

    adapter := net.Adapter(p)
    dstore := ds_sync.MutexWrap(datastore2.WithDelay(ds.NewMapDatastore(), bsdelay))

-   bstore, err := blockstore.WriteCached(blockstore.NewBlockstore(ds_sync.MutexWrap(dstore)), writeCacheElems)
+   bstore, err := blockstore.BloomCached(blockstore.NewBlockstore(ds_sync.MutexWrap(dstore)), bloomSize, writeCacheElems)
    if err != nil {
        panic(err.Error()) // FIXME perhaps change signature and return error.
    }


@@ -177,6 +177,12 @@
      "hash": "Qmb1DA2A9LS2wR4FFweB4uEDomFsdmnw1VLawLE1yQzudj",
      "name": "base32",
      "version": "0.0.0"
    },
+   {
+     "author": "kubuxu",
+     "hash": "QmWQ2SJisXwcCLsUXLwYCKSfyExXjFRW2WbBH5sqCUnwX5",
+     "name": "bbloom",
+     "version": "0.0.2"
+   }
  ],
  "gxVersion": "0.4.0",