mirror of
https://github.com/ipfs/kubo.git
synced 2025-07-02 03:28:25 +08:00
Make bloom filters simpler
These did not work before, and had some unnecessary complexity. Now the filters use only one hashing function and no bignum arithmetic, and get the additional bit positions by repeatedly hashing the result of the prior hash. Since we're not concerned about crypto hashing here, this should be a win. External interfaces unchanged.
This commit is contained in:
12
Godeps/Godeps.json
generated
12
Godeps/Godeps.json
generated
@ -213,6 +213,10 @@
|
||||
"ImportPath": "github.com/mitchellh/go-homedir",
|
||||
"Rev": "7d2d8c8a4e078ce3c58736ab521a40b37a504c52"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/mtchavez/jenkins",
|
||||
"Rev": "5a816af6ef21ef401bff5e4b7dd255d63400f497"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
|
||||
"Rev": "87e4e645d80ae9c537e8f2dee52b28036a5dd75e"
|
||||
@ -221,6 +225,10 @@
|
||||
"ImportPath": "github.com/syndtr/gosnappy/snappy",
|
||||
"Rev": "156a073208e131d7d2e212cb749feae7c339e846"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/whyrusleeping/go-metrics",
|
||||
"Rev": "1cd8009604ec2238b5a71305a0ecd974066e0e16"
|
||||
},
|
||||
{
|
||||
"ImportPath": "golang.org/x/crypto/blowfish",
|
||||
"Rev": "b7d6bf2c61544745a02f83dec90393985fc3a065"
|
||||
@ -233,10 +241,6 @@
|
||||
"ImportPath": "golang.org/x/net/context",
|
||||
"Rev": "7dbad50ab5b31073856416cdcfeb2796d682f844"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/whyrusleeping/go-metrics",
|
||||
"Rev": "1cd8009604ec2238b5a71305a0ecd974066e0e16"
|
||||
},
|
||||
{
|
||||
"ImportPath": "gopkg.in/fsnotify.v1",
|
||||
"Comment": "v1.2.0",
|
||||
|
23
Godeps/_workspace/src/github.com/mtchavez/jenkins/.gitignore
generated
vendored
Normal file
23
Godeps/_workspace/src/github.com/mtchavez/jenkins/.gitignore
generated
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
8
Godeps/_workspace/src/github.com/mtchavez/jenkins/.travis.yml
generated
vendored
Normal file
8
Godeps/_workspace/src/github.com/mtchavez/jenkins/.travis.yml
generated
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
go:
|
||||
- 1.1
|
||||
- tip
|
||||
install:
|
||||
- go get github.com/onsi/ginkgo
|
||||
- go get github.com/onsi/gomega
|
||||
before_script: go test -i ./...
|
||||
script: go test ./...
|
11
Godeps/_workspace/src/github.com/mtchavez/jenkins/Makefile
generated
vendored
Normal file
11
Godeps/_workspace/src/github.com/mtchavez/jenkins/Makefile
generated
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
build:
|
||||
go build jenkins.go
|
||||
|
||||
run:
|
||||
go run jenkins.go
|
||||
|
||||
test:
|
||||
go test -cover
|
||||
|
||||
default:
|
||||
go run jenkins.go
|
45
Godeps/_workspace/src/github.com/mtchavez/jenkins/README.md
generated
vendored
Normal file
45
Godeps/_workspace/src/github.com/mtchavez/jenkins/README.md
generated
vendored
Normal file
@ -0,0 +1,45 @@
|
||||
Jenkins
|
||||
=================
|
||||
|
||||
Golang Jenkins hash
|
||||
|
||||
[Build status on Travis CI](https://travis-ci.org/mtchavez/go-jenkins-hashes)
|
||||
|
||||
## Install
|
||||
|
||||
`go get -u github.com/mtchavez/jenkins`
|
||||
|
||||
## Usage
|
||||
|
||||
Jenkins follows the [Hash32](http://golang.org/pkg/hash/#Hash32) interface from the Go standard library
|
||||
|
||||
```go
|
||||
// Create a new hash
|
||||
jenkhash := jenkins.New()
|
||||
|
||||
// Write a string of bytes to hash
|
||||
key := []byte("my-random-key")
|
||||
length, err := jenkhash.Write(key)
|
||||
|
||||
// Get uint32 sum of hash
|
||||
sum := jenkhash.Sum32()
|
||||
|
||||
// Sum hash with byte string
|
||||
sumbytes := jenkhash.Sum(key)
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
Uses [Ginkgo](http://onsi.github.io/ginkgo/) for testing.
|
||||
|
||||
Run via `make test` which will run `go test -cover`
|
||||
|
||||
## Documentation
|
||||
|
||||
Docs on [godoc](http://godoc.org/github.com/mtchavez/jenkins)
|
||||
|
||||
## License
|
||||
|
||||
Written by Chavez
|
||||
|
||||
Released under the MIT License: http://www.opensource.org/licenses/mit-license.php
|
48
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins.go
generated
vendored
Normal file
48
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins.go
generated
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
package jenkins
|
||||
|
||||
import "hash"
|
||||
|
||||
// jenkhash is the running state of Bob Jenkins' one-at-a-time hash. It holds
// the per-byte mixing state only; the final avalanche step is applied by
// Sum32, so the state can keep absorbing data after a sum has been read.
type jenkhash uint32

// New returns a one-at-a-time hash with empty state. The sum of the empty
// input is 0.
func New() hash.Hash32 {
	var j jenkhash
	return &j
}

// Write mixes every byte of key into the running state and returns len(key)
// and a nil error.
//
// Only the per-byte mixing happens here. Finalising inside Write (as this
// type used to do) makes the result depend on how the input is split across
// calls, so Write("Ap") followed by Write("ple") would not equal
// Write("Apple").
func (j *jenkhash) Write(key []byte) (int, error) {
	state := *j

	for _, b := range key {
		state += jenkhash(b)
		state += state << 10
		state ^= state >> 6
	}

	*j = state
	return len(key), nil
}

// Reset returns the hash to its initial, empty state.
func (j *jenkhash) Reset() {
	*j = 0
}

// Size returns the number of bytes Sum appends: 4.
func (j *jenkhash) Size() int {
	return 4
}

// BlockSize returns 1: the hash consumes its input one byte at a time.
func (j *jenkhash) BlockSize() int {
	return 1
}

// Sum32 returns the hash of everything written so far. It applies the final
// avalanche to a copy of the state, so the running state is left untouched
// and more data may be written afterwards.
func (j *jenkhash) Sum32() uint32 {
	sum := uint32(*j)
	sum += sum << 3
	sum ^= sum >> 11
	sum += sum << 15
	return sum
}

// Sum appends the current 32-bit sum to in, most significant byte first, and
// returns the resulting slice. It does not change the running state.
func (j *jenkhash) Sum(in []byte) []byte {
	v := j.Sum32()
	return append(in, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}
|
13
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_suite_test.go
generated
vendored
Normal file
13
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_suite_test.go
generated
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
package jenkins
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestJenkins(t *testing.T) {
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "Jenkins Suite")
|
||||
}
|
101
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_test.go
generated
vendored
Normal file
101
Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_test.go
generated
vendored
Normal file
@ -0,0 +1,101 @@
|
||||
package jenkins
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
"hash"
|
||||
)
|
||||
|
||||
var _ = Describe("Jenkins", func() {
|
||||
|
||||
var jhash hash.Hash32
|
||||
var key []byte
|
||||
|
||||
BeforeEach(func() {
|
||||
jhash = New()
|
||||
key = []byte("Apple")
|
||||
})
|
||||
|
||||
Describe("New", func() {
|
||||
|
||||
It("returns jenkhash", func() {
|
||||
var h *jenkhash
|
||||
Expect(jhash).To(BeAssignableToTypeOf(h))
|
||||
})
|
||||
|
||||
It("initializes offset to 0", func() {
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(0)))
|
||||
})
|
||||
})
|
||||
|
||||
Describe("Write", func() {
|
||||
|
||||
It("returns key length", func() {
|
||||
length, _ := jhash.Write(key)
|
||||
Expect(length).To(Equal(5))
|
||||
})
|
||||
|
||||
It("has no error", func() {
|
||||
_, err := jhash.Write(key)
|
||||
Expect(err).To(BeNil())
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("Reset", func() {
|
||||
|
||||
It("sets back to 0", func() {
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(0)))
|
||||
jhash.Write(key)
|
||||
Expect(jhash.Sum32()).NotTo(Equal(uint32(0)))
|
||||
jhash.Reset()
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(0)))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("Size", func() {
|
||||
|
||||
It("is 4", func() {
|
||||
Expect(jhash.Size()).To(Equal(4))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("BlockSize", func() {
|
||||
|
||||
It("is 1", func() {
|
||||
Expect(jhash.BlockSize()).To(Equal(1))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("Sum32", func() {
|
||||
|
||||
It("defaults to 0", func() {
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(0)))
|
||||
})
|
||||
|
||||
It("sums hash", func() {
|
||||
jhash.Write(key)
|
||||
Expect(jhash.Sum32()).To(Equal(uint32(884782484)))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Describe("Sum", func() {
|
||||
|
||||
It("default 0 hash byte returned", func() {
|
||||
expected := []byte{0x41, 0x70, 0x70, 0x6c, 0x65, 0x0, 0x0, 0x0, 0x0}
|
||||
Expect(jhash.Sum(key)).To(Equal(expected))
|
||||
})
|
||||
|
||||
It("returns sum byte array", func() {
|
||||
jhash.Write(key)
|
||||
expected := []byte{0x41, 0x70, 0x70, 0x6c, 0x65, 0x34, 0xbc, 0xb5, 0x94}
|
||||
Expect(jhash.Sum(key)).To(Equal(expected))
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
})
|
@ -2,13 +2,11 @@
|
||||
package bloom
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
// Non crypto hash, because speed
|
||||
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins"
|
||||
"hash"
|
||||
"hash/adler32"
|
||||
"hash/crc32"
|
||||
"hash/fnv"
|
||||
"math/big"
|
||||
)
|
||||
|
||||
type Filter interface {
|
||||
@ -17,61 +15,66 @@ type Filter interface {
|
||||
Merge(Filter) (Filter, error)
|
||||
}
|
||||
|
||||
func BasicFilter() Filter {
|
||||
// Non crypto hashes, because speed
|
||||
return NewFilter(2048, adler32.New(), fnv.New32(), crc32.NewIEEE())
|
||||
}
|
||||
|
||||
func NewFilter(size int, hashes ...hash.Hash) Filter {
|
||||
func NewFilter(size int) Filter {
|
||||
return &filter{
|
||||
hash: jenkins.New(),
|
||||
filter: make([]byte, size),
|
||||
hashes: hashes,
|
||||
k: 3,
|
||||
}
|
||||
}
|
||||
|
||||
type filter struct {
|
||||
filter []byte
|
||||
hashes []hash.Hash
|
||||
hash hash.Hash32
|
||||
k int
|
||||
}
|
||||
|
||||
func (f *filter) Add(k []byte) {
|
||||
for _, h := range f.hashes {
|
||||
i := bytesMod(h.Sum(k), int64(len(f.filter)*8))
|
||||
f.setBit(i)
|
||||
func BasicFilter() Filter {
|
||||
return NewFilter(2048)
|
||||
}
|
||||
|
||||
func (f *filter) Add(bytes []byte) {
|
||||
for _, bit := range f.getBitIndicies(bytes) {
|
||||
f.setBit(bit)
|
||||
}
|
||||
}
|
||||
|
||||
func (f *filter) Find(k []byte) bool {
|
||||
for _, h := range f.hashes {
|
||||
i := bytesMod(h.Sum(k), int64(len(f.filter)*8))
|
||||
if !f.getBit(i) {
|
||||
func (f *filter) getBitIndicies(bytes []byte) []uint32 {
|
||||
indicies := make([]uint32, f.k)
|
||||
|
||||
f.hash.Write(bytes)
|
||||
b := make([]byte, 4)
|
||||
|
||||
for i := 0; i < f.k; i++ {
|
||||
res := f.hash.Sum32()
|
||||
indicies[i] = res % (uint32(len(f.filter)) * 8)
|
||||
|
||||
binary.LittleEndian.PutUint32(b, res)
|
||||
f.hash.Write(b)
|
||||
}
|
||||
|
||||
f.hash.Reset()
|
||||
|
||||
return indicies
|
||||
}
|
||||
|
||||
func (f *filter) Find(bytes []byte) bool {
|
||||
for _, bit := range f.getBitIndicies(bytes) {
|
||||
if !f.getBit(bit) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (f *filter) setBit(i int64) {
|
||||
fmt.Printf("setting bit %d\n", i)
|
||||
func (f *filter) setBit(i uint32) {
|
||||
f.filter[i/8] |= (1 << byte(i%8))
|
||||
}
|
||||
|
||||
func (f *filter) getBit(i int64) bool {
|
||||
fmt.Printf("getting bit %d\n", i)
|
||||
func (f *filter) getBit(i uint32) bool {
|
||||
return f.filter[i/8]&(1<<byte(i%8)) != 0
|
||||
}
|
||||
|
||||
func bytesMod(b []byte, modulo int64) int64 {
|
||||
i := big.NewInt(0)
|
||||
i = i.SetBytes(b)
|
||||
|
||||
bigmod := big.NewInt(int64(modulo))
|
||||
result := big.NewInt(0)
|
||||
result.Mod(i, bigmod)
|
||||
|
||||
return result.Int64()
|
||||
}
|
||||
|
||||
func (f *filter) Merge(o Filter) (Filter, error) {
|
||||
casfil, ok := o.(*filter)
|
||||
if !ok {
|
||||
@ -82,12 +85,15 @@ func (f *filter) Merge(o Filter) (Filter, error) {
|
||||
return nil, errors.New("filter lengths must match!")
|
||||
}
|
||||
|
||||
if casfil.k != f.k {
|
||||
return nil, errors.New("filter k-values must match!")
|
||||
}
|
||||
|
||||
nfilt := new(filter)
|
||||
|
||||
// this bit is sketchy, need a way of comparing hash functions
|
||||
nfilt.hashes = f.hashes
|
||||
|
||||
nfilt.hash = f.hash
|
||||
nfilt.filter = make([]byte, len(f.filter))
|
||||
nfilt.k = f.k
|
||||
|
||||
for i, v := range f.filter {
|
||||
nfilt.filter[i] = v | casfil.filter[i]
|
||||
}
|
||||
|
@ -1,13 +1,19 @@
|
||||
package bloom
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFilter(t *testing.T) {
|
||||
f := BasicFilter()
|
||||
f := NewFilter(128)
|
||||
|
||||
keys := [][]byte{
|
||||
[]byte("hello"),
|
||||
[]byte("fish"),
|
||||
[]byte("ipfsrocks"),
|
||||
[]byte("i want ipfs socks"),
|
||||
}
|
||||
|
||||
f.Add(keys[0])
|
||||
@ -21,10 +27,54 @@ func TestFilter(t *testing.T) {
|
||||
}
|
||||
|
||||
f.Add(keys[2])
|
||||
f.Add(keys[3])
|
||||
|
||||
for _, k := range keys {
|
||||
if !f.Find(k) {
|
||||
t.Fatal("Couldnt find one of three keys")
|
||||
}
|
||||
}
|
||||
|
||||
if f.Find([]byte("beep boop")) {
|
||||
t.Fatal("Got false positive! Super unlikely!")
|
||||
}
|
||||
|
||||
fmt.Println(f)
|
||||
}
|
||||
|
||||
func TestMerge(t *testing.T) {
|
||||
|
||||
f1 := NewFilter(128)
|
||||
f2 := NewFilter(128)
|
||||
|
||||
fbork := NewFilter(32)
|
||||
|
||||
_, err := f1.Merge(fbork)
|
||||
|
||||
if err == nil {
|
||||
t.Fatal("Merge should fail on filters with different lengths")
|
||||
}
|
||||
|
||||
b := make([]byte, 4)
|
||||
|
||||
var i uint32
|
||||
for i = 0; i < 10; i++ {
|
||||
binary.LittleEndian.PutUint32(b, i)
|
||||
f1.Add(b)
|
||||
}
|
||||
|
||||
for i = 10; i < 20; i++ {
|
||||
binary.LittleEndian.PutUint32(b, i)
|
||||
f2.Add(b)
|
||||
}
|
||||
|
||||
merged, _ := f1.Merge(f2)
|
||||
|
||||
for i = 0; i < 20; i++ {
|
||||
binary.LittleEndian.PutUint32(b, i)
|
||||
|
||||
if !merged.Find(b) {
|
||||
t.Fatal("Could not find all keys in merged filter")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user