mirror of
https://github.com/ipfs/kubo.git
synced 2025-07-02 03:28:25 +08:00
Add hamming distance calculation to bloom filters
This commit is contained in:
5
Godeps/Godeps.json
generated
5
Godeps/Godeps.json
generated
@ -217,6 +217,11 @@
|
||||
"ImportPath": "github.com/mtchavez/jenkins",
|
||||
"Rev": "5a816af6ef21ef401bff5e4b7dd255d63400f497"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/steakknife/hamming",
|
||||
"Comment": "0.0.2-2-g9ad4a62",
|
||||
"Rev": "9ad4a620e3d573267a083c892f2b42a39302153b"
|
||||
},
|
||||
{
|
||||
"ImportPath": "github.com/syndtr/goleveldb/leveldb",
|
||||
"Rev": "87e4e645d80ae9c537e8f2dee52b28036a5dd75e"
|
||||
|
3
Godeps/_workspace/src/github.com/steakknife/hamming/README.md
generated
vendored
Normal file
3
Godeps/_workspace/src/github.com/steakknife/hamming/README.md
generated
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
Copyright (c) 2014 Barry Allard
|
||||
|
||||
MIT license
|
38
Godeps/_workspace/src/github.com/steakknife/hamming/hamming.go
generated
vendored
Normal file
38
Godeps/_workspace/src/github.com/steakknife/hamming/hamming.go
generated
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
package hamming
|
||||
|
||||
// SSE4.x PopCnt is 10x slower
|
||||
// References: check out Hacker's Delight
|
||||
|
||||
// Bit masks and multipliers for the word-parallel population count in
// CountBitsUint64 (see Hacker's Delight). All are typed uint64 constants.
// NOTE(review): m8, m16, m32 and hff are not referenced by the code visible
// in this file - confirm before removing.
const (
	m1  = uint64(0x5555555555555555) // binary: 0101...
	m2  = uint64(0x3333333333333333) // binary: 00110011...
	m4  = uint64(0x0f0f0f0f0f0f0f0f) // binary: 4 zeros, 4 ones, ...
	m8  = uint64(0x00ff00ff00ff00ff) // binary: 8 zeros, 8 ones, ...
	m16 = uint64(0x0000ffff0000ffff) // binary: 16 zeros, 16 ones, ...
	m32 = uint64(0x00000000ffffffff) // binary: 32 zeros, 32 ones
	hff = uint64(0xffffffffffffffff) // binary: all ones
	h01 = uint64(0x0101010101010101) // 256^0 + 256^1 + ... + 256^7 (a 1 in every byte)
)
|
||||
|
||||
// table maps every byte value to the number of set bits it contains.
// Each row below covers 16 consecutive byte values (0x00-0x0f, 0x10-0x1f, ...).
var table = [256]byte{
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0x00
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x10
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x20
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x30
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x40
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x50
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x60
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0x70
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x80
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x90
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xa0
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xb0
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xc0
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xd0
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xe0
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, // 0xf0
}
|
||||
|
||||
// hamming distance of two uint64's
|
||||
func Uint64(x, y uint64) int {
|
||||
return CountBitsUint64(x ^ y)
|
||||
}
|
||||
|
||||
// hamming distance of two bytes
|
||||
func Byte(x, y byte) int {
|
||||
return CountBitsByte(x ^ y)
|
||||
}
|
||||
|
||||
func CountBitsUint64(x uint64) int {
|
||||
x -= (x >> 1) & m1 // put count of each 2 bits into those 2 bits
|
||||
x = (x & m2) + ((x >> 2) & m2) // put count of each 4 bits into those 4 bits
|
||||
x = (x + (x >> 4)) & m4 // put count of each 8 bits into those 8 bits
|
||||
return int((x * h01) >> 56) // returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
|
||||
}
|
||||
|
||||
func CountBitsByte(x byte) int {
|
||||
return int(table[x])
|
||||
}
|
88
Godeps/_workspace/src/github.com/steakknife/hamming/hamming_test.go
generated
vendored
Normal file
88
Godeps/_workspace/src/github.com/steakknife/hamming/hamming_test.go
generated
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
package hamming
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
type (
	// testCountBitsUint64Case pairs a uint64 input with its expected bit count.
	testCountBitsUint64Case struct {
		x uint64 // input value
		n int    // expected number of set bits in x
	}

	// testCountBitsByteCase pairs a byte input with its expected bit count.
	testCountBitsByteCase struct {
		x byte // input value
		n int  // expected number of set bits in x
	}
)
|
||||
|
||||
var testCountBitsByteCases = []testCountBitsByteCase{
|
||||
{0x00, 0},
|
||||
{0x01, 1},
|
||||
{0x02, 1},
|
||||
{0x03, 2},
|
||||
{0xaa, 4},
|
||||
{0x55, 4},
|
||||
{0x7f, 7},
|
||||
{0xff, 8},
|
||||
}
|
||||
|
||||
var testCountBitsUint64Cases = []testCountBitsUint64Case{
|
||||
{0x00, 0},
|
||||
{0x01, 1},
|
||||
{0x02, 1},
|
||||
{0x03, 2},
|
||||
{0xaa, 4},
|
||||
{0x55, 4},
|
||||
{0x7f, 7},
|
||||
{0xff, 8},
|
||||
{0xffff, 16},
|
||||
{0xffffffff, 32},
|
||||
{0x1ffffffff, 33},
|
||||
{0x3ffffffff, 34},
|
||||
{0x7ffffffff, 35},
|
||||
{0xfffffffff, 36},
|
||||
{0x3fffffffffffffff, 62},
|
||||
{0x7fffffffffffffff, 63},
|
||||
{0xffffffffffffffff, 64},
|
||||
}
|
||||
|
||||
func TestCountBitByte(t *testing.T) {
|
||||
for _, c := range testCountBitsByteCases {
|
||||
if actualN := CountBitsByte(c.x); actualN != c.n {
|
||||
t.Fatal("CountBitsByte(", c.x, ") = ", actualN, " != ", c.n)
|
||||
} else {
|
||||
t.Log("CountBitsByte(", c.x, ") == ", c.n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCountBitUint64(t *testing.T) {
|
||||
for _, c := range testCountBitsUint64Cases {
|
||||
if actualN := CountBitsUint64(c.x); actualN != c.n {
|
||||
t.Fatal("CountBitsUint64(", c.x, ") = ", actualN, " != ", c.n)
|
||||
} else {
|
||||
t.Log("CountBitsUint64(", c.x, ") == ", c.n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCountBitsUint64(b *testing.B) {
|
||||
j := 0
|
||||
for i := 0; i < b.N; i++ {
|
||||
CountBitsUint64(testCountBitsUint64Cases[j].x)
|
||||
j++
|
||||
if j == len(testCountBitsUint64Cases) {
|
||||
j = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkCountBitsByte(b *testing.B) {
|
||||
j := 0
|
||||
for i := 0; i < b.N; i++ {
|
||||
CountBitsByte(testCountBitsByteCases[j].x)
|
||||
j++
|
||||
if j == len(testCountBitsByteCases) {
|
||||
j = 0
|
||||
}
|
||||
}
|
||||
}
|
@ -6,6 +6,7 @@ import (
|
||||
"errors"
|
||||
// Non crypto hash, because speed
|
||||
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins"
|
||||
"github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/steakknife/hamming"
|
||||
"hash"
|
||||
)
|
||||
|
||||
@ -13,6 +14,7 @@ type Filter interface {
|
||||
Add([]byte)
|
||||
Find([]byte) bool
|
||||
Merge(Filter) (Filter, error)
|
||||
HammingDistance(Filter) (int, error)
|
||||
}
|
||||
|
||||
func NewFilter(size int) Filter {
|
||||
@ -100,3 +102,23 @@ func (f *filter) Merge(o Filter) (Filter, error) {
|
||||
|
||||
return nfilt, nil
|
||||
}
|
||||
|
||||
func (f *filter) HammingDistance(o Filter) (int, error) {
|
||||
casfil, ok := o.(*filter)
|
||||
if !ok {
|
||||
return 0, errors.New("Unsupported filter type")
|
||||
}
|
||||
|
||||
if len(f.filter) != len(casfil.filter) {
|
||||
return 0, errors.New("filter lengths must match!")
|
||||
}
|
||||
|
||||
acc := 0
|
||||
|
||||
// xor together
|
||||
for i := 0; i < len(f.filter); i++ {
|
||||
acc += hamming.Byte(f.filter[i], casfil.filter[i])
|
||||
}
|
||||
|
||||
return acc, nil
|
||||
}
|
||||
|
@ -78,3 +78,17 @@ func TestMerge(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHamming(t *testing.T) {
|
||||
f1 := NewFilter(128)
|
||||
f2 := NewFilter(128)
|
||||
|
||||
f1.Add([]byte("no collision"))
|
||||
f1.Add([]byte("collision? no!"))
|
||||
|
||||
dist, _ := f1.HammingDistance(f2)
|
||||
|
||||
if dist != 6 {
|
||||
t.Fatal("Should have 6 bit difference")
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user