mirror of
https://github.com/ipfs/kubo.git
synced 2025-09-10 09:52:20 +08:00
use rabin fingerprinting for a chunker
License: MIT
Signed-off-by: Jeromy <jeromyj@gmail.com>

implement rabin fingerprinting as a chunker for ipfs

License: MIT
Signed-off-by: Jeromy <jeromyj@gmail.com>

vendor correctly

License: MIT
Signed-off-by: Jeromy <jeromyj@gmail.com>

refactor chunking interface a little

License: MIT
Signed-off-by: Jeromy <jeromyj@gmail.com>

work chunking interface changes up into importer

License: MIT
Signed-off-by: Jeromy <jeromyj@gmail.com>

move chunker type parsing into its own file in chunk

License: MIT
Signed-off-by: Jeromy <jeromyj@gmail.com>
This commit is contained in:
84
importer/chunk/rabin_test.go
Normal file
84
importer/chunk/rabin_test.go
Normal file
@ -0,0 +1,84 @@
|
||||
package chunk
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/ipfs/go-ipfs/blocks"
|
||||
"github.com/ipfs/go-ipfs/blocks/key"
|
||||
"github.com/ipfs/go-ipfs/util"
|
||||
"io"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRabinChunking(t *testing.T) {
|
||||
data := make([]byte, 1024*1024*16)
|
||||
util.NewTimeSeededRand().Read(data)
|
||||
|
||||
r := NewRabin(bytes.NewReader(data), 1024*256)
|
||||
|
||||
var chunks [][]byte
|
||||
|
||||
for {
|
||||
chunk, err := r.NextBytes()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
chunks = append(chunks, chunk)
|
||||
}
|
||||
|
||||
fmt.Printf("average block size: %d\n", len(data)/len(chunks))
|
||||
|
||||
unchunked := bytes.Join(chunks, nil)
|
||||
if !bytes.Equal(unchunked, data) {
|
||||
fmt.Printf("%d %d\n", len(unchunked), len(data))
|
||||
t.Fatal("data was chunked incorrectly")
|
||||
}
|
||||
}
|
||||
|
||||
func chunkData(t *testing.T, data []byte) map[key.Key]*blocks.Block {
|
||||
r := NewRabin(bytes.NewReader(data), 1024*256)
|
||||
|
||||
blkmap := make(map[key.Key]*blocks.Block)
|
||||
|
||||
for {
|
||||
blk, err := r.NextBytes()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
b := blocks.NewBlock(blk)
|
||||
blkmap[b.Key()] = b
|
||||
}
|
||||
|
||||
return blkmap
|
||||
}
|
||||
|
||||
func TestRabinChunkReuse(t *testing.T) {
|
||||
data := make([]byte, 1024*1024*16)
|
||||
util.NewTimeSeededRand().Read(data)
|
||||
|
||||
ch1 := chunkData(t, data[1000:])
|
||||
ch2 := chunkData(t, data)
|
||||
|
||||
var extra int
|
||||
for k, _ := range ch2 {
|
||||
_, ok := ch1[k]
|
||||
if !ok {
|
||||
extra++
|
||||
}
|
||||
}
|
||||
|
||||
if extra > 2 {
|
||||
t.Fatal("too many spare chunks made")
|
||||
}
|
||||
if extra == 2 {
|
||||
t.Log("why did we get two extra blocks?")
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user