1
0
mirror of https://github.com/ipfs/kubo.git synced 2025-06-27 16:07:42 +08:00

Remove whyrusleeping/chunker from godeps

License: MIT
Signed-off-by: Hector Sanjuan <hector@protocol.ai>
This commit is contained in:
Hector Sanjuan
2018-02-05 21:25:15 +01:00
parent c613fbec38
commit 7aa1ab5368
9 changed files with 0 additions and 1457 deletions

4
Godeps/Godeps.json generated
View File

@ -56,10 +56,6 @@
{
"ImportPath": "github.com/texttheater/golang-levenshtein/levenshtein",
"Rev": "dfd657628c58d3eeaa26391097853b2473c8b94e"
},
{
"ImportPath": "github.com/whyrusleeping/chunker",
"Rev": "537e901819164627ca4bb5ce4e3faa8ce7956564"
}
]
}

View File

@ -1,10 +0,0 @@
language: go
sudo: false
go:
- 1.3.3
- 1.4.2
os:
- linux
- osx

View File

@ -1,23 +0,0 @@
Copyright (c) 2014, Alexander Neumann <alexander@bumpern.de>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,7 +0,0 @@
[![Build Status](https://travis-ci.org/restic/chunker.svg?branch=master)](https://travis-ci.org/restic/chunker)
Content Defined Chunking (CDC) based on a rolling Rabin Checksum.
Part of https://github.com/restic/restic.
Better README will follow soon.

View File

@ -1,370 +0,0 @@
package chunker
import (
"errors"
"hash"
"io"
"math"
"sync"
)
const (
KiB = 1024
MiB = 1024 * KiB
// WindowSize is the size of the sliding window.
windowSize = 16
chunkerBufSize = 512 * KiB
)
var bufPool = sync.Pool{
New: func() interface{} { return make([]byte, chunkerBufSize) },
}
type tables struct {
out [256]Pol
mod [256]Pol
}
// cache precomputed tables, these are read-only anyway
var cache struct {
entries map[Pol]*tables
sync.Mutex
}
func init() {
cache.entries = make(map[Pol]*tables)
}
// Chunk is one content-dependent chunk of bytes whose end was cut when the
// Rabin Fingerprint had the value stored in Cut.
type Chunk struct {
Start uint64
Length uint64
Cut uint64
Digest []byte
Data []byte
}
func (c Chunk) Reader(r io.ReaderAt) io.Reader {
return io.NewSectionReader(r, int64(c.Start), int64(c.Length))
}
// Chunker splits content with Rabin Fingerprints.
type Chunker struct {
pol Pol
polShift uint64
tables *tables
rd io.Reader
closed bool
chunkbuf []byte
window [windowSize]byte
wpos int
buf []byte
bpos uint64
bmax uint64
start uint64
count uint64
pos uint64
pre uint64 // wait for this many bytes before start calculating an new chunk
digest uint64
h hash.Hash
sizeMask uint64
// minimal and maximal size of the outputted blocks
MinSize uint64
MaxSize uint64
}
// New returns a new Chunker based on polynomial p that reads from rd
// with bufsize and pass all data to hash along the way.
func New(rd io.Reader, pol Pol, h hash.Hash, avSize, min, max uint64) *Chunker {
sizepow := uint(math.Log2(float64(avSize)))
c := &Chunker{
buf: bufPool.Get().([]byte),
h: h,
pol: pol,
rd: rd,
chunkbuf: make([]byte, 0, max),
sizeMask: (1 << sizepow) - 1,
MinSize: min,
MaxSize: max,
}
c.reset()
return c
}
func (c *Chunker) reset() {
c.polShift = uint64(c.pol.Deg() - 8)
c.fillTables()
for i := 0; i < windowSize; i++ {
c.window[i] = 0
}
c.closed = false
c.digest = 0
c.wpos = 0
c.count = 0
c.slide(1)
c.start = c.pos
if c.h != nil {
c.h.Reset()
}
// do not start a new chunk unless at least MinSize bytes have been read
c.pre = c.MinSize - windowSize
}
// Calculate out_table and mod_table for optimization. Must be called only
// once. This implementation uses a cache in the global variable cache.
func (c *Chunker) fillTables() {
// if polynomial hasn't been specified, do not compute anything for now
if c.pol == 0 {
return
}
// test if the tables are cached for this polynomial
cache.Lock()
defer cache.Unlock()
if t, ok := cache.entries[c.pol]; ok {
c.tables = t
return
}
// else create a new entry
c.tables = &tables{}
cache.entries[c.pol] = c.tables
// calculate table for sliding out bytes. The byte to slide out is used as
// the index for the table, the value contains the following:
// out_table[b] = Hash(b || 0 || ... || 0)
// \ windowsize-1 zero bytes /
// To slide out byte b_0 for window size w with known hash
// H := H(b_0 || ... || b_w), it is sufficient to add out_table[b_0]:
// H(b_0 || ... || b_w) + H(b_0 || 0 || ... || 0)
// = H(b_0 + b_0 || b_1 + 0 || ... || b_w + 0)
// = H( 0 || b_1 || ... || b_w)
//
// Afterwards a new byte can be shifted in.
for b := 0; b < 256; b++ {
var h Pol
h = appendByte(h, byte(b), c.pol)
for i := 0; i < windowSize-1; i++ {
h = appendByte(h, 0, c.pol)
}
c.tables.out[b] = h
}
// calculate table for reduction mod Polynomial
k := c.pol.Deg()
for b := 0; b < 256; b++ {
// mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and B = b(x) * x^k
//
// The 8 bits above deg(Polynomial) determine what happens next and so
// these bits are used as a lookup to this table. The value is split in
// two parts: Part A contains the result of the modulus operation, part
// B is used to cancel out the 8 top bits so that one XOR operation is
// enough to reduce modulo Polynomial
c.tables.mod[b] = Pol(uint64(b)<<uint64(k)).Mod(c.pol) | (Pol(b) << uint64(k))
}
}
func (c *Chunker) nextBytes() []byte {
data := dupBytes(c.chunkbuf[:c.count])
n := copy(c.chunkbuf, c.chunkbuf[c.count:])
c.chunkbuf = c.chunkbuf[:n]
return data
}
// Next returns the position and length of the next chunk of data. If an error
// occurs while reading, the error is returned with a nil chunk. The state of
// the current chunk is undefined. When the last chunk has been returned, all
// subsequent calls yield a nil chunk and an io.EOF error.
func (c *Chunker) Next() (*Chunk, error) {
if c.tables == nil {
return nil, errors.New("polynomial is not set")
}
for {
if c.bpos >= c.bmax {
n, err := io.ReadFull(c.rd, c.buf[:])
c.chunkbuf = append(c.chunkbuf, c.buf[:n]...)
if err == io.ErrUnexpectedEOF {
err = nil
}
// io.ReadFull only returns io.EOF when no bytes could be read. If
// this is the case and we're in this branch, there are no more
// bytes to buffer, so this was the last chunk. If a different
// error has occurred, return that error and abandon the current
// chunk.
if err == io.EOF && !c.closed {
c.closed = true
// return the buffer to the pool
bufPool.Put(c.buf)
data := c.nextBytes()
// return current chunk, if any bytes have been processed
if c.count > 0 {
return &Chunk{
Start: c.start,
Length: c.count,
Cut: c.digest,
Digest: c.hashDigest(),
Data: data,
}, nil
}
}
if err != nil {
return nil, err
}
c.bpos = 0
c.bmax = uint64(n)
}
// check if bytes have to be dismissed before starting a new chunk
if c.pre > 0 {
n := c.bmax - c.bpos
if c.pre > uint64(n) {
c.pre -= uint64(n)
c.updateHash(c.buf[c.bpos:c.bmax])
c.count += uint64(n)
c.pos += uint64(n)
c.bpos = c.bmax
continue
}
c.updateHash(c.buf[c.bpos : c.bpos+c.pre])
c.bpos += c.pre
c.count += c.pre
c.pos += c.pre
c.pre = 0
}
add := c.count
for _, b := range c.buf[c.bpos:c.bmax] {
// inline c.slide(b) and append(b) to increase performance
out := c.window[c.wpos]
c.window[c.wpos] = b
c.digest ^= uint64(c.tables.out[out])
c.wpos = (c.wpos + 1) % windowSize
// c.append(b)
index := c.digest >> c.polShift
c.digest <<= 8
c.digest |= uint64(b)
c.digest ^= uint64(c.tables.mod[index])
// end inline
add++
if add < c.MinSize {
continue
}
if (c.digest&c.sizeMask) == 0 || add >= c.MaxSize {
i := add - c.count - 1
c.updateHash(c.buf[c.bpos : c.bpos+uint64(i)+1])
c.count = add
c.pos += uint64(i) + 1
c.bpos += uint64(i) + 1
data := c.nextBytes()
chunk := &Chunk{
Start: c.start,
Length: c.count,
Cut: c.digest,
Digest: c.hashDigest(),
Data: data,
}
c.reset()
return chunk, nil
}
}
steps := c.bmax - c.bpos
if steps > 0 {
c.updateHash(c.buf[c.bpos : c.bpos+steps])
}
c.count += steps
c.pos += steps
c.bpos = c.bmax
}
}
func dupBytes(b []byte) []byte {
out := make([]byte, len(b))
copy(out, b)
return out
}
func (c *Chunker) updateHash(data []byte) {
if c.h != nil {
// the hashes from crypto/sha* do not return an error
_, err := c.h.Write(data)
if err != nil {
panic(err)
}
}
}
func (c *Chunker) hashDigest() []byte {
if c.h == nil {
return nil
}
return c.h.Sum(nil)
}
func (c *Chunker) append(b byte) {
index := c.digest >> c.polShift
c.digest <<= 8
c.digest |= uint64(b)
c.digest ^= uint64(c.tables.mod[index])
}
func (c *Chunker) slide(b byte) {
out := c.window[c.wpos]
c.window[c.wpos] = b
c.digest ^= uint64(c.tables.out[out])
c.wpos = (c.wpos + 1) % windowSize
c.append(b)
}
func appendByte(hash Pol, b byte, pol Pol) Pol {
hash <<= 8
hash |= Pol(b)
return hash.Mod(pol)
}

View File

@ -1,298 +0,0 @@
package chunker_test
import (
"bytes"
"crypto/md5"
"crypto/sha256"
"encoding/hex"
"hash"
"io"
"io/ioutil"
"math/rand"
"testing"
"time"
"github.com/restic/chunker"
. "github.com/restic/restic/test"
)
func parseDigest(s string) []byte {
d, err := hex.DecodeString(s)
if err != nil {
panic(err)
}
return d
}
type chunk struct {
Length uint
CutFP uint64
Digest []byte
}
// polynomial used for all the tests below
const testPol = chunker.Pol(0x3DA3358B4DC173)
// created for 32MB of random data out of math/rand's Uint32() seeded by
// constant 23
//
// chunking configuration:
// window size 64, avg chunksize 1<<20, min chunksize 1<<19, max chunksize 1<<23
// polynom 0x3DA3358B4DC173
var chunks1 = []chunk{
chunk{2163460, 0x000b98d4cdf00000, parseDigest("4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d")},
chunk{643703, 0x000d4e8364d00000, parseDigest("5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407")},
chunk{1528956, 0x0015a25c2ef00000, parseDigest("a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba")},
chunk{1955808, 0x00102a8242e00000, parseDigest("c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824")},
chunk{2222372, 0x00045da878000000, parseDigest("6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56")},
chunk{2538687, 0x00198a8179900000, parseDigest("8687937412f654b5cfe4a82b08f28393a0c040f77c6f95e26742c2fc4254bfde")},
chunk{609606, 0x001d4e8d17100000, parseDigest("5da820742ff5feb3369112938d3095785487456f65a8efc4b96dac4be7ebb259")},
chunk{1205738, 0x000a7204dd600000, parseDigest("cc70d8fad5472beb031b1aca356bcab86c7368f40faa24fe5f8922c6c268c299")},
chunk{959742, 0x00183e71e1400000, parseDigest("4065bdd778f95676c92b38ac265d361f81bff17d76e5d9452cf985a2ea5a4e39")},
chunk{4036109, 0x001fec043c700000, parseDigest("b9cf166e75200eb4993fc9b6e22300a6790c75e6b0fc8f3f29b68a752d42f275")},
chunk{1525894, 0x000b1574b1500000, parseDigest("2f238180e4ca1f7520a05f3d6059233926341090f9236ce677690c1823eccab3")},
chunk{1352720, 0x00018965f2e00000, parseDigest("afd12f13286a3901430de816e62b85cc62468c059295ce5888b76b3af9028d84")},
chunk{811884, 0x00155628aa100000, parseDigest("42d0cdb1ee7c48e552705d18e061abb70ae7957027db8ae8db37ec756472a70a")},
chunk{1282314, 0x001909a0a1400000, parseDigest("819721c2457426eb4f4c7565050c44c32076a56fa9b4515a1c7796441730eb58")},
chunk{1318021, 0x001cceb980000000, parseDigest("842eb53543db55bacac5e25cb91e43cc2e310fe5f9acc1aee86bdf5e91389374")},
chunk{948640, 0x0011f7a470a00000, parseDigest("b8e36bf7019bb96ac3fb7867659d2167d9d3b3148c09fe0de45850b8fe577185")},
chunk{645464, 0x00030ce2d9400000, parseDigest("5584bd27982191c3329f01ed846bfd266e96548dfa87018f745c33cfc240211d")},
chunk{533758, 0x0004435c53c00000, parseDigest("4da778a25b72a9a0d53529eccfe2e5865a789116cb1800f470d8df685a8ab05d")},
chunk{1128303, 0x0000c48517800000, parseDigest("08c6b0b38095b348d80300f0be4c5184d2744a17147c2cba5cc4315abf4c048f")},
chunk{800374, 0x000968473f900000, parseDigest("820284d2c8fd243429674c996d8eb8d3450cbc32421f43113e980f516282c7bf")},
chunk{2453512, 0x001e197c92600000, parseDigest("5fa870ed107c67704258e5e50abe67509fb73562caf77caa843b5f243425d853")},
chunk{2651975, 0x000ae6c868000000, parseDigest("181347d2bbec32bef77ad5e9001e6af80f6abcf3576549384d334ee00c1988d8")},
chunk{237392, 0x0000000000000001, parseDigest("fcd567f5d866357a8e299fd5b2359bb2c8157c30395229c4e9b0a353944a7978")},
}
// test if nullbytes are correctly split, even if length is a multiple of MinSize.
var chunks2 = []chunk{
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
}
func testWithData(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) []*chunker.Chunk {
chunks := []*chunker.Chunk{}
pos := uint(0)
for i, chunk := range testChunks {
c, err := chnker.Next()
if err != nil {
t.Fatalf("Error returned with chunk %d: %v", i, err)
}
if c == nil {
t.Fatalf("Nil chunk returned")
}
if c != nil {
if c.Start != pos {
t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
i, pos, c.Start)
}
if c.Length != chunk.Length {
t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
i, chunk.Length, c.Length)
}
if c.Cut != chunk.CutFP {
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
i, len(chunks)-1, chunk.CutFP, c.Cut)
}
if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) {
t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x",
i, len(chunks)-1, chunk.Digest, c.Digest)
}
pos += c.Length
chunks = append(chunks, c)
}
}
c, err := chnker.Next()
if c != nil {
t.Fatal("additional non-nil chunk returned")
}
if err != io.EOF {
t.Fatal("wrong error returned after last chunk")
}
return chunks
}
func getRandom(seed, count int) []byte {
buf := make([]byte, count)
rnd := rand.New(rand.NewSource(23))
for i := 0; i < count; i += 4 {
r := rnd.Uint32()
buf[i] = byte(r)
buf[i+1] = byte(r >> 8)
buf[i+2] = byte(r >> 16)
buf[i+3] = byte(r >> 24)
}
return buf
}
func TestChunker(t *testing.T) {
// setup data source
buf := getRandom(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New())
chunks := testWithData(t, ch, chunks1)
// test reader
for i, c := range chunks {
rd := c.Reader(bytes.NewReader(buf))
h := sha256.New()
n, err := io.Copy(h, rd)
if err != nil {
t.Fatalf("io.Copy(): %v", err)
}
if uint(n) != chunks1[i].Length {
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
chunks1[i].Length, n)
}
d := h.Sum(nil)
if !bytes.Equal(d, chunks1[i].Digest) {
t.Fatalf("wrong hash returned: expected %02x, got %02x",
chunks1[i].Digest, d)
}
}
// setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
testWithData(t, ch, chunks2)
}
func TestChunkerWithRandomPolynomial(t *testing.T) {
// setup data source
buf := getRandom(23, 32*1024*1024)
// generate a new random polynomial
start := time.Now()
p, err := chunker.RandomPolynomial()
OK(t, err)
t.Logf("generating random polynomial took %v", time.Since(start))
start = time.Now()
ch := chunker.New(bytes.NewReader(buf), p, sha256.New())
t.Logf("creating chunker took %v", time.Since(start))
// make sure that first chunk is different
c, err := ch.Next()
Assert(t, c.Cut != chunks1[0].CutFP,
"Cut point is the same")
Assert(t, c.Length != chunks1[0].Length,
"Length is the same")
Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest),
"Digest is the same")
}
func TestChunkerWithoutHash(t *testing.T) {
// setup data source
buf := getRandom(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf), testPol, nil)
chunks := testWithData(t, ch, chunks1)
// test reader
for i, c := range chunks {
rd := c.Reader(bytes.NewReader(buf))
buf2, err := ioutil.ReadAll(rd)
if err != nil {
t.Fatalf("io.Copy(): %v", err)
}
if uint(len(buf2)) != chunks1[i].Length {
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
chunks1[i].Length, uint(len(buf2)))
}
if uint(len(buf2)) != chunks1[i].Length {
t.Fatalf("wrong number of bytes returned: expected %02x, got %02x",
chunks[i].Length, len(buf2))
}
if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) {
t.Fatalf("invalid data for chunk returned: expected %02x, got %02x",
buf[c.Start:c.Start+c.Length], buf2)
}
}
// setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New())
testWithData(t, ch, chunks2)
}
func benchmarkChunker(b *testing.B, hash hash.Hash) {
size := 10 * 1024 * 1024
rd := bytes.NewReader(getRandom(23, size))
b.ResetTimer()
b.SetBytes(int64(size))
var chunks int
for i := 0; i < b.N; i++ {
chunks = 0
rd.Seek(0, 0)
ch := chunker.New(rd, testPol, hash)
for {
_, err := ch.Next()
if err == io.EOF {
break
}
if err != nil {
b.Fatalf("Unexpected error occurred: %v", err)
}
chunks++
}
}
b.Logf("%d chunks, average chunk size: %d bytes", chunks, size/chunks)
}
func BenchmarkChunkerWithSHA256(b *testing.B) {
benchmarkChunker(b, sha256.New())
}
func BenchmarkChunkerWithMD5(b *testing.B) {
benchmarkChunker(b, md5.New())
}
func BenchmarkChunker(b *testing.B) {
benchmarkChunker(b, nil)
}
func BenchmarkNewChunker(b *testing.B) {
p, err := chunker.RandomPolynomial()
OK(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
chunker.New(bytes.NewBuffer(nil), p, nil)
}
}

View File

@ -1,82 +0,0 @@
// Copyright 2014 Alexander Neumann. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package chunker implements Content Defined Chunking (CDC) based on a rolling
Rabin Checksum.
Choosing a Random Irreducible Polynomial
The function RandomPolynomial() returns a new random polynomial of degree 53
for use with the chunker. The degree 53 is chosen because it is the largest
prime below 64-8 = 56, so that the top 8 bits of an uint64 can be used for
optimising calculations in the chunker.
A random polynomial is chosen selecting 64 random bits, masking away bits
64..54 and setting bit 53 to one (otherwise the polynomial is not of the
desired degree) and bit 0 to one (otherwise the polynomial is trivially
reducible), so that 51 bits are chosen at random.
This process is repeated until Irreducible() returns true, then this
polynomials is returned. If this doesn't happen after 1 million tries, the
function returns an error. The probability for selecting an irreducible
polynomial at random is about 7.5% ( (2^53-2)/53 / 2^51), so the probability
that no irreducible polynomial has been found after 100 tries is lower than
0.04%.
Verifying Irreducible Polynomials
During development the results have been verified using the computational
discrete algebra system GAP, which can be obtained from the website at
http://www.gap-system.org/.
For filtering a given list of polynomials in hexadecimal coefficient notation,
the following script can be used:
# create x over F_2 = GF(2)
x := Indeterminate(GF(2), "x");
# test if polynomial is irreducible, i.e. the number of factors is one
IrredPoly := function (poly)
return (Length(Factors(poly)) = 1);
end;;
# create a polynomial in x from the hexadecimal representation of the
# coefficients
Hex2Poly := function (s)
return ValuePol(CoefficientsQadic(IntHexString(s), 2), x);
end;;
# list of candidates, in hex
candidates := [ "3DA3358B4DC173" ];
# create real polynomials
L := List(candidates, Hex2Poly);
# filter and display the list of irreducible polynomials contained in L
Display(Filtered(L, x -> (IrredPoly(x))));
All irreducible polynomials from the list are written to the output.
Background Literature
An introduction to Rabin Fingerprints/Checksums can be found in the following articles:
Michael O. Rabin (1981): "Fingerprinting by Random Polynomials"
http://www.xmailserver.org/rabin.pdf
Ross N. Williams (1993): "A Painless Guide to CRC Error Detection Algorithms"
http://www.zlib.net/crc_v3.txt
Andrei Z. Broder (1993): "Some Applications of Rabin's Fingerprinting Method"
http://www.xmailserver.org/rabin_apps.pdf
Shuhong Gao and Daniel Panario (1997): "Tests and Constructions of Irreducible Polynomials over Finite Fields"
http://www.math.clemson.edu/~sgao/papers/GP97a.pdf
Andrew Kadatch, Bob Jenkins (2007): "Everything we know about CRC but afraid to forget"
http://crcutil.googlecode.com/files/crc-doc.1.0.pdf
*/
package chunker

View File

@ -1,278 +0,0 @@
package chunker
import (
"crypto/rand"
"encoding/binary"
"errors"
"fmt"
"strconv"
)
// Pol is a polynomial from F_2[X].
type Pol uint64
// Add returns x+y.
func (x Pol) Add(y Pol) Pol {
r := Pol(uint64(x) ^ uint64(y))
return r
}
// mulOverflows returns true if the multiplication would overflow uint64.
// Code by Rob Pike, see
// https://groups.google.com/d/msg/golang-nuts/h5oSN5t3Au4/KaNQREhZh0QJ
func mulOverflows(a, b Pol) bool {
if a <= 1 || b <= 1 {
return false
}
c := a.mul(b)
d := c.Div(b)
if d != a {
return true
}
return false
}
func (x Pol) mul(y Pol) Pol {
if x == 0 || y == 0 {
return 0
}
var res Pol
for i := 0; i <= y.Deg(); i++ {
if (y & (1 << uint(i))) > 0 {
res = res.Add(x << uint(i))
}
}
return res
}
// Mul returns x*y. When an overflow occurs, Mul panics.
func (x Pol) Mul(y Pol) Pol {
if mulOverflows(x, y) {
panic("multiplication would overflow uint64")
}
return x.mul(y)
}
// Deg returns the degree of the polynomial x. If x is zero, -1 is returned.
func (x Pol) Deg() int {
// the degree of 0 is -1
if x == 0 {
return -1
}
var mask Pol = (1 << 63)
for i := 63; i >= 0; i-- {
// test if bit i is set
if x&mask > 0 {
// this is the degree of x
return i
}
mask >>= 1
}
// fall-through, return -1
return -1
}
// String returns the coefficients in hex.
func (x Pol) String() string {
return "0x" + strconv.FormatUint(uint64(x), 16)
}
// Expand returns the string representation of the polynomial x.
func (x Pol) Expand() string {
if x == 0 {
return "0"
}
s := ""
for i := x.Deg(); i > 1; i-- {
if x&(1<<uint(i)) > 0 {
s += fmt.Sprintf("+x^%d", i)
}
}
if x&2 > 0 {
s += "+x"
}
if x&1 > 0 {
s += "+1"
}
return s[1:]
}
// DivMod returns x / d = q, and remainder r,
// see https://en.wikipedia.org/wiki/Division_algorithm
func (x Pol) DivMod(d Pol) (Pol, Pol) {
if x == 0 {
return 0, 0
}
if d == 0 {
panic("division by zero")
}
D := d.Deg()
diff := x.Deg() - D
if diff < 0 {
return 0, x
}
var q Pol
for diff >= 0 {
m := d << uint(diff)
q |= (1 << uint(diff))
x = x.Add(m)
diff = x.Deg() - D
}
return q, x
}
// Div returns the integer division result x / d.
func (x Pol) Div(d Pol) Pol {
q, _ := x.DivMod(d)
return q
}
// Mod returns the remainder of x / d
func (x Pol) Mod(d Pol) Pol {
_, r := x.DivMod(d)
return r
}
// I really dislike having a function that does not terminate, so specify a
// really large upper bound for finding a new irreducible polynomial, and
// return an error when no irreducible polynomial has been found within
// randPolMaxTries.
const randPolMaxTries = 1e6
// RandomPolynomial returns a new random irreducible polynomial of degree 53
// (largest prime number below 64-8). There are (2^53-2/53) irreducible
// polynomials of degree 53 in F_2[X], c.f. Michael O. Rabin (1981):
// "Fingerprinting by Random Polynomials", page 4. If no polynomial could be
// found in one million tries, an error is returned.
func RandomPolynomial() (Pol, error) {
for i := 0; i < randPolMaxTries; i++ {
var f Pol
// choose polynomial at random
err := binary.Read(rand.Reader, binary.LittleEndian, &f)
if err != nil {
return 0, err
}
// mask away bits above bit 53
f &= Pol((1 << 54) - 1)
// set highest and lowest bit so that the degree is 53 and the
// polynomial is not trivially reducible
f |= (1 << 53) | 1
// test if f is irreducible
if f.Irreducible() {
return f, nil
}
}
// If this is reached, we haven't found an irreducible polynomial in
// randPolMaxTries. This error is very unlikely to occur.
return 0, errors.New("unable to find new random irreducible polynomial")
}
// GCD computes the Greatest Common Divisor x and f.
func (x Pol) GCD(f Pol) Pol {
if f == 0 {
return x
}
if x == 0 {
return f
}
if x.Deg() < f.Deg() {
x, f = f, x
}
return f.GCD(x.Mod(f))
}
// Irreducible returns true iff x is irreducible over F_2. This function
// uses Ben Or's reducibility test.
//
// For details see "Tests and Constructions of Irreducible Polynomials over
// Finite Fields".
func (x Pol) Irreducible() bool {
for i := 1; i <= x.Deg()/2; i++ {
if x.GCD(qp(uint(i), x)) != 1 {
return false
}
}
return true
}
// MulMod computes x*f mod g
func (x Pol) MulMod(f, g Pol) Pol {
if x == 0 || f == 0 {
return 0
}
var res Pol
for i := 0; i <= f.Deg(); i++ {
if (f & (1 << uint(i))) > 0 {
a := x
for j := 0; j < i; j++ {
a = a.Mul(2).Mod(g)
}
res = res.Add(a).Mod(g)
}
}
return res
}
// qp computes the polynomial (x^(2^p)-x) mod g. This is needed for the
// reducibility test.
func qp(p uint, g Pol) Pol {
num := (1 << p)
i := 1
// start with x
res := Pol(2)
for i < num {
// repeatedly square res
res = res.MulMod(res, g)
i *= 2
}
// add x
return res.Add(2).Mod(g)
}
func (p Pol) MarshalJSON() ([]byte, error) {
buf := strconv.AppendUint([]byte{'"'}, uint64(p), 16)
buf = append(buf, '"')
return buf, nil
}
func (p *Pol) UnmarshalJSON(data []byte) error {
if len(data) < 2 {
return errors.New("invalid string for polynomial")
}
n, err := strconv.ParseUint(string(data[1:len(data)-1]), 16, 64)
if err != nil {
return err
}
*p = Pol(n)
return nil
}

View File

@ -1,385 +0,0 @@
package chunker_test
import (
"strconv"
"testing"
"github.com/restic/chunker"
. "github.com/restic/restic/test"
)
var polAddTests = []struct {
x, y chunker.Pol
sum chunker.Pol
}{
{23, 16, 23 ^ 16},
{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
{0x9a7e30d1e855e0a0, 0x9a7e30d1e855e0a0, 0},
}
func TestPolAdd(t *testing.T) {
for _, test := range polAddTests {
Equals(t, test.sum, test.x.Add(test.y))
Equals(t, test.sum, test.y.Add(test.x))
}
}
func parseBin(s string) chunker.Pol {
i, err := strconv.ParseUint(s, 2, 64)
if err != nil {
panic(err)
}
return chunker.Pol(i)
}
var polMulTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
{1, 2, 2},
{
parseBin("1101"),
parseBin("10"),
parseBin("11010"),
},
{
parseBin("1101"),
parseBin("11"),
parseBin("10111"),
},
{
0x40000000,
0x40000000,
0x1000000000000000,
},
{
parseBin("1010"),
parseBin("100100"),
parseBin("101101000"),
},
{
parseBin("100"),
parseBin("11"),
parseBin("1100"),
},
{
parseBin("11"),
parseBin("110101"),
parseBin("1011111"),
},
{
parseBin("10011"),
parseBin("110101"),
parseBin("1100001111"),
},
}
func TestPolMul(t *testing.T) {
for i, test := range polMulTests {
m := test.x.Mul(test.y)
Assert(t, test.res == m,
"TestPolMul failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
m = test.y.Mul(test.x)
Assert(t, test.res == test.y.Mul(test.x),
"TestPolMul failed for %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
}
}
func TestPolMulOverflow(t *testing.T) {
defer func() {
// try to recover overflow error
err := recover()
if e, ok := err.(string); ok && e == "multiplication would overflow uint64" {
return
} else {
t.Logf("invalid error raised: %v", err)
// re-raise error if not overflow
panic(err)
}
}()
x := chunker.Pol(1 << 63)
x.Mul(2)
t.Fatal("overflow test did not panic")
}
var polDivTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
{10, 50, 0},
{0, 1, 0},
{
parseBin("101101000"), // 0x168
parseBin("1010"), // 0xa
parseBin("100100"), // 0x24
},
{2, 2, 1},
{
0x8000000000000000,
0x8000000000000000,
1,
},
{
parseBin("1100"),
parseBin("100"),
parseBin("11"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("110101"),
},
}
func TestPolDiv(t *testing.T) {
for i, test := range polDivTests {
m := test.x.Div(test.y)
Assert(t, test.res == m,
"TestPolDiv failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m)
}
}
var polModTests = []struct {
x, y chunker.Pol
res chunker.Pol
}{
{10, 50, 10},
{0, 1, 0},
{
parseBin("101101001"),
parseBin("1010"),
parseBin("1"),
},
{2, 2, 0},
{
0x8000000000000000,
0x8000000000000000,
0,
},
{
parseBin("1100"),
parseBin("100"),
parseBin("0"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("0"),
},
}
func TestPolModt(t *testing.T) {
for _, test := range polModTests {
Equals(t, test.res, test.x.Mod(test.y))
}
}
func BenchmarkPolDivMod(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.DivMod(f)
}
}
func BenchmarkPolDiv(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.Div(f)
}
}
func BenchmarkPolMod(t *testing.B) {
f := chunker.Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899)
for i := 0; i < t.N; i++ {
g.Mod(f)
}
}
func BenchmarkPolDeg(t *testing.B) {
f := chunker.Pol(0x3af4b284899)
d := f.Deg()
if d != 41 {
t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d",
d, 41)
}
for i := 0; i < t.N; i++ {
f.Deg()
}
}
func TestRandomPolynomial(t *testing.T) {
_, err := chunker.RandomPolynomial()
OK(t, err)
}
func BenchmarkRandomPolynomial(t *testing.B) {
for i := 0; i < t.N; i++ {
_, err := chunker.RandomPolynomial()
OK(t, err)
}
}
func TestExpandPolynomial(t *testing.T) {
pol := chunker.Pol(0x3DA3358B4DC173)
s := pol.Expand()
Equals(t, "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1", s)
}
var polIrredTests = []struct {
f chunker.Pol
irred bool
}{
{0x38f1e565e288df, false},
{0x3DA3358B4DC173, true},
{0x30a8295b9d5c91, false},
{0x255f4350b962cb, false},
{0x267f776110a235, false},
{0x2f4dae10d41227, false},
{0x2482734cacca49, true},
{0x312daf4b284899, false},
{0x29dfb6553d01d1, false},
{0x3548245eb26257, false},
{0x3199e7ef4211b3, false},
{0x362f39017dae8b, false},
{0x200d57aa6fdacb, false},
{0x35e0a4efa1d275, false},
{0x2ced55b026577f, false},
{0x260b012010893d, false},
{0x2df29cbcd59e9d, false},
{0x3f2ac7488bd429, false},
{0x3e5cb1711669fb, false},
{0x226d8de57a9959, false},
{0x3c8de80aaf5835, false},
{0x2026a59efb219b, false},
{0x39dfa4d13fb231, false},
{0x3143d0464b3299, false},
}
func TestPolIrreducible(t *testing.T) {
for _, test := range polIrredTests {
Assert(t, test.f.Irreducible() == test.irred,
"Irreducibility test for Polynomial %v failed: got %v, wanted %v",
test.f, test.f.Irreducible(), test.irred)
}
}
func BenchmarkPolIrreducible(b *testing.B) {
// find first irreducible polynomial
var pol chunker.Pol
for _, test := range polIrredTests {
if test.irred {
pol = test.f
break
}
}
for i := 0; i < b.N; i++ {
Assert(b, pol.Irreducible(),
"Irreducibility test for Polynomial %v failed", pol)
}
}
var polGCDTests = []struct {
f1 chunker.Pol
f2 chunker.Pol
gcd chunker.Pol
}{
{10, 50, 2},
{0, 1, 1},
{
parseBin("101101001"),
parseBin("1010"),
parseBin("1"),
},
{2, 2, 2},
{
parseBin("1010"),
parseBin("11"),
parseBin("11"),
},
{
0x8000000000000000,
0x8000000000000000,
0x8000000000000000,
},
{
parseBin("1100"),
parseBin("101"),
parseBin("11"),
},
{
parseBin("1100001111"),
parseBin("10011"),
parseBin("10011"),
},
{
0x3DA3358B4DC173,
0x3DA3358B4DC173,
0x3DA3358B4DC173,
},
{
0x3DA3358B4DC173,
0x230d2259defd,
1,
},
{
0x230d2259defd,
0x51b492b3eff2,
parseBin("10011"),
},
}
func TestPolGCD(t *testing.T) {
for i, test := range polGCDTests {
gcd := test.f1.GCD(test.f2)
Assert(t, test.gcd == gcd,
"GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd)
gcd = test.f2.GCD(test.f1)
Assert(t, test.gcd == gcd,
"GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd)
}
}
var polMulModTests = []struct {
f1 chunker.Pol
f2 chunker.Pol
g chunker.Pol
mod chunker.Pol
}{
{
0x1230,
0x230,
0x55,
0x22,
},
{
0x0eae8c07dbbb3026,
0xd5d6db9de04771de,
0xdd2bda3b77c9,
0x425ae8595b7a,
},
}
func TestPolMulMod(t *testing.T) {
for i, test := range polMulModTests {
mod := test.f1.MulMod(test.f2, test.g)
Assert(t, mod == test.mod,
"MulMod test %d (%+v) failed: got %v, wanted %v",
i, test, mod, test.mod)
}
}