1
0
mirror of https://github.com/ipfs/kubo.git synced 2025-06-23 13:44:27 +08:00

Merge pull request #5319 from ipfs/feat/extract-unixfs-take-2

delete unixfs code...
This commit is contained in:
Whyrusleeping
2018-08-05 10:57:14 -07:00
committed by GitHub
32 changed files with 0 additions and 7603 deletions

View File

@ -47,9 +47,6 @@ ifneq ($(filter coverage% clean distclean,$(MAKECMDGOALS)),)
include $(dir)/Rules.mk
endif
dir := unixfs/pb
include $(dir)/Rules.mk
dir := pin/internal/pb
include $(dir)/Rules.mk

View File

@ -1,108 +0,0 @@
// Package archive provides utilities to archive and compress a [Unixfs] DAG.
package archive
import (
"bufio"
"compress/gzip"
"context"
"io"
"path"
tar "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/archive/tar"
uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// DefaultBufSize is the size, in bytes, of the buffered writer that DagArchive
// places in front of its pipe. For now it is 1048576 bytes (1MB), which is ~4 blocks.
// TODO: does this need to be configurable?
var DefaultBufSize = 1048576
// identityWriteCloser adapts a plain io.Writer to io.WriteCloser. Writes are
// forwarded unchanged and Close does nothing, so the wrapped writer is never
// closed through this type.
type identityWriteCloser struct {
	w io.Writer
}

// Write hands p to the wrapped writer and reports its result as-is.
func (iwc *identityWriteCloser) Write(p []byte) (n int, err error) {
	n, err = iwc.w.Write(p)
	return n, err
}

// Close is a no-op that always succeeds.
func (iwc *identityWriteCloser) Close() error {
	return nil
}
// DagArchive is equivalent to `ipfs getdag $hash | maybe_tar | maybe_gzip`.
//
// It returns the read end of a pipe; a background goroutine streams nd into the
// write end. When archive is false and compression is enabled, the node is read
// as a single file and gzip-compressed. In every other case the node is written
// as a tar stream (optionally gzip-compressed), using the last element of name
// as the root entry name. Any error hit while streaming is delivered to the
// reader through the pipe.
func DagArchive(ctx context.Context, nd ipld.Node, name string, dag ipld.DAGService, archive bool, compression int) (io.Reader, error) {
	cleaned := path.Clean(name)
	_, filename := path.Split(cleaned)

	// need to connect a writer to a reader
	piper, pipew := io.Pipe()
	// checkErrAndClosePipe reports whether err is non-nil and, if so, closes the
	// pipe with it so the reader observes the failure.
	checkErrAndClosePipe := func(err error) bool {
		if err != nil {
			pipew.CloseWithError(err)
			return true
		}
		return false
	}

	// use a buffered writer to parallelize task
	bufw := bufio.NewWriterSize(pipew, DefaultBufSize)

	// compression determines whether to use gzip compression.
	maybeGzw, err := newMaybeGzWriter(bufw, compression)
	if checkErrAndClosePipe(err) {
		return nil, err
	}

	// closeGzwAndPipe tears the writer stack down from the inside out:
	// gzip (or identity) writer, then the buffer, then the pipe itself.
	closeGzwAndPipe := func() {
		if err := maybeGzw.Close(); checkErrAndClosePipe(err) {
			return
		}
		if err := bufw.Flush(); checkErrAndClosePipe(err) {
			return
		}
		pipew.Close() // everything seems to be ok.
	}

	if !archive && compression != gzip.NoCompression {
		// the case when the node is a file
		dagr, err := uio.NewDagReader(ctx, nd, dag)
		if checkErrAndClosePipe(err) {
			return nil, err
		}

		go func() {
			if _, err := dagr.WriteTo(maybeGzw); checkErrAndClosePipe(err) {
				return
			}
			closeGzwAndPipe() // everything seems to be ok
		}()
	} else {
		// the case for 1. archive, and 2. not archived and not compressed, in which tar is used anyway as a transport format

		// construct the tar writer
		w, err := tar.NewWriter(ctx, dag, maybeGzw)
		if checkErrAndClosePipe(err) {
			return nil, err
		}

		go func() {
			// write all the nodes recursively
			if err := w.WriteNode(nd, filename); checkErrAndClosePipe(err) {
				return
			}
			// Closing the tar writer emits the archive trailer; if that
			// fails the stream is truncated, so report it to the reader
			// instead of signalling a clean EOF.
			if err := w.Close(); checkErrAndClosePipe(err) {
				return
			}
			closeGzwAndPipe() // everything seems to be ok
		}()
	}

	return piper, nil
}
// newMaybeGzWriter wraps w in a gzip writer at the requested compression
// level. When compression is gzip.NoCompression it instead returns a
// pass-through wrapper whose Close does nothing.
func newMaybeGzWriter(w io.Writer, compression int) (io.WriteCloser, error) {
	if compression == gzip.NoCompression {
		return &identityWriteCloser{w}, nil
	}
	return gzip.NewWriterLevel(w, compression)
}

View File

@ -1,143 +0,0 @@
// Package tar provides functionality to write a unixfs merkledag
// as a tar archive.
package tar
import (
"archive/tar"
"context"
"fmt"
"io"
"path"
"time"
mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io"
upb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// Writer is a utility structure that helps to write
// unixfs merkledag nodes as a tar archive format.
// It wraps any io.Writer.
type Writer struct {
// Dag is the DAG service used to fetch the child nodes of directories being written.
Dag ipld.DAGService
// TarW is the underlying tar writer that receives headers and file contents.
TarW *tar.Writer
// ctx is passed to every DAG lookup and file read performed while writing.
ctx context.Context
}
// NewWriter returns a Writer that emits a tar stream into w, resolving nodes
// through dag and using ctx for the DAG operations it performs. The returned
// error is always nil.
func NewWriter(ctx context.Context, dag ipld.DAGService, w io.Writer) (*Writer, error) {
	tw := new(Writer)
	tw.ctx = ctx
	tw.Dag = dag
	tw.TarW = tar.NewWriter(w)
	return tw, nil
}
// writeDir writes a directory header for fpath and then recursively writes
// every entry of the directory node nd beneath that path.
func (w *Writer) writeDir(nd *mdag.ProtoNode, fpath string) error {
	dir, err := uio.NewDirectoryFromNode(w.Dag, nd)
	if err != nil {
		return err
	}
	if err = writeDirHeader(w.TarW, fpath); err != nil {
		return err
	}

	// writeEntry fetches one directory entry and writes it under fpath.
	writeEntry := func(lnk *ipld.Link) error {
		entry, getErr := w.Dag.Get(w.ctx, lnk.Cid)
		if getErr != nil {
			return getErr
		}
		return w.WriteNode(entry, path.Join(fpath, lnk.Name))
	}
	return dir.ForEachLink(w.ctx, writeEntry)
}
// writeFile writes a regular-file header for fpath, sized from fsNode, and
// then streams the file's contents into the tar writer.
//
// The error from the final Flush is returned: it is where the tar writer
// reports that the bytes written do not match the size announced in the
// header, which would otherwise produce a silently corrupt archive.
func (w *Writer) writeFile(nd *mdag.ProtoNode, fsNode *ft.FSNode, fpath string) error {
	if err := writeFileHeader(w.TarW, fpath, fsNode.FileSize()); err != nil {
		return err
	}

	dagr := uio.NewPBFileReader(w.ctx, nd, fsNode, w.Dag)
	if _, err := dagr.WriteTo(w.TarW); err != nil {
		return err
	}
	return w.TarW.Flush()
}
// WriteNode adds a node to the archive under the path fpath.
//
// Protobuf nodes are dispatched on their unixfs type: metadata, directory and
// HAMT-shard nodes are written as directories, raw and file nodes as regular
// files, and symlinks as symlink headers. Raw (non-protobuf) nodes are written
// as regular files holding their raw data. Any other node type is rejected.
func (w *Writer) WriteNode(nd ipld.Node, fpath string) error {
	switch nd := nd.(type) {
	case *mdag.ProtoNode:
		fsNode, err := ft.FSNodeFromBytes(nd.Data())
		if err != nil {
			return err
		}

		switch fsNode.Type() {
		case upb.Data_Metadata, upb.Data_Directory, upb.Data_HAMTShard:
			return w.writeDir(nd, fpath)
		case upb.Data_Raw, upb.Data_File:
			return w.writeFile(nd, fsNode, fpath)
		case upb.Data_Symlink:
			return writeSymlinkHeader(w.TarW, string(fsNode.Data()), fpath)
		default:
			return ft.ErrUnrecognizedType
		}
	case *mdag.RawNode:
		data := nd.RawData()
		if err := writeFileHeader(w.TarW, fpath, uint64(len(data))); err != nil {
			return err
		}
		if _, err := w.TarW.Write(data); err != nil {
			return err
		}
		// Surface flush failures instead of dropping them.
		return w.TarW.Flush()
	default:
		return fmt.Errorf("nodes of type %T are not supported in unixfs", nd)
	}
}
// Close closes the underlying tar writer and returns the error it reports, if any.
func (w *Writer) Close() error {
return w.TarW.Close()
}
// writeDirHeader writes a directory entry named fpath with mode 0777 and the
// current time as its modification time.
func writeDirHeader(w *tar.Writer, fpath string) error {
	// TODO: set mode, dates, etc. when added to unixFS
	hdr := tar.Header{
		Typeflag: tar.TypeDir,
		Name:     fpath,
		Mode:     0777,
		ModTime:  time.Now(),
	}
	return w.WriteHeader(&hdr)
}
// writeFileHeader writes a regular-file entry named fpath that announces size
// bytes of content, with mode 0644 and the current time as its modification
// time.
func writeFileHeader(w *tar.Writer, fpath string, size uint64) error {
	// TODO: set mode, dates, etc. when added to unixFS
	hdr := tar.Header{
		Typeflag: tar.TypeReg,
		Name:     fpath,
		Size:     int64(size),
		Mode:     0644,
		ModTime:  time.Now(),
	}
	return w.WriteHeader(&hdr)
}
// writeSymlinkHeader writes a symlink entry named fpath that points at target,
// with mode 0777.
func writeSymlinkHeader(w *tar.Writer, target, fpath string) error {
	hdr := tar.Header{
		Typeflag: tar.TypeSymlink,
		Name:     fpath,
		Linkname: target,
		Mode:     0777,
	}
	return w.WriteHeader(&hdr)
}

View File

@ -1,530 +0,0 @@
// Package hamt implements a Hash Array Mapped Trie over ipfs merkledag nodes.
// It is implemented mostly as described in the wikipedia article on HAMTs,
// however the table size is variable (usually 256 in our usages) as opposed to
// 32 as suggested in the article. The hash function used is currently
// Murmur3, but this value is configurable (the datastructure reports which
// hash function it is using).
//
// The one algorithmic change we implement that is not mentioned in the
// wikipedia article is the collapsing of empty shards.
// Given the following tree: ( '[' = shards, '{' = values )
// [ 'A' ] -> [ 'B' ] -> { "ABC" }
// | L-> { "ABD" }
// L-> { "ASDF" }
// If we simply removed "ABC", we would end up with a tree where shard 'B' only
// has a single child. This causes two issues, the first, is that now we have
// an extra lookup required to get to "ABD". The second issue is that now we
// have a tree that contains only "ABD", but is not the same tree that we would
// get by simply inserting "ABD" into a new tree. To address this, we always
// check for empty shard nodes upon deletion and prune them to maintain a
// consistent tree, independent of insertion order.
package hamt
import (
"context"
"fmt"
"os"
dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
format "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
upb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb"
bitfield "gx/ipfs/QmTbBs3Y3u5F69XNJzdnnc6SP5GKgcXxCDzx6w8m6piVRT/go-bitfield"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
"gx/ipfs/QmfJHywXQu98UeZtGJBQrPAR6AtmDjjbe3qjTo9piXHPnx/murmur3"
)
const (
// HashMurmur3 is the multiformats identifier for Murmur3
HashMurmur3 uint64 = 0x22
)
// A Shard represents the HAMT. It should be initialized with NewShard().
type Shard struct {
// nd is the protobuf node backing this shard; its links are consulted for
// children that have not been loaded into the children cache yet.
nd *dag.ProtoNode
// bitfield records which of the tableSize slots are occupied.
bitfield bitfield.Bitfield
// children holds one entry per set bit in bitfield, in slot order. A nil
// entry is a child that has not been loaded yet.
children []child
// tableSize is the number of slots in this shard.
tableSize int
// tableSizeLg2 is the number of hash bits consumed per level to pick a slot.
tableSizeLg2 int
// prefix is the CID prefix applied to the node produced by Node(); may be nil.
prefix *cid.Prefix
// hashFunc is the identifier of the hash function used for keys.
hashFunc uint64
// prefixPadStr is the fmt format string that renders a slot index as a
// zero-padded upper-case hex link-name prefix.
prefixPadStr string
// maxpadlen is the length of that hex prefix.
maxpadlen int
// dserv is the DAG service used to load and store nodes.
dserv ipld.DAGService
}
// child can either be another shard, or a leaf node value
type child interface {
// Link returns the link that points at this child.
Link() (*ipld.Link, error)
// Label returns the text that follows the hex slot prefix in the child's
// link name: the key for a value, the empty string for a sub-shard.
Label() string
}
// NewShard builds an empty HAMT shard with size slots, backed by a fresh
// protobuf node and using Murmur3 as its hash function. It fails when
// makeShard rejects size.
func NewShard(dserv ipld.DAGService, size int) (*Shard, error) {
	shard, err := makeShard(dserv, size)
	if err != nil {
		return nil, err
	}

	shard.nd = new(dag.ProtoNode)
	shard.hashFunc = HashMurmur3
	return shard, nil
}
// makeShard allocates the size-dependent parts of a Shard: the log2 of the
// table size, the bitfield, and the format used to render slot indices as
// fixed-width hex prefixes. The backing node and hash function are left for
// the caller to set.
func makeShard(ds ipld.DAGService, size int) (*Shard, error) {
	lg2s, err := logtwo(size)
	if err != nil {
		return nil, err
	}

	// The widest prefix is the hex rendering of the last slot index.
	padWidth := len(fmt.Sprintf("%X", size-1))

	shard := &Shard{
		dserv:        ds,
		tableSize:    size,
		tableSizeLg2: lg2s,
		maxpadlen:    padWidth,
	}
	shard.prefixPadStr = fmt.Sprintf("%%0%dX", padWidth)
	shard.bitfield = bitfield.NewBitfield(size)
	return shard, nil
}
// NewHamtFromDag creates a new HAMT shard from the given DAG node. The node
// must be a protobuf node whose unixfs data is a HAMT shard hashed with
// Murmur3. Children are not loaded here; they are fetched on demand.
func NewHamtFromDag(dserv ipld.DAGService, nd ipld.Node) (*Shard, error) {
	pbnd, isProto := nd.(*dag.ProtoNode)
	if !isProto {
		return nil, dag.ErrNotProtobuf
	}

	pbd, err := format.FromBytes(pbnd.Data())
	if err != nil {
		return nil, err
	}

	switch {
	case pbd.GetType() != upb.Data_HAMTShard:
		return nil, fmt.Errorf("node was not a dir shard")
	case pbd.GetHashType() != HashMurmur3:
		return nil, fmt.Errorf("only murmur3 supported as hash function")
	}

	shard, err := makeShard(dserv, int(pbd.GetFanout()))
	if err != nil {
		return nil, err
	}

	shard.nd = pbnd.Copy().(*dag.ProtoNode)
	// One (initially unloaded) cache slot per link of the source node.
	shard.children = make([]child, len(pbnd.Links()))
	shard.bitfield.SetBytes(pbd.GetData())
	shard.hashFunc = pbd.GetHashType()
	shard.prefix = &shard.nd.Prefix
	return shard, nil
}
// SetPrefix sets the CID Prefix that Node() applies to the node it produces.
func (ds *Shard) SetPrefix(prefix *cid.Prefix) {
ds.prefix = prefix
}
// Prefix gets the CID Prefix, may be nil if unset
func (ds *Shard) Prefix() *cid.Prefix {
return ds.prefix
}
// Node serializes the HAMT structure into a merkledag node with unixfs
// formatting, adds that node to the DAG service and returns it.
//
// Every occupied slot contributes one link whose name is the slot's hex prefix
// followed by the child's label. Loaded children are serialized through their
// Link method; children that were never loaded are copied from the backing
// node's links. An error is returned, rather than panicking, when the bitfield
// and the stored children or links are inconsistent.
func (ds *Shard) Node() (ipld.Node, error) {
	out := new(dag.ProtoNode)
	out.SetPrefix(ds.prefix)

	cindex := 0
	// TODO: optimized 'for each set bit'
	for i := 0; i < ds.tableSize; i++ {
		if !ds.bitfield.Bit(i) {
			continue
		}

		// A bitfield loaded from an untrusted node may claim more
		// children than actually exist.
		if cindex >= len(ds.children) {
			return nil, fmt.Errorf("bitfield has more set bits than the shard has children (likely corrupt bitfield)")
		}

		ch := ds.children[cindex]
		if ch != nil {
			clnk, err := ch.Link()
			if err != nil {
				return nil, err
			}

			err = out.AddRawLink(ds.linkNamePrefix(i)+ch.Label(), clnk)
			if err != nil {
				return nil, err
			}
		} else {
			// child unloaded, just copy in link with updated name
			links := ds.nd.Links()
			if cindex >= len(links) {
				return nil, fmt.Errorf("inconsistent lengths between children array and Links array")
			}
			lnk := links[cindex]
			if lnk == nil {
				return nil, fmt.Errorf("missing link for unloaded child %d", cindex)
			}
			// The name must at least hold the hex slot prefix.
			if len(lnk.Name) < ds.maxpadlen {
				return nil, fmt.Errorf("invalid link name '%s'", lnk.Name)
			}
			label := lnk.Name[ds.maxpadlen:]

			err := out.AddRawLink(ds.linkNamePrefix(i)+label, lnk)
			if err != nil {
				return nil, err
			}
		}
		cindex++
	}

	typ := upb.Data_HAMTShard
	data, err := proto.Marshal(&upb.Data{
		Type:     &typ,
		Fanout:   proto.Uint64(uint64(ds.tableSize)),
		HashType: proto.Uint64(HashMurmur3),
		Data:     ds.bitfield.Bytes(),
	})
	if err != nil {
		return nil, err
	}

	out.SetData(data)

	err = ds.dserv.Add(context.TODO(), out)
	if err != nil {
		return nil, err
	}

	return out, nil
}
// shardValue is a leaf of the HAMT: a single key together with the link
// stored under it.
type shardValue struct {
// key is the entry name, without the hex slot prefix.
key string
// val is the link stored for key.
val *ipld.Link
}
// Link returns a link to this node
func (sv *shardValue) Link() (*ipld.Link, error) {
return sv.val, nil
}
// Label returns the entry's key.
func (sv *shardValue) Label() string {
return sv.key
}
// hash returns the 64-bit Murmur3 digest of val as a byte slice.
func hash(val []byte) []byte {
	hasher := murmur3.New64()
	hasher.Write(val)
	digest := hasher.Sum(nil)
	return digest
}
// Label for Shards is the empty string, this is used to differentiate them from
// value entries: a sub-shard's link name is therefore only the hex slot prefix.
func (ds *Shard) Label() string {
return ""
}
// Set stores nd under name in the HAMT, replacing any entry that already has
// that name. nd is added to the DAG service before it is linked.
func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error {
	hv := &hashBits{b: hash([]byte(name))}

	if err := ds.dserv.Add(ctx, nd); err != nil {
		return err
	}

	lnk, err := ipld.MakeLink(nd)
	if err != nil {
		return err
	}
	lnk.Name = ds.linkNamePrefix(0) + name

	return ds.modifyValue(ctx, hv, name, lnk)
}
// Remove deletes the named entry. If no entry with that name exists it
// returns os.ErrNotExist.
func (ds *Shard) Remove(ctx context.Context, name string) error {
hv := &hashBits{b: hash([]byte(name))}
// A nil link tells modifyValue to delete rather than set.
return ds.modifyValue(ctx, hv, name, nil)
}
// Find looks up name in this HAMT and returns the link stored under it. It
// returns os.ErrNotExist when there is no such entry.
func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) {
	hv := &hashBits{b: hash([]byte(name))}

	var found *ipld.Link
	capture := func(sv *shardValue) error {
		found = sv.val
		return nil
	}

	if err := ds.getValue(ctx, hv, name, capture); err != nil {
		return nil, err
	}
	return found, nil
}
// getChild returns the i'th child of this shard. A child already present in
// the children cache is returned directly; otherwise it is loaded through
// loadChild. The index and the cache/link lengths are validated first.
func (ds *Shard) getChild(ctx context.Context, i int) (child, error) {
	if i < 0 || i >= len(ds.children) {
		return nil, fmt.Errorf("invalid index passed to getChild (likely corrupt bitfield)")
	}

	if len(ds.nd.Links()) != len(ds.children) {
		return nil, fmt.Errorf("inconsistent lengths between children array and Links array")
	}

	if cached := ds.children[i]; cached != nil {
		return cached, nil
	}
	return ds.loadChild(ctx, i)
}
// loadChild materializes the i'th child of this shard from the backing node's
// links, caches it and returns it. A link whose name is exactly the hex prefix
// is a sub-shard and is fetched from the DAG service; a longer name is a value
// whose key is the part after the prefix.
func (ds *Shard) loadChild(ctx context.Context, i int) (child, error) {
	lnk := ds.nd.Links()[i]

	var loaded child
	switch nameLen := len(lnk.Name); {
	case nameLen < ds.maxpadlen:
		return nil, fmt.Errorf("invalid link name '%s'", lnk.Name)
	case nameLen == ds.maxpadlen:
		subNode, err := lnk.GetNode(ctx, ds.dserv)
		if err != nil {
			return nil, err
		}
		subShard, err := NewHamtFromDag(ds.dserv, subNode)
		if err != nil {
			return nil, err
		}
		loaded = subShard
	default:
		// Copy the link so the cached value does not alias the backing node.
		linkCopy := *lnk
		loaded = &shardValue{
			key: lnk.Name[ds.maxpadlen:],
			val: &linkCopy,
		}
	}

	ds.children[i] = loaded
	return loaded, nil
}
// setChild replaces the cached child at index i of the children array with c.
func (ds *Shard) setChild(i int, c child) {
ds.children[i] = c
}
// Link serializes this shard, stores the resulting node in the DAG service and
// returns a merklelink to it.
func (ds *Shard) Link() (*ipld.Link, error) {
	serialized, err := ds.Node()
	if err != nil {
		return nil, err
	}

	if err := ds.dserv.Add(context.TODO(), serialized); err != nil {
		return nil, err
	}
	return ipld.MakeLink(serialized)
}
// insertChild stores lnk under key in the (currently empty) slot idx. It marks
// the slot in the bitfield, renames the link to the slot prefix plus key, and
// inserts the new value into the children cache together with a nil
// placeholder at the same position in the backing node's links. A nil lnk
// means a removal was requested for a key that does not exist, so
// os.ErrNotExist is returned.
func (ds *Shard) insertChild(idx int, key string, lnk *ipld.Link) error {
	if lnk == nil {
		return os.ErrNotExist
	}

	i := ds.indexForBitPos(idx)
	ds.bitfield.SetBit(idx)

	lnk.Name = ds.linkNamePrefix(idx) + key
	sv := &shardValue{key: key, val: lnk}

	// Splice sv in at position i of the cache.
	childTail := append([]child{sv}, ds.children[i:]...)
	ds.children = append(ds.children[:i], childTail...)

	// Keep the link array the same shape with a placeholder entry.
	linkTail := append([]*ipld.Link{nil}, ds.nd.Links()[i:]...)
	ds.nd.SetLinks(append(ds.nd.Links()[:i], linkTail...))
	return nil
}
// rmChild drops entry i from both the children cache and the backing node's
// links, shifting later entries down by one. It fails when i is out of range
// for either array.
func (ds *Shard) rmChild(i int) error {
	if i < 0 || i >= len(ds.children) || i >= len(ds.nd.Links()) {
		return fmt.Errorf("hamt: attempted to remove child with out of range index")
	}

	ds.children = append(ds.children[:i], ds.children[i+1:]...)

	links := ds.nd.Links()
	ds.nd.SetLinks(append(links[:i], links[i+1:]...))
	return nil
}
// getValue walks down the trie following the bits of hv and invokes cb on the
// value stored under key. It returns os.ErrNotExist when the slot is empty or
// holds a different key.
func (ds *Shard) getValue(ctx context.Context, hv *hashBits, key string, cb func(*shardValue) error) error {
	idx := hv.Next(ds.tableSizeLg2)
	if !ds.bitfield.Bit(int(idx)) {
		return os.ErrNotExist
	}

	c, err := ds.getChild(ctx, ds.indexForBitPos(idx))
	if err != nil {
		return err
	}

	if sub, isShard := c.(*Shard); isShard {
		return sub.getValue(ctx, hv, key, cb)
	}
	if leaf, isValue := c.(*shardValue); isValue && leaf.key == key {
		return cb(leaf)
	}
	return os.ErrNotExist
}
// EnumLinks gathers every link in the Shard into a slice. The links collected
// before a failure are returned together with the error.
func (ds *Shard) EnumLinks(ctx context.Context) ([]*ipld.Link, error) {
	var collected []*ipld.Link
	collect := func(l *ipld.Link) error {
		collected = append(collected, l)
		return nil
	}

	err := ds.ForEachLink(ctx, collect)
	return collected, err
}
// ForEachLink walks the Shard and calls f once per stored value. Before the
// call, the value's link has its Name set to the plain key (without the hex
// slot prefix).
func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error {
	visit := func(sv *shardValue) error {
		sv.val.Name = sv.key
		return f(sv.val)
	}
	return ds.walkTrie(ctx, visit)
}
// walkTrie visits every child of this shard in order, loading children as
// needed. Values are passed to cb; sub-shards are walked recursively. The walk
// stops at the first error.
func (ds *Shard) walkTrie(ctx context.Context, cb func(*shardValue) error) error {
	for idx := range ds.children {
		c, err := ds.getChild(ctx, idx)
		if err != nil {
			return err
		}

		switch node := c.(type) {
		case *Shard:
			err = node.walkTrie(ctx, cb)
		case *shardValue:
			err = cb(node)
		default:
			return fmt.Errorf("unexpected child type: %#v", node)
		}
		if err != nil {
			return err
		}
	}
	return nil
}
// modifyValue sets key to val, or removes key when val is nil. It reads the
// next tableSizeLg2 bits of hv to pick a slot at this level and recurses into
// sub-shards as needed. Removing a key that is not present yields
// os.ErrNotExist.
func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val *ipld.Link) error {
idx := hv.Next(ds.tableSizeLg2)
// Empty slot: insert here (insertChild rejects a nil val with os.ErrNotExist).
if !ds.bitfield.Bit(idx) {
return ds.insertChild(idx, key, val)
}
cindex := ds.indexForBitPos(idx)
child, err := ds.getChild(ctx, cindex)
if err != nil {
return err
}
switch child := child.(type) {
case *Shard:
// Slot holds a sub-shard: apply the change one level down.
err := child.modifyValue(ctx, hv, key, val)
if err != nil {
return err
}
// After a removal, normalize the sub-shard so the tree shape does not
// depend on the order of operations.
if val == nil {
switch len(child.children) {
case 0:
// empty sub-shard, prune it
// Note: this shouldnt normally ever happen
// in the event of another implementation creates flawed
// structures, this will help to normalize them.
ds.bitfield.UnsetBit(idx)
return ds.rmChild(cindex)
case 1:
nchild, ok := child.children[0].(*shardValue)
if ok {
// sub-shard with a single value element, collapse it
ds.setChild(cindex, nchild)
}
return nil
}
}
return nil
case *shardValue:
if child.key == key {
// value modification
if val == nil {
ds.bitfield.UnsetBit(idx)
return ds.rmChild(cindex)
}
child.val = val
return nil
}
// Slot holds a different key: nothing to remove.
if val == nil {
return os.ErrNotExist
}
// replace value with another shard, one level deeper
ns, err := NewShard(ds.dserv, ds.tableSize)
if err != nil {
return err
}
ns.prefix = ds.prefix
// Hash state for the existing key, starting at the same bit offset hv has
// reached, so both keys are placed from the same depth in the new shard.
chhv := &hashBits{
b: hash([]byte(child.key)),
consumed: hv.consumed,
}
err = ns.modifyValue(ctx, hv, key, val)
if err != nil {
return err
}
err = ns.modifyValue(ctx, chhv, child.key, child.val)
if err != nil {
return err
}
ds.setChild(cindex, ns)
return nil
default:
return fmt.Errorf("unexpected type for child: %#v", child)
}
}
// indexForBitPos returns the index within the collapsed array corresponding to
// the given bit in the bitset. The collapsed array contains only one entry
// per bit set in the bitfield, and this function is used to map the indices.
// The index is the result of the bitfield's OnesBefore(bp).
func (ds *Shard) indexForBitPos(bp int) int {
return ds.bitfield.OnesBefore(bp)
}
// linkNamePrefix takes in the bitfield index of an entry and returns its hex prefix,
// formatted with the shard's prefixPadStr.
func (ds *Shard) linkNamePrefix(idx int) string {
return fmt.Sprintf(ds.prefixPadStr, idx)
}

View File

@ -1,291 +0,0 @@
package hamt
import (
"context"
"fmt"
"math/rand"
"os"
"testing"
"time"
mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// getNames returns count names of the form prefix0, prefix1, ... in order.
func getNames(prefix string, count int) []string {
	names := make([]string, count)
	for i := range names {
		names[i] = fmt.Sprintf("%s%d", prefix, i)
	}
	return names
}
const (
opAdd = iota
opDel
opFind
)
// testOp is one step of a generated operation sequence.
type testOp struct {
// Op is the kind of operation: opAdd, opDel or opFind.
Op int
// Val is the entry name the operation applies to.
Val string
}
// stringArrToSet returns a membership map holding every string in arr.
func stringArrToSet(arr []string) map[string]bool {
	set := map[string]bool{}
	for i := range arr {
		set[arr[i]] = true
	}
	return set
}
// TestOrderConsistency generates two different random operation sequences
// that end with the same set of entries, executes each on its own shard, and
// checks that both shards serialize to the same CID.
func TestOrderConsistency(t *testing.T) {
	seed := time.Now().UnixNano()
	t.Logf("using seed = %d", seed)

	var (
		ds         = mdtest.Mock()
		shardWidth = 1024
		keep       = getNames("good", 4000)
		temp       = getNames("tempo", 6000)
	)

	first, err := executeOpSet(t, ds, shardWidth, genOpSet(seed, keep, temp))
	if err != nil {
		t.Fatal(err)
	}
	if err := validateOpSetCompletion(t, first, keep, temp); err != nil {
		t.Fatal(err)
	}

	second, err := executeOpSet(t, ds, shardWidth, genOpSet(seed+1000, keep, temp))
	if err != nil {
		t.Fatal(err)
	}
	if err := validateOpSetCompletion(t, second, keep, temp); err != nil {
		t.Fatal(err)
	}

	firstNode, err := first.Node()
	if err != nil {
		t.Fatal(err)
	}
	secondNode, err := second.Node()
	if err != nil {
		t.Fatal(err)
	}

	if k, k2 := firstNode.Cid(), secondNode.Cid(); !k.Equals(k2) {
		t.Fatal("got different results: ", k, k2)
	}
}
// validateOpSetCompletion checks the final state of s: every name in keep must
// be findable and every name in temp must be reported as os.ErrNotExist. The
// returned error names the first entry that violates this.
func validateOpSetCompletion(t *testing.T, s *Shard, keep, temp []string) error {
	ctx := context.TODO()
	for _, n := range keep {
		if _, err := s.Find(ctx, n); err != nil {
			return fmt.Errorf("couldnt find %s: %s", n, err)
		}
	}

	for _, n := range temp {
		// err is nil when the entry was unexpectedly found, so use %v and
		// include the name to make the failure actionable.
		if _, err := s.Find(ctx, n); err != os.ErrNotExist {
			return fmt.Errorf("expected not to find %s: %v", n, err)
		}
	}

	return nil
}
// executeOpSet creates a new shard of the given width and applies ops to it in
// order, using a single empty directory node as the value for every add. It
// returns the resulting shard, or the first error encountered.
func executeOpSet(t *testing.T, ds ipld.DAGService, width int, ops []testOp) (*Shard, error) {
	ctx := context.TODO()
	s, err := NewShard(ds, width)
	if err != nil {
		return nil, err
	}

	e := ft.EmptyDirNode()
	if err := ds.Add(ctx, e); err != nil {
		return nil, fmt.Errorf("adding empty dir node: %s", err)
	}

	for _, o := range ops {
		switch o.Op {
		case opAdd:
			if err := s.Set(ctx, o.Val, e); err != nil {
				return nil, fmt.Errorf("inserting %s: %s", o.Val, err)
			}
		case opDel:
			if err := s.Remove(ctx, o.Val); err != nil {
				return nil, fmt.Errorf("deleting %s: %s", o.Val, err)
			}
		case opFind:
			if _, err := s.Find(ctx, o.Val); err != nil {
				return nil, fmt.Errorf("finding %s: %s", o.Val, err)
			}
		}
	}

	return s, nil
}
// genOpSet builds a random sequence of add and delete operations which, once
// executed, leaves exactly the names in keep present: every name in keep is
// added once, and every name in temp is added and later deleted.
//
// All randomness is derived from seed, so a sequence can be reproduced from the
// seed alone. keep and temp are not modified.
func genOpSet(seed int64, keep, temp []string) []testOp {
	tempset := stringArrToSet(temp)

	// Copy into a fresh slice so shuffling can never touch the caller's
	// backing arrays.
	allnames := make([]string, 0, len(keep)+len(temp))
	allnames = append(allnames, keep...)
	allnames = append(allnames, temp...)
	shuffle(seed, allnames)

	// Use a private, seeded source for the add/delete interleaving rather
	// than the process-global one.
	rng := rand.New(rand.NewSource(seed))

	var todel []string
	var ops []testOp

	for {
		n := len(allnames) + len(todel)
		if n == 0 {
			return ops
		}

		if rng.Intn(n) < len(allnames) {
			next := allnames[0]
			allnames = allnames[1:]
			ops = append(ops, testOp{
				Op:  opAdd,
				Val: next,
			})

			// Temporary names become eligible for deletion once added.
			if tempset[next] {
				todel = append(todel, next)
			}
		} else {
			shuffle(seed+100, todel)
			next := todel[0]
			todel = todel[1:]

			ops = append(ops, testOp{
				Op:  opDel,
				Val: next,
			})
		}
	}
}
// executes the given op set with a repl to allow easier debugging
/*func debugExecuteOpSet(ds node.DAGService, width int, ops []testOp) (*Shard, error) {
s, err := NewShard(ds, width)
if err != nil {
return nil, err
}
e := ft.EmptyDirNode()
ds.Add(e)
ctx := context.TODO()
run := 0
opnames := map[int]string{
opAdd: "add",
opDel: "del",
}
mainloop:
for i := 0; i < len(ops); i++ {
o := ops[i]
fmt.Printf("Op %d: %s %s\n", i, opnames[o.Op], o.Val)
for run == 0 {
cmd := readCommand()
parts := strings.Split(cmd, " ")
switch parts[0] {
case "":
run = 1
case "find":
_, err := s.Find(ctx, parts[1])
if err == nil {
fmt.Println("success")
} else {
fmt.Println(err)
}
case "run":
if len(parts) > 1 {
n, err := strconv.Atoi(parts[1])
if err != nil {
panic(err)
}
run = n
} else {
run = -1
}
case "lookop":
for k = 0; k < len(ops); k++ {
if ops[k].Val == parts[1] {
fmt.Printf(" Op %d: %s %s\n", k, opnames[ops[k].Op], parts[1])
}
}
case "restart":
var err error
s, err = NewShard(ds, width)
if err != nil {
panic(err)
}
i = -1
continue mainloop
case "print":
nd, err := s.Node()
if err != nil {
panic(err)
}
printDag(ds, nd.(*dag.ProtoNode), 0)
}
}
run--
switch o.Op {
case opAdd:
err := s.Set(ctx, o.Val, e)
if err != nil {
return nil, fmt.Errorf("inserting %s: %s", o.Val, err)
}
case opDel:
fmt.Println("deleting: ", o.Val)
err := s.Remove(ctx, o.Val)
if err != nil {
return nil, fmt.Errorf("deleting %s: %s", o.Val, err)
}
case opFind:
_, err := s.Find(ctx, o.Val)
if err != nil {
return nil, fmt.Errorf("finding %s: %s", o.Val, err)
}
}
}
return s, nil
}
func readCommand() string {
fmt.Print("> ")
scan := bufio.NewScanner(os.Stdin)
scan.Scan()
return scan.Text()
}*/

View File

@ -1,610 +0,0 @@
package hamt
import (
"context"
"fmt"
"math/rand"
"os"
"sort"
"testing"
"time"
"github.com/ipfs/go-ipfs/dagutils"
dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// shuffle permutes arr in place by performing len(arr) swaps of two randomly
// chosen positions, using a generator seeded with seed. The same seed and
// input always give the same permutation.
func shuffle(seed int64, arr []string) {
	rng := rand.New(rand.NewSource(seed))
	n := len(arr)
	for range arr {
		x := rng.Intn(n)
		y := rng.Intn(n)
		arr[x], arr[y] = arr[y], arr[x]
	}
}
// makeDir is makeDirWidth with a shard width of 256.
func makeDir(ds ipld.DAGService, size int) ([]string, *Shard, error) {
return makeDirWidth(ds, size, 256)
}
// makeDirWidth creates a shard of the given width and fills it with size
// entries named DIRNAME0..DIRNAME<size-1>, inserted in random order, each
// pointing at an empty directory node. It returns the names (in insertion
// order) and the shard, or the first error encountered.
func makeDirWidth(ds ipld.DAGService, size, width int) ([]string, *Shard, error) {
	ctx := context.Background()

	s, err := NewShard(ds, width)
	if err != nil {
		return nil, nil, err
	}

	var dirs []string
	for i := 0; i < size; i++ {
		dirs = append(dirs, fmt.Sprintf("DIRNAME%d", i))
	}

	shuffle(time.Now().UnixNano(), dirs)

	for _, name := range dirs {
		nd := ft.EmptyDirNode()
		if err := ds.Add(ctx, nd); err != nil {
			return nil, nil, err
		}
		if err := s.Set(ctx, name, nd); err != nil {
			return nil, nil, err
		}
	}

	return dirs, s, nil
}
// assertLink checks whether name is present in s and compares that with the
// expectation in found. It returns nil when they agree, and otherwise the
// lookup error or a descriptive error.
func assertLink(s *Shard, name string, found bool) error {
	_, err := s.Find(context.Background(), name)

	if err == nil {
		if found {
			return nil
		}
		return fmt.Errorf("expected not to find link named %s", name)
	}

	// A missing entry is only acceptable when it was expected to be missing.
	if err == os.ErrNotExist && !found {
		return nil
	}
	return err
}
// assertSerializationWorks serializes s, reloads the result as a new shard and
// verifies that both enumerate the same links, in the same order, with equal
// names, CIDs and sizes.
func assertSerializationWorks(ds ipld.DAGService, s *Shard) error {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	nd, err := s.Node()
	if err != nil {
		return err
	}

	reloaded, err := NewHamtFromDag(ds, nd)
	if err != nil {
		return err
	}

	want, err := s.EnumLinks(ctx)
	if err != nil {
		return err
	}

	got, err := reloaded.EnumLinks(ctx)
	if err != nil {
		return err
	}

	if len(want) != len(got) {
		return fmt.Errorf("links arrays are different sizes")
	}

	for i := range want {
		a, b := want[i], got[i]
		switch {
		case a.Name != b.Name:
			return fmt.Errorf("links names mismatch")
		case a.Cid.String() != b.Cid.String():
			return fmt.Errorf("link hashes dont match")
		case a.Size != b.Size:
			return fmt.Errorf("link sizes not the same")
		}
	}

	return nil
}
// TestBasicSet fills a 1000-entry directory for several shard widths and
// checks that every inserted name can be found again.
func TestBasicSet(t *testing.T) {
	ds := mdtest.Mock()
	widths := []int{128, 256, 512, 1024, 2048, 4096}
	for _, w := range widths {
		t.Run(fmt.Sprintf("BasicSet%d", w), func(t *testing.T) {
			names, s, err := makeDirWidth(ds, 1000, w)
			if err != nil {
				t.Fatal(err)
			}

			ctx := context.Background()
			for _, d := range names {
				if _, err := s.Find(ctx, d); err != nil {
					t.Fatal(err)
				}
			}
		})
	}
}
// TestDirBuilding builds a 200-entry directory and checks that it serializes
// to a known, fixed CID.
func TestDirBuilding(t *testing.T) {
	ds := mdtest.Mock()

	_, s, err := makeDir(ds, 200)
	if err != nil {
		t.Fatal(err)
	}

	nd, err := s.Node()
	if err != nil {
		t.Fatal(err)
	}

	//printDag(ds, nd, 0)

	k := nd.Cid()

	if k.String() != "QmY89TkSEVHykWMHDmyejSWFj9CYNtvzw4UwnT9xbc4Zjc" {
		t.Fatalf("output didnt match what we expected (got %s)", k.String())
	}
}
// TestShardReload serializes a 200-entry directory, reloads it from the DAG,
// and checks that the reloaded shard enumerates and finds its entries. It then
// verifies that reloading and re-serializing without changes yields the same
// CID.
func TestShardReload(t *testing.T) {
	ds := mdtest.Mock()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	_, s, err := makeDir(ds, 200)
	if err != nil {
		t.Fatal(err)
	}

	nd, err := s.Node()
	if err != nil {
		t.Fatal(err)
	}

	nds, err := NewHamtFromDag(ds, nd)
	if err != nil {
		t.Fatal(err)
	}

	lnks, err := nds.EnumLinks(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(lnks) != 200 {
		t.Fatal("not enough links back")
	}

	if _, err = nds.Find(ctx, "DIRNAME50"); err != nil {
		t.Fatal(err)
	}

	// Now test roundtrip marshal with no operations
	nds, err = NewHamtFromDag(ds, nd)
	if err != nil {
		t.Fatal(err)
	}

	ond, err := nds.Node()
	if err != nil {
		t.Fatal(err)
	}

	outk := ond.Cid()
	ndk := nd.Cid()

	if !outk.Equals(ndk) {
		printDiff(ds, nd.(*dag.ProtoNode), ond.(*dag.ProtoNode))
		t.Fatal("roundtrip serialization failed")
	}
}
// TestRemoveElems checks that removing unknown names fails with
// os.ErrNotExist, that every inserted name can be found and then removed, and
// that the emptied shard serializes without links.
func TestRemoveElems(t *testing.T) {
	ds := mdtest.Mock()
	dirs, s, err := makeDir(ds, 500)
	if err != nil {
		t.Fatal(err)
	}
	ctx := context.Background()

	for i := 0; i < 100; i++ {
		missing := fmt.Sprintf("NOTEXIST%d", rand.Int())
		if err := s.Remove(ctx, missing); err != os.ErrNotExist {
			t.Fatal("shouldnt be able to remove things that don't exist")
		}
	}

	for _, d := range dirs {
		if _, err := s.Find(ctx, d); err != nil {
			t.Fatal(err)
		}
	}

	shuffle(time.Now().UnixNano(), dirs)

	for _, d := range dirs {
		if err := s.Remove(ctx, d); err != nil {
			t.Fatal(err)
		}
	}

	nd, err := s.Node()
	if err != nil {
		t.Fatal(err)
	}

	if len(nd.Links()) > 0 {
		t.Fatal("shouldnt have any links here")
	}

	if err := s.Remove(ctx, "doesnt exist"); err != os.ErrNotExist {
		t.Fatal("expected error does not exist")
	}
}
// TestSetAfterMarshal reloads a serialized 300-entry directory, adds 100 more
// entries to the reloaded shard, and checks that it then holds 400 links and
// still round-trips through serialization.
func TestSetAfterMarshal(t *testing.T) {
	ds := mdtest.Mock()
	_, s, err := makeDir(ds, 300)
	if err != nil {
		t.Fatal(err)
	}
	ctx := context.Background()

	nd, err := s.Node()
	if err != nil {
		t.Fatal(err)
	}

	nds, err := NewHamtFromDag(ds, nd)
	if err != nil {
		t.Fatal(err)
	}

	empty := ft.EmptyDirNode()
	for i := 0; i < 100; i++ {
		name := fmt.Sprintf("moredirs%d", i)
		if err := nds.Set(ctx, name, empty); err != nil {
			t.Fatal(err)
		}
	}

	links, err := nds.EnumLinks(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(links) != 400 {
		t.Fatal("expected 400 links")
	}

	if err := assertSerializationWorks(ds, nds); err != nil {
		t.Fatal(err)
	}
}
// TestDuplicateAddShard sets the same name twice and checks that the shard
// ends up with a single link.
func TestDuplicateAddShard(t *testing.T) {
	ds := mdtest.Mock()
	dir, err := NewShard(ds, 256)
	if err != nil {
		t.Fatal(err)
	}
	nd := new(dag.ProtoNode)
	ctx := context.Background()

	if err := dir.Set(ctx, "test", nd); err != nil {
		t.Fatal(err)
	}

	if err := dir.Set(ctx, "test", nd); err != nil {
		t.Fatal(err)
	}

	lnks, err := dir.EnumLinks(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(lnks) != 1 {
		t.Fatal("expected only one link")
	}
}
// TestLoadFailsFromNonShard checks that NewHamtFromDag rejects nodes that are
// not HAMT shards: first an empty directory node, then an empty protobuf node.
func TestLoadFailsFromNonShard(t *testing.T) {
	ds := mdtest.Mock()

	if _, err := NewHamtFromDag(ds, ft.EmptyDirNode()); err == nil {
		t.Fatal("expected dir shard creation to fail when given normal directory")
	}

	if _, err := NewHamtFromDag(ds, new(dag.ProtoNode)); err == nil {
		t.Fatal("expected dir shard creation to fail when given normal directory")
	}
}
// TestFindNonExisting checks that looking up names that were never inserted
// returns os.ErrNotExist.
func TestFindNonExisting(t *testing.T) {
	ds := mdtest.Mock()
	_, s, err := makeDir(ds, 100)
	if err != nil {
		t.Fatal(err)
	}
	ctx := context.Background()

	for i := 0; i < 200; i++ {
		name := fmt.Sprintf("notfound%d", i)
		if _, err := s.Find(ctx, name); err != os.ErrNotExist {
			t.Fatal("expected ErrNotExist")
		}
	}
}
// TestRemoveElemsAfterMarshal removes one entry, serializes and reloads the
// shard, and checks that the removal survived. It then removes every remaining
// entry from the reloaded shard and verifies the empty shard still round-trips
// through serialization.
func TestRemoveElemsAfterMarshal(t *testing.T) {
	ds := mdtest.Mock()
	dirs, s, err := makeDir(ds, 30)
	if err != nil {
		t.Fatal(err)
	}
	ctx := context.Background()

	sort.Strings(dirs)
	removed, remaining := dirs[0], dirs[1:]

	if err := s.Remove(ctx, removed); err != nil {
		t.Fatal(err)
	}

	if out, err := s.Find(ctx, removed); err == nil {
		t.Fatal("expected error, got: ", out)
	}

	nd, err := s.Node()
	if err != nil {
		t.Fatal(err)
	}

	nds, err := NewHamtFromDag(ds, nd)
	if err != nil {
		t.Fatal(err)
	}

	if _, err := nds.Find(ctx, removed); err == nil {
		t.Fatal("expected not to find ", dirs[0])
	}

	for _, d := range remaining {
		if _, err := nds.Find(ctx, d); err != nil {
			t.Fatal("could not find expected link after unmarshaling")
		}
	}

	for _, d := range remaining {
		if err := nds.Remove(ctx, d); err != nil {
			t.Fatal(err)
		}
	}

	links, err := nds.EnumLinks(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(links) != 0 {
		t.Fatal("expected all links to be removed")
	}

	if err := assertSerializationWorks(ds, nds); err != nil {
		t.Fatal(err)
	}
}
// TestBitfieldIndexing sets bits in a shard's bitfield and checks that
// indexForBitPos reports the expected collapsed-array index after each change.
func TestBitfieldIndexing(t *testing.T) {
	ds := mdtest.Mock()
	s, err := NewShard(ds, 256)
	if err != nil {
		t.Fatal(err)
	}

	set := func(i int) {
		s.bitfield.SetBit(i)
	}

	assert := func(i int, val int) {
		if s.indexForBitPos(i) != val {
			t.Fatalf("expected index %d to be %d", i, val)
		}
	}

	assert(50, 0)
	set(4)
	set(5)
	set(60)

	assert(10, 2)
	set(3)
	assert(10, 3)
	assert(1, 0)

	assert(100, 4)
	set(50)
	assert(45, 3)
	set(100)
	assert(100, 5)
}
// TestSetHamtChild tests adding a sharded directory node as the child of
// another directory node. If improperly implemented, the parent hamt may
// assume the child is a part of itself.
func TestSetHamtChild(t *testing.T) {
	ctx := context.Background()

	ds := mdtest.Mock()
	s, err := NewShard(ds, 256)
	if err != nil {
		t.Fatal(err)
	}

	e := ft.EmptyDirNode()
	if err := ds.Add(ctx, e); err != nil {
		t.Fatal(err)
	}

	if err := s.Set(ctx, "bar", e); err != nil {
		t.Fatal(err)
	}

	snd, err := s.Node()
	if err != nil {
		t.Fatal(err)
	}

	_, ns, err := makeDir(ds, 50)
	if err != nil {
		t.Fatal(err)
	}

	if err := ns.Set(ctx, "foo", snd); err != nil {
		t.Fatal(err)
	}

	nsnd, err := ns.Node()
	if err != nil {
		t.Fatal(err)
	}

	hs, err := NewHamtFromDag(ds, nsnd)
	if err != nil {
		t.Fatal(err)
	}

	// "bar" lives inside the nested directory and must not be visible from
	// the parent; "foo" is the parent's own entry.
	if err := assertLink(hs, "bar", false); err != nil {
		t.Fatal(err)
	}

	if err := assertLink(hs, "foo", true); err != nil {
		t.Fatal(err)
	}
}
// printDiff is a debugging aid: it computes the changes between the DAGs
// rooted at a and b with dagutils.Diff and prints each one on its own line.
// It panics if the diff cannot be computed.
func printDiff(ds ipld.DAGService, a, b *dag.ProtoNode) {
	changes, err := dagutils.Diff(context.TODO(), ds, a, b)
	if err != nil {
		panic(err)
	}

	for i := range changes {
		fmt.Println(changes[i])
	}
}
// BenchmarkHAMTWalk measures how long it takes to enumerate every link of a
// sharded directory with ForEachLink. The directory is populated once, before
// the timer is reset, so only the walk itself is measured.
func BenchmarkHAMTWalk(b *testing.B) {
	// Number of entries inserted into the shard and expected from each walk.
	const numLinks = 1000

	ctx := context.Background()

	ds := mdtest.Mock()
	sh, err := NewShard(ds, 256)
	if err != nil {
		b.Fatal(err)
	}
	nd, err := sh.Node()
	if err != nil {
		b.Fatal(err)
	}

	err = ds.Add(ctx, nd)
	if err != nil {
		b.Fatal(err)
	}
	if err := ds.Add(ctx, ft.EmptyDirNode()); err != nil {
		b.Fatal(err)
	}

	s, err := NewHamtFromDag(ds, nd)
	if err != nil {
		b.Fatal(err)
	}

	for j := 0; j < numLinks; j++ {
		err = s.Set(ctx, fmt.Sprintf("%d", j), ft.EmptyDirNode())
		if err != nil {
			b.Fatal(err)
		}
	}

	// Exclude the setup above from the measurement.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		cnt := 0
		err = s.ForEachLink(ctx, func(l *ipld.Link) error {
			cnt++
			return nil
		})
		if err != nil {
			b.Fatal(err)
		}
		if cnt < numLinks {
			b.Fatalf("expected %d children, got %d", numLinks, cnt)
		}
	}
}
// BenchmarkHAMTSet measures a full load/modify/serialize cycle: on every
// iteration the shard is rebuilt from the previously produced node, one new
// entry is set, and the shard is turned back into a node that feeds the next
// iteration.
func BenchmarkHAMTSet(b *testing.B) {
	ctx := context.Background()

	ds := mdtest.Mock()
	sh, err := NewShard(ds, 256)
	if err != nil {
		b.Fatal(err)
	}
	nd, err := sh.Node()
	if err != nil {
		b.Fatal(err)
	}

	if err := ds.Add(ctx, nd); err != nil {
		b.Fatal(err)
	}
	if err := ds.Add(ctx, ft.EmptyDirNode()); err != nil {
		b.Fatal(err)
	}

	// Exclude the setup above from the measurement.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		s, err := NewHamtFromDag(ds, nd)
		if err != nil {
			b.Fatal(err)
		}

		err = s.Set(ctx, fmt.Sprint(i), ft.EmptyDirNode())
		if err != nil {
			b.Fatal(err)
		}

		out, err := s.Node()
		if err != nil {
			b.Fatal(err)
		}

		// The freshly serialized shard is the input of the next iteration.
		nd = out
	}
}
// TestHamtBadSize verifies that NewShard refuses a size of 7 (which is not a
// power of two) by returning an error.
func TestHamtBadSize(t *testing.T) {
	if _, err := NewShard(nil, 7); err == nil {
		t.Fatal("should have failed to construct hamt with bad size")
	}
}

View File

@ -1,52 +0,0 @@
package hamt
import (
"fmt"
"math/bits"
)
// hashBits is a helper that allows the reading of the 'next n bits' as an
// integer.
type hashBits struct {
	b        []byte // bytes the bits are read from
	consumed int    // number of bits already returned by Next
}

// mkmask returns a byte whose n lowest bits are set. For n == 8 the shift
// wraps the byte around to zero, so the subtraction yields 0xff.
func mkmask(n int) byte {
	return byte(1)<<uint(n) - 1
}

// Next returns the next 'i' bits of the hashBits value as an integer, reading
// each byte from its most significant unread bit downwards, and advances the
// read position by i bits. Reading past the end of the underlying slice
// panics with an index-out-of-range error.
func (hb *hashBits) Next(i int) int {
	acc := 0
	for {
		cur := hb.b[hb.consumed/8]
		avail := 8 - (hb.consumed % 8) // unread bits left in cur

		if i > avail {
			// Take everything that is left of this byte as the high part of
			// the result and keep going with the following byte.
			acc += int(cur&mkmask(avail)) << uint(i-avail)
			hb.consumed += avail
			i -= avail
			continue
		}

		// The request fits in the current byte: drop the bits that were
		// already consumed, then shift away the low bits that stay unread.
		unread := cur & mkmask(avail)
		hb.consumed += i
		return acc + int(unread>>uint(avail-i))
	}
}
// logtwo returns the base-2 logarithm of v. It returns an error unless v is a
// positive power of two.
func logtwo(v int) (int, error) {
	// A positive power of two has exactly one bit set, so clearing its
	// lowest set bit must leave nothing behind.
	if v <= 0 || v&(v-1) != 0 {
		return 0, fmt.Errorf("hamt size should be a power of two")
	}
	return bits.TrailingZeros(uint(v)), nil
}

View File

@ -1,45 +0,0 @@
package hamt
import (
"testing"
)
// TestHashBitsEvenSizes reads the buffer back eight bits at a time and
// expects to get every byte of it, in order.
func TestHashBitsEvenSizes(t *testing.T) {
	data := []byte{255, 127, 79, 45, 116, 99, 35, 17}
	reader := hashBits{b: data}

	for idx := range data {
		if got := reader.Next(8); got != int(data[idx]) {
			t.Fatal("got wrong numbers back")
		}
	}
}
// TestHashBitsUneven reads bit groups whose widths do not line up with byte
// boundaries, including reads that span several bytes, and checks the value
// of each read.
func TestHashBitsUneven(t *testing.T) {
	buf := []byte{255, 127, 79, 45, 116, 99, 35, 17}
	hb := hashBits{b: buf}

	// The reads are consecutive: each one starts where the previous ended.
	steps := []struct {
		width int // number of bits requested
		want  int // expected value of those bits
	}{
		{4, 15},
		{4, 15},
		{3, 3},
		{3, 7},
		{3, 6}, // crosses from the second into the third byte
		{15, 20269},
	}

	for i, step := range steps {
		if got := hb.Next(step.width); got != step.want {
			// Report decimal first (matching the expectation) and binary
			// second, which helps when inspecting the bit pattern.
			t.Fatalf("read %d (%d bits): expected %d, but got %d (%b)",
				i, step.width, step.want, got, got)
		}
	}
}

View File

@ -1,334 +0,0 @@
package balanced
import (
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
mrand "math/rand"
"testing"
dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test"
h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers"
uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io"
u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util"
chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// TODO: extract these tests and more as a generic layout test suite
// buildTestDag lays out the chunks produced by spl as a balanced DAG stored
// in ds, using the default number of links per block, and returns the root.
// An error is returned if the layout fails or if the resulting root is not a
// *dag.ProtoNode.
func buildTestDag(ds ipld.DAGService, spl chunker.Splitter) (*dag.ProtoNode, error) {
	dbp := h.DagBuilderParams{
		Dagserv:  ds,
		Maxlinks: h.DefaultLinksPerBlock,
	}

	nd, err := Layout(dbp.New(spl))
	if err != nil {
		return nil, err
	}

	// Use a checked assertion so that an unexpected node type is reported as
	// an error rather than as a panic.
	pn, ok := nd.(*dag.ProtoNode)
	if !ok {
		return nil, fmt.Errorf("expected a *dag.ProtoNode root, got %T", nd)
	}
	return pn, nil
}
// getTestDag generates size bytes of random data, imports them into ds as a
// balanced DAG chunked into blksize-sized pieces, and returns the DAG root
// together with the data that was generated. Any failure aborts the test.
func getTestDag(t *testing.T, ds ipld.DAGService, size int64, blksize int64) (*dag.ProtoNode, []byte) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	data := make([]byte, size)
	if _, err := u.NewTimeSeededRand().Read(data); err != nil {
		t.Fatal(err)
	}
	r := bytes.NewReader(data)

	nd, err := buildTestDag(ds, chunker.NewSizeSplitter(r, blksize))
	if err != nil {
		t.Fatal(err)
	}

	return nd, data
}
// TestSizeBasedSplit covers the case where calls to read are smaller than the
// chunk size. It is skipped in short mode.
func TestSizeBasedSplit(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}

	// Each entry is {file size in bytes, block size in bytes}.
	cases := [][2]int64{
		{32 * 512, 512},
		{32 * 4096, 4096},
		{31 * 4095, 4096}, // Uneven offset
	}
	for _, c := range cases {
		testFileConsistency(t, c[0], c[1])
	}
}
// testFileConsistency imports nbytes of random data chunked into blksize
// pieces, reads the resulting DAG back through a DagReader, and fails the
// test if the bytes read differ from the bytes imported.
func testFileConsistency(t *testing.T, nbytes int64, blksize int64) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	ds := mdtest.Mock()
	nd, should := getTestDag(t, ds, nbytes, blksize)

	r, err := uio.NewDagReader(context.Background(), nd, ds)
	if err != nil {
		t.Fatal(err)
	}

	dagrArrComp(t, r, should)
}
// TestBuilderConsistency runs the import/read-back consistency check on a
// 100000-byte file chunked with the chunker's default block size.
func TestBuilderConsistency(t *testing.T) {
testFileConsistency(t, 100000, chunker.DefaultBlockSize)
}
// TestNoChunking imports a file that is smaller than the block size (1000
// bytes with 2000-byte blocks) and checks that it reads back unchanged.
func TestNoChunking(t *testing.T) {
	dserv := mdtest.Mock()
	root, want := getTestDag(t, dserv, 1000, 2000)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	dagrArrComp(t, reader, want)
}
// TestTwoChunks imports a file that is exactly two blocks long (2000 bytes
// with 1000-byte blocks) and checks that it reads back unchanged.
func TestTwoChunks(t *testing.T) {
	dserv := mdtest.Mock()
	root, want := getTestDag(t, dserv, 2000, 1000)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	dagrArrComp(t, reader, want)
}
// arrComp compares two byte slices. It returns nil when they are equal, and
// otherwise an error that reports either the differing lengths or the index
// of the first byte that differs.
func arrComp(a, b []byte) error {
	if la, lb := len(a), len(b); la != lb {
		return fmt.Errorf("arrays differ in length. %d != %d", la, lb)
	}

	for i := 0; i < len(a); i++ {
		if a[i] != b[i] {
			return fmt.Errorf("arrays differ at index: %d", i)
		}
	}
	return nil
}
// dagrArrComp reads r until EOF and fails the test if reading fails or if the
// bytes read differ from should.
func dagrArrComp(t *testing.T, r io.Reader, should []byte) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	out, err := ioutil.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}

	if err := arrComp(out, should); err != nil {
		t.Fatal(err)
	}
}
// TestIndirectBlocks imports 1 MiB of data in 512-byte blocks and checks that
// the whole file reads back unchanged.
func TestIndirectBlocks(t *testing.T) {
	dserv := mdtest.Mock()
	root, want := getTestDag(t, dserv, 1024*1024, 512)

	rd, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	got, err := ioutil.ReadAll(rd)
	if err != nil {
		t.Fatal(err)
	}

	if !bytes.Equal(got, want) {
		t.Fatal("Not equal!")
	}
}
// TestSeekingBasic seeks to byte 4000 of a 10 KiB file and checks that the
// rest of the file is read correctly from that position.
func TestSeekingBasic(t *testing.T) {
	const target = int64(4000)

	total := int64(10 * 1024)
	dserv := mdtest.Mock()
	root, want := getTestDag(t, dserv, total, 500)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	pos, err := reader.Seek(target, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	if pos != target {
		t.Fatal("Failed to seek to correct offset")
	}

	dagrArrComp(t, reader, want[target:])
}
// TestSeekToBegin consumes the first 4096 bytes of a file, seeks back to the
// start, and checks that the whole file is then read correctly.
func TestSeekToBegin(t *testing.T) {
	dserv := mdtest.Mock()
	root, want := getTestDag(t, dserv, 10*1024, 500)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	// Move the read position forward so that seeking back has an effect.
	copied, err := io.CopyN(ioutil.Discard, reader, 1024*4)
	if err != nil {
		t.Fatal(err)
	}
	if copied != 4096 {
		t.Fatal("Copy didnt copy enough bytes")
	}

	pos, err := reader.Seek(0, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	if pos != 0 {
		t.Fatal("Failed to seek to beginning")
	}

	dagrArrComp(t, reader, want)
}
// TestSeekToAlmostBegin consumes the first 4096 bytes of a file, seeks back
// to offset 1, and checks that everything after the first byte is then read
// correctly.
func TestSeekToAlmostBegin(t *testing.T) {
	dserv := mdtest.Mock()
	root, want := getTestDag(t, dserv, 10*1024, 500)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	// Move the read position forward so that seeking back has an effect.
	copied, err := io.CopyN(ioutil.Discard, reader, 1024*4)
	if err != nil {
		t.Fatal(err)
	}
	if copied != 4096 {
		t.Fatal("Copy didnt copy enough bytes")
	}

	pos, err := reader.Seek(1, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	if pos != 1 {
		t.Fatal("Failed to seek to almost beginning")
	}

	dagrArrComp(t, reader, want[1:])
}
// TestSeekEnd checks that seeking to the end of a 50 KiB multi-block file
// reports the file size as the new position.
func TestSeekEnd(t *testing.T) {
	total := int64(50 * 1024)
	dserv := mdtest.Mock()
	root, _ := getTestDag(t, dserv, total, 500)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	pos, err := reader.Seek(0, io.SeekEnd)
	if err != nil {
		t.Fatal(err)
	}
	if pos != total {
		t.Fatal("Failed to seek to end")
	}
}
// TestSeekEndSingleBlockFile checks that seeking to the end of a file that
// fits in a single block (100 bytes with 5000-byte blocks) reports the file
// size as the new position.
func TestSeekEndSingleBlockFile(t *testing.T) {
	total := int64(100)
	dserv := mdtest.Mock()
	root, _ := getTestDag(t, dserv, total, 5000)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	pos, err := reader.Seek(0, io.SeekEnd)
	if err != nil {
		t.Fatal(err)
	}
	if pos != total {
		t.Fatal("Failed to seek to end")
	}
}
// TestSeekingStress performs 50 seeks to random offsets of a 1 MiB file and,
// after each seek, reads the remainder of the file in a single Read call and
// compares it with the expected data.
func TestSeekingStress(t *testing.T) {
	total := int64(1024 * 1024)
	dserv := mdtest.Mock()
	root, want := getTestDag(t, dserv, total, 1000)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	scratch := make([]byte, total)
	for round := 0; round < 50; round++ {
		off := mrand.Intn(int(total))
		remaining := int(total) - off

		pos, err := reader.Seek(int64(off), io.SeekStart)
		if err != nil {
			t.Fatal(err)
		}
		if pos != int64(off) {
			t.Fatal("Seek failed to move to correct position")
		}

		got, err := reader.Read(scratch[:remaining])
		if err != nil {
			t.Fatal(err)
		}
		if got != remaining {
			t.Fatal("Failed to read enough bytes")
		}

		if err := arrComp(scratch[:remaining], want[off:off+remaining]); err != nil {
			t.Fatal(err)
		}
	}
}
// TestSeekingConsistency reads a 128 KiB file backwards in 4096-byte windows,
// seeking before every read, and checks that the reassembled content matches
// the data that was imported.
func TestSeekingConsistency(t *testing.T) {
	// Size of each window that is read after a seek.
	const window = 4096

	total := int64(128 * 1024)
	dserv := mdtest.Mock()
	root, want := getTestDag(t, dserv, total, 500)

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	got := make([]byte, total)
	for off := total - window; off >= 0; off -= window {
		t.Log(off)

		pos, err := reader.Seek(off, io.SeekStart)
		if err != nil {
			t.Fatal(err)
		}
		if pos != off {
			t.Fatal("wasnt able to seek to the right position")
		}

		n, err := reader.Read(got[off : off+window])
		if err != nil {
			t.Fatal(err)
		}
		if n != window {
			t.Fatal("didnt read the correct number of bytes")
		}
	}

	if err := arrComp(got, want); err != nil {
		t.Fatal(err)
	}
}

View File

@ -1,255 +0,0 @@
// Package balanced provides methods to build balanced DAGs, which are generalistic
// DAGs in which all leaves (nodes representing chunks of data) are at the same
// distance from the root. Nodes can have only a maximum number of children; to be
// able to store more leaf data nodes balanced DAGs are extended by increasing its
// depth (and having more intermediary nodes).
//
// Internal nodes are always represented by UnixFS nodes (of type `File`) encoded
// inside DAG nodes (see the `go-unixfs` package for details of UnixFS). In
// contrast, leaf nodes with data have multiple possible representations: UnixFS
// nodes as above, raw nodes with just the file data (no format) and Filestore
// nodes (that directly link to the file on disk using a format stored on a raw
// node, see the `go-ipfs/filestore` package for details of Filestore.)
//
// In the case the entire file fits into just one node it will be formatted as a
// (single) leaf node (without parent) with the possible representations already
// mentioned. This is the only scenario where the root can be of a type different
// than the UnixFS node.
//
// +-------------+
// | Root 4 |
// +-------------+
// |
// +--------------------------+----------------------------+
// | |
// +-------------+ +-------------+
// | Node 2 | | Node 5 |
// +-------------+ +-------------+
// | |
// +-------------+-------------+ +-------------+
// | | |
// +-------------+ +-------------+ +-------------+
// | Node 1 | | Node 3 | | Node 6 |
// +-------------+ +-------------+ +-------------+
// | | |
// +------+------+ +------+------+ +------+
// | | | | |
// +=========+ +=========+ +=========+ +=========+ +=========+
// | Chunk 1 | | Chunk 2 | | Chunk 3 | | Chunk 4 | | Chunk 5 |
// +=========+ +=========+ +=========+ +=========+ +=========+
//
package balanced
import (
"errors"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// Layout builds a balanced DAG layout. In a balanced DAG of depth 1, leaf nodes
// with data are added to a single `root` until the maximum number of links is
// reached. Then, to continue adding more data leaf nodes, a `newRoot` is created
// pointing to the old `root` (which will now become an intermediary node),
// increasing the depth of the DAG to 2. This will increase the maximum number of
// data leaf nodes the DAG can have (`Maxlinks() ^ depth`). The `fillNodeRec`
// function will add more intermediary child nodes to `newRoot` (which already has
// `root` as child) that in turn will have leaf nodes with data added to them.
// After that process is completed (the maximum number of links is reached),
// `fillNodeRec` will return and the loop will be repeated: the `newRoot` created
// will become the old `root` and a new root will be created again to increase the
// depth of the DAG. The process is repeated until there is no more data to add
// (i.e. the DagBuilderHelpers Done() function returns true).
//
// The nodes are filled recursively, so the DAG is built from the bottom up. Leaf
// nodes are created first using the chunked file data and its size. The size is
// then bubbled up to the parent (internal) node, which aggregates all the sizes of
// its children and bubbles that combined size up to its parent, and so on up to
// the root. This way, a balanced DAG acts like a B-tree when seeking to a byte
// offset in the file the graph represents: each internal node uses the file size
// of its children as an index when seeking.
//
// `Layout` creates a root and hands it off to be filled:
//
// +-------------+
// | Root 1 |
// +-------------+
// |
// ( fillNodeRec fills in the )
// ( chunks on the root. )
// |
// +------+------+
// | |
// + - - - - + + - - - - +
// | Chunk 1 | | Chunk 2 |
// + - - - - + + - - - - +
//
// ↓
// When the root is full but there's more data...
// ↓
//
// +-------------+
// | Root 1 |
// +-------------+
// |
// +------+------+
// | |
// +=========+ +=========+ + - - - - +
// | Chunk 1 | | Chunk 2 | | Chunk 3 |
// +=========+ +=========+ + - - - - +
//
// ↓
// ...Layout's job is to create a new root.
// ↓
//
// +-------------+
// | Root 2 |
// +-------------+
// |
// +-------------+ - - - - - - - - +
// | |
// +-------------+ ( fillNodeRec creates the )
// | Node 1 | ( branch that connects )
// +-------------+ ( "Root 2" to "Chunk 3." )
// | |
// +------+------+ + - - - - -+
// | | |
// +=========+ +=========+ + - - - - +
// | Chunk 1 | | Chunk 2 | | Chunk 3 |
// +=========+ +=========+ + - - - - +
//
func Layout(db *h.DagBuilderHelper) (ipld.Node, error) {
	if db.Done() {
		// No data, return just an empty node.
		root, err := db.NewLeafNode(nil)
		if err != nil {
			return nil, err
		}
		// This works without Filestore support (`ProcessFileStore`).
		// TODO: Why? Is there a test case missing?

		return db.AddNodeAndClose(root)
	}

	// The first `root` will be a single leaf node with data
	// (corner case), after that subsequent `root` nodes will
	// always be internal nodes (with a depth > 0) that can
	// be handled by the loop.
	root, fileSize, err := db.NewLeafDataNode()
	if err != nil {
		return nil, err
	}

	// Each time a DAG of a certain `depth` is filled (because it
	// has reached its maximum capacity of `db.Maxlinks()` per node)
	// extend it by making it a sub-DAG of a bigger DAG with `depth+1`.
	for depth := 1; !db.Done(); depth++ {

		// Add the old `root` as a child of the `newRoot`. `AddChild` can
		// fail (it links the child and hands it to the batch), so its
		// error must be propagated just like `fillNodeRec` does.
		newRoot := db.NewFSNodeOverDag(ft.TFile)
		if err := newRoot.AddChild(root, fileSize, db); err != nil {
			return nil, err
		}

		// Fill the `newRoot` (that has the old `root` already as child)
		// and make it the current `root` for the next iteration (when
		// it will become "old").
		root, fileSize, err = fillNodeRec(db, newRoot, depth)
		if err != nil {
			return nil, err
		}
	}

	return db.AddNodeAndClose(root)
}
// fillNodeRec will "fill" the given internal (non-leaf) `node` with data by
// adding child nodes to it, either leaf data nodes (if `depth` is 1) or more
// internal nodes with higher depth (and calling itself recursively on them
// until *they* are filled with data). The data to fill the node with is
// provided by DagBuilderHelper.
//
// `node` represents a (sub-)DAG root that is being filled. If called recursively,
// it is `nil`, a new node is created. If it has been called from `Layout` (see
// diagram below) it points to the new root (that increases the depth of the DAG),
// it already has a child (the old root). New children will be added to this new
// root, and those children will in turn be filled (calling `fillNodeRec`
// recursively).
//
// +-------------+
// | `node` |
// | (new root) |
// +-------------+
// |
// +-------------+ - - - - - - + - - - - - - - - - - - +
// | | |
// +--------------+ + - - - - - + + - - - - - +
// | (old root) | | new child | | |
// +--------------+ + - - - - - + + - - - - - +
// | | |
// +------+------+ + - - + - - - +
// | | | |
// +=========+ +=========+ + - - - - + + - - - - +
// | Chunk 1 | | Chunk 2 | | Chunk 3 | | Chunk 4 |
// +=========+ +=========+ + - - - - + + - - - - +
//
// The `node` to be filled uses the `FSNodeOverDag` abstraction that allows adding
// child nodes without packing/unpacking the UnixFS layer node (having an internal
// `ft.FSNode` cache).
//
// It returns the `ipld.Node` representation of the passed `node` filled with
// children and the `nodeFileSize` with the total size of the file chunk (leaf)
// nodes stored under this node (parent nodes store this to enable efficient
// seeking through the DAG when reading data later).
//
// warning: **children** pinned indirectly, but input node IS NOT pinned.
func fillNodeRec(db *h.DagBuilderHelper, node *h.FSNodeOverDag, depth int) (filledNode ipld.Node, nodeFileSize uint64, err error) {
	if depth < 1 {
		return nil, 0, errors.New("attempt to fillNode at depth < 1")
	}

	// A recursive call passes no node: start a fresh internal one.
	if node == nil {
		node = db.NewFSNodeOverDag(ft.TFile)
	}

	for {
		// Stop as soon as the node is full or the data has run out.
		if node.NumChildren() >= db.Maxlinks() || db.Done() {
			break
		}

		// The child added on this iteration together with the size of the
		// file data stored under it.
		var (
			child     ipld.Node
			childSize uint64
		)
		if depth > 1 {
			// Recursion case: build a whole sub-DAG one level shallower.
			child, childSize, err = fillNodeRec(db, nil, depth-1)
		} else {
			// Base case: the child is a leaf node carrying file data.
			child, childSize, err = db.NewLeafDataNode()
		}
		if err != nil {
			return nil, 0, err
		}

		if err = node.AddChild(child, childSize, db); err != nil {
			return nil, 0, err
		}
	}

	nodeFileSize = node.FileSize()

	// Encode the `FSNode` data into the final DAG node.
	filledNode, err = node.Commit()
	if err != nil {
		return nil, 0, err
	}

	return filledNode, nodeFileSize, nil
}

View File

@ -1,458 +0,0 @@
package helpers
import (
"context"
"io"
"os"
dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
pb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb"
pi "gx/ipfs/QmSHjPDw8yNgLZ7cBfX7w3Smn7PHwYhNEpd4LHQQxUg35L/go-ipfs-posinfo"
chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
files "gx/ipfs/QmdE4gMduCKCGAcczM2F5ioYDfdeKuPix138wrES1YSr7f/go-ipfs-cmdkit/files"
)
// DagBuilderHelper wraps together a bunch of objects needed to
// efficiently create unixfs dag trees. It pulls chunks of data from a
// splitter one at a time and offers helpers to turn them into nodes and
// to add those nodes to a DAGService.
type DagBuilderHelper struct {
// DAGService the builder writes to (see GetDagServ and Add).
dserv ipld.DAGService
// Source of the data chunks, consumed through NextBytes.
spl chunker.Splitter
// Error received from the splitter, other than io.EOF; once set it is
// returned by Next.
recvdErr error
// Whether leaves are created as raw nodes instead of UnixFS nodes.
rawLeaves bool
nextData []byte // the next item to return.
// Maximum number of links per node (see Maxlinks).
maxlinks int
// Batch that children and the final node are added to; committed by Close.
batch *ipld.Batch
// CID prefix applied to the nodes created by this helper (may be nil).
prefix *cid.Prefix
// Filestore support variables.
// ----------------------------
// TODO: Encapsulate in `FilestoreNode` (which is basically what they are).
//
// Besides having the path this variable (if set) is used as a flag
// to indicate that Filestore should be used.
fullPath string
// File information recorded in the position info of Filestore nodes.
stat os.FileInfo
// Keeps track of the current file size added to the DAG (used in
// the balanced builder). It is assumed that the `DagBuilderHelper`
// is not reused to construct another DAG, but a new one (with a
// zero `offset`) is created.
offset uint64
}
// DagBuilderParams wraps configuration options to create a DagBuilderHelper
// from a chunker.Splitter (see the New method).
type DagBuilderParams struct {
// Maximum number of links per intermediate node
Maxlinks int
// RawLeaves signifies that the importer should use raw ipld nodes as leaves
// instead of using the unixfs TRaw type
RawLeaves bool
// CID Prefix to use if set
Prefix *cid.Prefix
// DAGService to write blocks to (required)
Dagserv ipld.DAGService
// NoCopy signals to the chunker that it should track fileinfo for
// filestore adds. It only takes effect when the splitter's reader
// provides file information or when URL is set.
NoCopy bool
// URL if non-empty (and NoCopy is also true) indicates that the
// file will not be stored in the datastore but instead retrieved
// from this location via the urlstore.
URL string
}
// New builds a DagBuilderHelper configured from these params that reads its
// data from the given chunker.Splitter.
//
// When NoCopy is set, Filestore support is enabled by recording the path of
// the data: the absolute path and stat of the splitter's reader if it
// provides file information, overridden by URL when that is non-empty.
func (dbp *DagBuilderParams) New(spl chunker.Splitter) *DagBuilderHelper {
	helper := new(DagBuilderHelper)
	helper.dserv = dbp.Dagserv
	helper.spl = spl
	helper.rawLeaves = dbp.RawLeaves
	helper.prefix = dbp.Prefix
	helper.maxlinks = dbp.Maxlinks
	helper.batch = ipld.NewBatch(context.TODO(), dbp.Dagserv)

	info, isFile := spl.Reader().(files.FileInfo)
	if isFile && dbp.NoCopy {
		helper.fullPath = info.AbsPath()
		helper.stat = info.Stat()
	}

	// A URL takes precedence over the path of a local file.
	if dbp.NoCopy && dbp.URL != "" {
		helper.fullPath = dbp.URL
	}

	return helper
}
// prepareNext fetches the next chunk from the splitter into nextData, unless
// a chunk is already waiting or an error was recorded earlier, in which case
// it does nothing. Reaching the end of the data (io.EOF) is not recorded as
// an error.
func (db *DagBuilderHelper) prepareNext() {
	hasPending := db.nextData != nil
	hasFailed := db.recvdErr != nil
	if hasPending || hasFailed {
		return
	}

	chunk, err := db.spl.NextBytes()
	if err == io.EOF {
		err = nil
	}
	db.nextData, db.recvdErr = chunk, err
}
// Done reports whether all incoming data has been consumed. It returns false
// while an error from the splitter is pending, so that the error still gets
// reported by a later call to Next.
func (db *DagBuilderHelper) Done() bool {
	// Done may be called before Next, so make sure a chunk has been
	// requested from the splitter (this is idempotent).
	db.prepareNext()

	return db.recvdErr == nil && db.nextData == nil
}
// Next hands out the next chunk of data to be inserted into the dag. A nil
// chunk with a nil error signifies that the stream is at an end and that the
// current building operation should finish. If the splitter reported an
// error, that error is returned instead of data.
func (db *DagBuilderHelper) Next() ([]byte, error) {
	db.prepareNext() // idempotent

	// Take the pending chunk and mark it as consumed.
	chunk := db.nextData
	db.nextData = nil

	if err := db.recvdErr; err != nil {
		return nil, err
	}
	return chunk, nil
}
// GetDagServ returns the dagservice object this Helper is using, i.e. the
// one it was created with.
func (db *DagBuilderHelper) GetDagServ() ipld.DAGService {
return db.dserv
}
// NewUnixfsNode returns an empty UnixfsNode of type `File`, backed by a new
// `dag.ProtoNode` that uses the builder's CID prefix.
func (db *DagBuilderHelper) NewUnixfsNode() *UnixfsNode {
	fileNode := new(UnixfsNode)
	fileNode.node = new(dag.ProtoNode)
	fileNode.ufmt = ft.NewFSNode(ft.TFile)
	fileNode.SetPrefix(db.prefix)
	return fileNode
}
// GetPrefix returns the internal `cid.Prefix` set in the builder. The result
// is nil when no prefix was configured.
func (db *DagBuilderHelper) GetPrefix() *cid.Prefix {
return db.prefix
}
// NewLeaf creates a leaf node filled with data. It fails with
// ErrSizeLimitExceeded if data is larger than BlockSizeLimit.
//
// If rawLeaves is set a raw leaf is returned (created with the builder's
// prefix when there is one). Otherwise, if data is nil the leaf is an empty
// node of type TFile (for backwards compatibility), and if data is defined
// (but possibly empty) the leaf is of type TRaw and carries the data.
func (db *DagBuilderHelper) NewLeaf(data []byte) (*UnixfsNode, error) {
	if len(data) > BlockSizeLimit {
		return nil, ErrSizeLimitExceeded
	}

	switch {
	case db.rawLeaves && db.prefix == nil:
		return &UnixfsNode{rawnode: dag.NewRawNode(data), raw: true}, nil

	case db.rawLeaves:
		raw, err := dag.NewRawNodeWPrefix(data, *db.prefix)
		if err != nil {
			return nil, err
		}
		return &UnixfsNode{rawnode: raw, raw: true}, nil

	case data == nil:
		return db.NewUnixfsNode(), nil
	}

	leaf := db.newUnixfsBlock()
	leaf.SetData(data)
	return leaf, nil
}
// NewLeafNode is the counterpart of `NewLeaf` that produces an `ipld.Node`.
// It fails with ErrSizeLimitExceeded if data is larger than BlockSizeLimit.
// With raw leaves the data is wrapped in a raw node, otherwise in a UnixFS
// node.
func (db *DagBuilderHelper) NewLeafNode(data []byte) (ipld.Node, error) {
	if len(data) > BlockSizeLimit {
		return nil, ErrSizeLimitExceeded
	}

	if !db.rawLeaves {
		// Encapsulate the data in UnixFS node (instead of a raw node).
		//
		// TODO: Encapsulate this sequence of calls into a function that
		// just returns the final `ipld.Node` avoiding going through
		// `FSNodeOverDag`.
		// TODO: Using `TFile` for backwards-compatibility, a bug in the
		// balanced builder was causing the leaf nodes to be generated
		// with this type instead of `TRaw`, the one that should be used
		// (like the trickle builder does).
		// (See https://github.com/ipfs/go-ipfs/pull/5120.)
		wrapper := db.NewFSNodeOverDag(ft.TFile)
		wrapper.SetFileData(data)
		committed, err := wrapper.Commit()
		if err != nil {
			return nil, err
		}
		return committed, nil
	}

	// Encapsulate the data in a raw node.
	if db.prefix == nil {
		return dag.NewRawNode(data), nil
	}
	raw, err := dag.NewRawNodeWPrefix(data, *db.prefix)
	if err != nil {
		return nil, err
	}
	return raw, nil
}
// newUnixfsBlock returns an empty UnixfsNode of type `Raw`, meant to hold a
// block of data, backed by a new `dag.ProtoNode` that uses the builder's CID
// prefix.
func (db *DagBuilderHelper) newUnixfsBlock() *UnixfsNode {
	block := new(UnixfsNode)
	block.node = new(dag.ProtoNode)
	block.ufmt = ft.NewFSNode(ft.TRaw)
	block.SetPrefix(db.prefix)
	return block
}
// FillNodeLayer adds data nodes as children of the given node until the node
// has db.maxlinks children or the data runs out, whichever comes first. It
// returns the first error encountered while creating or adding a child.
func (db *DagBuilderHelper) FillNodeLayer(node *UnixfsNode) error {
	for {
		// Stop when there is no room left or nothing more to add.
		if node.NumChildren() >= db.maxlinks || db.Done() {
			return nil
		}

		leaf, err := db.GetNextDataNode()
		if err != nil {
			return err
		}

		if err = node.AddChild(leaf, db); err != nil {
			return err
		}
	}
}
// GetNextDataNode takes the next chunk from the Splitter and wraps it in a
// UnixfsNode, subject to the constraints (BlockSizeLimit, RawLeaves) given
// when the DagBuilderHelper was created. It returns a nil node and a nil
// error once there is no more data.
func (db *DagBuilderHelper) GetNextDataNode() (*UnixfsNode, error) {
	chunk, err := db.Next()
	switch {
	case err != nil:
		return nil, err
	case chunk == nil: // we're done!
		return nil, nil
	default:
		return db.NewLeaf(chunk)
	}
}
// NewLeafDataNode is the counterpart of `GetNextDataNode` that produces an
// `ipld.Node`. It builds the `node` from the next chunk handed out by the
// Splitter and returns it along with `dataSize`, the length of that chunk
// (used to keep track of the DAG file size). The size is measured here
// because `NewLeafNode` hides the data inside a generic `ipld.Node`.
func (db *DagBuilderHelper) NewLeafDataNode() (node ipld.Node, dataSize uint64, err error) {
	chunk, err := db.Next()
	if err != nil {
		return nil, 0, err
	}

	// Create a new leaf node containing the file chunk data.
	leaf, err := db.NewLeafNode(chunk)
	if err != nil {
		return nil, 0, err
	}

	size := uint64(len(chunk))

	// Convert this leaf to a `FilestoreNode` if needed.
	return db.ProcessFileStore(leaf, size), size, nil
}
// ProcessFileStore wraps `node` in a `FilestoreNode` when Filestore is being
// used (a path was recorded in the builder) and `node` is a raw node. In any
// other case the same `node` is returned so that it can be added to the DAG
// as it is.
//
// When a `FilestoreNode` is produced it records the current `db.offset`, and
// the offset is then advanced by `dataSize`. The offset is updated here
// rather than in `NewLeafDataNode` (both work in tandem) because it is more
// related to this function.
func (db *DagBuilderHelper) ProcessFileStore(node ipld.Node, dataSize uint64) ipld.Node {
	// Filestore is not being used.
	if db.fullPath == "" {
		return node
	}

	// Only raw nodes can be turned into Filestore nodes.
	if _, isRaw := node.(*dag.RawNode); !isRaw {
		return node
	}

	position := &pi.PosInfo{
		Offset:   db.offset,
		FullPath: db.fullPath,
		Stat:     db.stat,
	}

	// Update `offset` with the size of the data generated by `db.Next`.
	db.offset += dataSize

	return &pi.FilestoreNode{
		Node:    node,
		PosInfo: position,
	}
}
// Add converts the given UnixfsNode into a DAG node, stores it in the
// DAGService and returns the stored node.
func (db *DagBuilderHelper) Add(node *UnixfsNode) (ipld.Node, error) {
	built, err := node.GetDagNode()
	if err != nil {
		return nil, err
	}

	if err := db.dserv.Add(context.TODO(), built); err != nil {
		return nil, err
	}

	return built, nil
}
// Maxlinks returns the configured maximum number for links
// for nodes built with this helper (the `Maxlinks` value of the
// params the helper was created from).
func (db *DagBuilderHelper) Maxlinks() int {
return db.maxlinks
}
// Close commits the helper's batch of pending nodes and returns the
// error of that commit, if any.
// It should be called at the end of the building process to make
// sure all data is persisted.
func (db *DagBuilderHelper) Close() error {
return db.batch.Commit()
}
// AddNodeAndClose adds `node`, the last `ipld.Node` of the DAG, to the batch
// and then closes the builder. On success it returns the very `node` that
// was passed in.
func (db *DagBuilderHelper) AddNodeAndClose(node ipld.Node) (ipld.Node, error) {
	if err := db.batch.Add(node); err != nil {
		return nil, err
	}

	if err := db.Close(); err != nil {
		return nil, err
	}

	return node, nil
}
// FSNodeOverDag encapsulates an `unixfs.FSNode` that will be stored in a
// `dag.ProtoNode`. Instead of just having a single `ipld.Node` that
// would need to be constantly (un)packed to access and modify its
// internal `FSNode` in the process of creating a UnixFS DAG, this
// structure stores an `FSNode` cache to manipulate it (add child nodes)
// directly, and only when the node has reached its final (immutable) state
// (signaled by calling `Commit()`) is it committed to a single (indivisible)
// `ipld.Node`.
//
// It is used mainly for internal (non-leaf) nodes, and for some
// representations of data leaf nodes (that don't use raw nodes or
// Filestore).
//
// It aims to replace the `UnixfsNode` structure which encapsulated too
// many possible node state combinations.
//
// TODO: Revisit the name.
type FSNodeOverDag struct {
// DAG layer: holds the links to the children and, after `Commit`,
// the encoded `file` as its data.
dag *dag.ProtoNode
// UnixFS layer: holds the file data and the sizes of the children.
file *ft.FSNode
}
// NewFSNodeOverDag creates a new `dag.ProtoNode` (using the builder's CID
// prefix) and a new `ft.FSNode` that are decoupled from one another, and
// remain so until `Commit` is called. `fsNodeType` specifies the type of the
// UnixFS layer node (either `File` or `Raw`).
func (db *DagBuilderHelper) NewFSNodeOverDag(fsNodeType pb.Data_DataType) *FSNodeOverDag {
	proto := new(dag.ProtoNode)
	proto.SetPrefix(db.GetPrefix())

	return &FSNodeOverDag{
		dag:  proto,
		file: ft.NewFSNode(fsNodeType),
	}
}
// AddChild registers `child` in both layers of the node: the `dag.ProtoNode`
// gets an (unnamed) link to it, and the `ft.FSNode` records `fileSize`, which
// is the size of the file data stored under the child at the UnixFS layer
// (not the size of the child node itself). The child is also added to the
// builder's batch so that it ends up stored in the `DAGService`.
func (n *FSNodeOverDag) AddChild(child ipld.Node, fileSize uint64, db *DagBuilderHelper) error {
	if err := n.dag.AddNodeLink("", child); err != nil {
		return err
	}

	n.file.AddBlockSize(fileSize)

	return db.batch.Add(child)
}
// Commit merges the two cached layers into the single `ipld.Node` that
// represents them: the `ft.FSNode` is serialized and stored as the data of
// the `dag.ProtoNode`, which is then returned.
//
// TODO: Evaluate making it read-only after committing.
func (n *FSNodeOverDag) Commit() (ipld.Node, error) {
	encoded, err := n.file.GetBytes()
	if err != nil {
		return nil, err
	}

	proto := n.dag
	proto.SetData(encoded)
	return proto, nil
}
// NumChildren returns the number of children of the `ft.FSNode`, as
// reported by the UnixFS layer of this node.
func (n *FSNodeOverDag) NumChildren() int {
return n.file.NumChildren()
}
// FileSize returns the `Filesize` attribute from the underlying
// representation of the `ft.FSNode`. The balanced builder uses it as the
// total size of the file data stored under a filled node.
func (n *FSNodeOverDag) FileSize() uint64 {
return n.file.FileSize()
}
// SetFileData stores the `fileData` in the `ft.FSNode`. It
// should be used only when `FSNodeOverDag` represents a leaf
// node (internal nodes don't carry data, just file sizes). The data
// only reaches the DAG layer once `Commit` is called.
func (n *FSNodeOverDag) SetFileData(fileData []byte) {
n.file.SetData(fileData)
}

View File

@ -1,173 +0,0 @@
package helpers
import (
"context"
"fmt"
"os"
dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
pi "gx/ipfs/QmSHjPDw8yNgLZ7cBfX7w3Smn7PHwYhNEpd4LHQQxUg35L/go-ipfs-posinfo"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// BlockSizeLimit specifies the maximum size an imported block can have.
var BlockSizeLimit = 1048576 // 1 MB
// rough estimates on expected sizes
var roughLinkBlockSize = 1 << 13 // 8KB
var roughLinkSize = 34 + 8 + 5 // sha256 multihash + size + no name + protobuf framing
// DefaultLinksPerBlock governs how the importer decides how many links there
// will be per block. This calculation is based on expected distributions of:
// * the expected distribution of block sizes
// * the expected distribution of link sizes
// * desired access speed
// For now, we use:
//
// var roughLinkBlockSize = 1 << 13 // 8KB
// var roughLinkSize = 34 + 8 + 5 // sha256 multihash + size + no name + protobuf framing
// var DefaultLinksPerBlock = (roughLinkBlockSize / roughLinkSize)
//
// which evaluates to 8192 / 47 = 174 links per block (integer division).
//
// See calc_test.go
var DefaultLinksPerBlock = roughLinkBlockSize / roughLinkSize
// ErrSizeLimitExceeded signals that a block is larger than BlockSizeLimit.
var ErrSizeLimitExceeded = fmt.Errorf("object size limit exceeded")
// UnixfsNode is a struct created to aid in the generation
// of unixfs DAG trees
type UnixfsNode struct {
// raw selects the representation: when true the node is backed by
// rawnode, otherwise by node and ufmt.
raw bool
// rawnode is the raw DAG node used when raw is true.
rawnode *dag.RawNode
// node is the protobuf DAG node that holds the links and, once
// formatted, the encoded ufmt as its data.
node *dag.ProtoNode
// ufmt is the UnixFS-layer representation (data and block sizes).
ufmt *ft.FSNode
// posInfo, when non-nil, is attached to raw nodes returned by GetDagNode.
posInfo *pi.PosInfo
}
// NewUnixfsNodeFromDag wraps an existing protobuf dag node in a
// UnixfsNode. The UnixFS representation is decoded from the node's
// data; a decoding failure is returned to the caller unchanged.
func NewUnixfsNodeFromDag(nd *dag.ProtoNode) (*UnixfsNode, error) {
	decoded, decodeErr := ft.FSNodeFromBytes(nd.Data())
	if decodeErr != nil {
		return nil, decodeErr
	}

	wrapped := new(UnixfsNode)
	wrapped.node = nd
	wrapped.ufmt = decoded
	return wrapped, nil
}
// SetPrefix sets the CID Prefix on the underlying protobuf dag node.
// NOTE(review): it dereferences n.node unconditionally; confirm callers
// never invoke it on a raw-backed UnixfsNode.
func (n *UnixfsNode) SetPrefix(prefix *cid.Prefix) {
n.node.SetPrefix(prefix)
}
// NumChildren returns the number of children referenced by this UnixfsNode,
// as reported by its UnixFS representation (ufmt).
func (n *UnixfsNode) NumChildren() int {
return n.ufmt.NumChildren()
}
// GetChild fetches the ith linked child of this node through the given
// DAGService and wraps it in a UnixfsNode. Only protobuf children are
// supported: any other node type yields dag.ErrNotProtobuf. The index
// is not range-checked here.
func (n *UnixfsNode) GetChild(ctx context.Context, i int, ds ipld.DAGService) (*UnixfsNode, error) {
	link := n.node.Links()[i]

	fetched, err := link.GetNode(ctx, ds)
	if err != nil {
		return nil, err
	}

	if protoChild, isProto := fetched.(*dag.ProtoNode); isProto {
		return NewUnixfsNodeFromDag(protoChild)
	}
	return nil, dag.ErrNotProtobuf
}
// AddChild adds the given UnixfsNode as a child of the receiver.
// The passed in DagBuilderHelper is used to store the child node (it is
// added to the helper's batch) so it doesn't get lost.
//
// The child's file size is recorded in the receiver only after the link
// to it has been created, so a failure while formatting or linking the
// child leaves the receiver's links and block sizes consistent with each
// other. An error from the batch is returned as-is; at that point the
// link and the block size have both already been added.
func (n *UnixfsNode) AddChild(child *UnixfsNode, db *DagBuilderHelper) error {
	childnode, err := child.GetDagNode()
	if err != nil {
		return err
	}

	// Add a link to this node without storing a reference to the memory
	// This way, we avoid nodes building up and consuming all of our RAM
	if err := n.node.AddNodeLink("", childnode); err != nil {
		return err
	}

	// Keep the block-size list in step with the link list: one entry per
	// successfully added link.
	n.ufmt.AddBlockSize(child.FileSize())

	return db.batch.Add(childnode)
}
// RemoveChild drops the child at the given index from both layers: its
// block size is removed from the UnixFS representation and its link is
// cut out of the dag node's link list. The dbh argument is not used.
func (n *UnixfsNode) RemoveChild(index int, dbh *DagBuilderHelper) {
	n.ufmt.RemoveBlockSize(index)

	before := n.node.Links()[:index]
	after := n.node.Links()[index+1:]
	n.node.SetLinks(append(before, after...))
}
// SetData stores data in this node's UnixFS representation (ufmt).
func (n *UnixfsNode) SetData(data []byte) {
n.ufmt.SetData(data)
}
// FileSize reports the size of the file data under this node. For
// protobuf-backed nodes this is the file size stored in the UnixFS
// representation (which includes children); for raw nodes it is the
// length of the raw data.
func (n *UnixfsNode) FileSize() uint64 {
	if !n.raw {
		return n.ufmt.FileSize()
	}
	rawLen := len(n.rawnode.RawData())
	return uint64(rawLen)
}
// SetPosInfo records where this node's data lives in a filesystem
// file: the byte offset, the full path of the file and its stat info.
// Any previously stored position info is replaced.
func (n *UnixfsNode) SetPosInfo(offset uint64, fullPath string, stat os.FileInfo) {
	info := new(pi.PosInfo)
	info.Offset = offset
	info.FullPath = fullPath
	info.Stat = stat

	n.posInfo = info
}
// GetDagNode produces the dag node for this unixfs node, formatting the
// UnixFS data into it when needed. When position info has been set and
// the resulting node is a raw node, the node is returned wrapped in a
// pi.FilestoreNode carrying that position info; otherwise the base dag
// node is returned as is.
func (n *UnixfsNode) GetDagNode() (ipld.Node, error) {
	base, err := n.getBaseDagNode()
	if err != nil {
		return nil, err
	}

	if n.posInfo == nil {
		return base, nil
	}

	rawBase, isRaw := base.(*dag.RawNode)
	if !isRaw {
		return base, nil
	}

	return &pi.FilestoreNode{Node: rawBase, PosInfo: n.posInfo}, nil
}
// getBaseDagNode returns the dag node backing this unixfs node. Raw
// nodes are returned directly; for protobuf nodes the UnixFS
// representation is first serialized into the node's data.
func (n *UnixfsNode) getBaseDagNode() (ipld.Node, error) {
	if n.raw {
		return n.rawnode, nil
	}

	encoded, encodeErr := n.ufmt.GetBytes()
	if encodeErr != nil {
		return nil, encodeErr
	}

	n.node.SetData(encoded)

	return n.node, nil
}

View File

@ -1,34 +0,0 @@
// Package importer implements utilities used to create IPFS DAGs from files
// and readers.
package importer
import (
chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
bal "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/balanced"
h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers"
trickle "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/trickle"
)
// BuildDagFromReader reads chunks from the given Splitter (Splitters
// are io.Readers) and arranges them into a DAG with the Balanced
// layout, storing nodes through the given DAGService. The builder uses
// h.DefaultLinksPerBlock as the maximum number of links per node.
func BuildDagFromReader(ds ipld.DAGService, spl chunker.Splitter) (ipld.Node, error) {
	params := h.DagBuilderParams{Dagserv: ds, Maxlinks: h.DefaultLinksPerBlock}
	builder := params.New(spl)
	return bal.Layout(builder)
}
// BuildTrickleDagFromReader reads chunks from the given Splitter
// (Splitters are io.Readers) and arranges them into a DAG with the
// Trickle layout, storing nodes through the given DAGService. The
// builder uses h.DefaultLinksPerBlock as the maximum number of links
// per node.
func BuildTrickleDagFromReader(ds ipld.DAGService, spl chunker.Splitter) (ipld.Node, error) {
	params := h.DagBuilderParams{Dagserv: ds, Maxlinks: h.DefaultLinksPerBlock}
	builder := params.New(spl)
	return trickle.Layout(builder)
}

View File

@ -1,118 +0,0 @@
package importer
import (
"bytes"
"context"
"io"
"io/ioutil"
"testing"
mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test"
uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io"
u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util"
chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// getBalancedDag builds a balanced DAG out of `size` random bytes split
// into chunks of `blksize` bytes, and returns the root together with
// the mock DAGService holding it. A build failure aborts the test.
func getBalancedDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DAGService) {
	dserv := mdtest.Mock()
	src := io.LimitReader(u.NewTimeSeededRand(), size)
	splitter := chunker.NewSizeSplitter(src, blksize)

	root, err := BuildDagFromReader(dserv, splitter)
	if err != nil {
		t.Fatal(err)
	}
	return root, dserv
}
// getTrickleDag builds a trickle DAG out of `size` random bytes split
// into chunks of `blksize` bytes, and returns the root together with
// the mock DAGService holding it. A build failure aborts the test.
func getTrickleDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DAGService) {
	dserv := mdtest.Mock()
	src := io.LimitReader(u.NewTimeSeededRand(), size)
	splitter := chunker.NewSizeSplitter(src, blksize)

	root, err := BuildTrickleDagFromReader(dserv, splitter)
	if err != nil {
		t.Fatal(err)
	}
	return root, dserv
}
// TestBalancedDag imports 10000 random bytes with the balanced layout
// and the default splitter, then reads the DAG back and checks that the
// bytes round-trip unchanged.
func TestBalancedDag(t *testing.T) {
	dserv := mdtest.Mock()

	want := make([]byte, 10000)
	u.NewTimeSeededRand().Read(want)

	root, err := BuildDagFromReader(dserv, chunker.DefaultSplitter(bytes.NewReader(want)))
	if err != nil {
		t.Fatal(err)
	}

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	got, err := ioutil.ReadAll(reader)
	if err != nil {
		t.Fatal(err)
	}

	if !bytes.Equal(got, want) {
		t.Fatal("bad read")
	}
}
// BenchmarkBalancedReadSmallBlock measures reading a 10000000-byte
// balanced DAG built from 4096-byte chunks. DAG construction happens
// with the timer stopped.
func BenchmarkBalancedReadSmallBlock(b *testing.B) {
	const total = int64(10000000)

	b.StopTimer()
	root, dserv := getBalancedDag(b, total, 4096)
	b.SetBytes(total)
	b.StartTimer()

	runReadBench(b, root, dserv)
}
// BenchmarkTrickleReadSmallBlock measures reading a 10000000-byte
// trickle DAG built from 4096-byte chunks. DAG construction happens
// with the timer stopped.
func BenchmarkTrickleReadSmallBlock(b *testing.B) {
	const total = int64(10000000)

	b.StopTimer()
	root, dserv := getTrickleDag(b, total, 4096)
	b.SetBytes(total)
	b.StartTimer()

	runReadBench(b, root, dserv)
}
// BenchmarkBalancedReadFull measures reading a 10000000-byte balanced
// DAG built with the chunker's default block size. DAG construction
// happens with the timer stopped.
func BenchmarkBalancedReadFull(b *testing.B) {
	const total = int64(10000000)

	b.StopTimer()
	root, dserv := getBalancedDag(b, total, chunker.DefaultBlockSize)
	b.SetBytes(total)
	b.StartTimer()

	runReadBench(b, root, dserv)
}
// BenchmarkTrickleReadFull measures reading a 10000000-byte trickle
// DAG built with the chunker's default block size. DAG construction
// happens with the timer stopped.
func BenchmarkTrickleReadFull(b *testing.B) {
	const total = int64(10000000)

	b.StopTimer()
	root, dserv := getTrickleDag(b, total, chunker.DefaultBlockSize)
	b.SetBytes(total)
	b.StartTimer()

	runReadBench(b, root, dserv)
}
// runReadBench is the shared benchmark body: on every iteration it
// opens a fresh DagReader over `nd` and drains it into ioutil.Discard.
// Each iteration gets its own cancellable context, cancelled once the
// read is done. io.EOF from WriteTo is not treated as a failure.
func runReadBench(b *testing.B, nd ipld.Node, ds ipld.DAGService) {
	for iter := 0; iter < b.N; iter++ {
		ctx, cancel := context.WithCancel(context.Background())

		reader, err := uio.NewDagReader(ctx, nd, ds)
		if err != nil {
			b.Fatal(err)
		}

		if _, err := reader.WriteTo(ioutil.Discard); err != nil && err != io.EOF {
			b.Fatal(err)
		}

		cancel()
	}
}

View File

@ -1,640 +0,0 @@
package trickle
import (
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
mrand "math/rand"
"testing"
merkledag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers"
uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io"
u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util"
chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// UseRawLeaves tells the test helpers whether the DAG under test is
// built (and verified) with raw leaves or with protobuf leaves.
type UseRawLeaves bool
// The two possible leaf formats exercised by every subtest.
const (
ProtoBufLeaves UseRawLeaves = false
RawLeaves UseRawLeaves = true
)
// runBothSubtests runs tfunc twice as subtests of t: first with
// protobuf leaves ("leaves=ProtoBuf"), then with raw leaves
// ("leaves=Raw").
func runBothSubtests(t *testing.T, tfunc func(*testing.T, UseRawLeaves)) {
	variants := []struct {
		name   string
		leaves UseRawLeaves
	}{
		{"leaves=ProtoBuf", ProtoBufLeaves},
		{"leaves=Raw", RawLeaves},
	}

	for _, variant := range variants {
		variant := variant // per-iteration copy for the closure
		t.Run(variant.name, func(t *testing.T) { tfunc(t, variant.leaves) })
	}
}
// buildTestDag lays out the splitter's data as a trickle DAG in ds,
// using h.DefaultLinksPerBlock links per node and the requested leaf
// format. The root must be a protobuf node. The result of
// VerifyTrickleDagStructure on the built DAG is returned as the error,
// alongside the root.
func buildTestDag(ds ipld.DAGService, spl chunker.Splitter, rawLeaves UseRawLeaves) (*merkledag.ProtoNode, error) {
	useRaw := bool(rawLeaves)
	params := h.DagBuilderParams{
		Dagserv:   ds,
		Maxlinks:  h.DefaultLinksPerBlock,
		RawLeaves: useRaw,
	}

	root, err := Layout(params.New(spl))
	if err != nil {
		return nil, err
	}

	protoRoot, isProto := root.(*merkledag.ProtoNode)
	if !isProto {
		return nil, merkledag.ErrNotProtobuf
	}

	verifyErr := VerifyTrickleDagStructure(protoRoot, VerifyParams{
		Getter:      ds,
		Direct:      params.Maxlinks,
		LayerRepeat: layerRepeat,
		RawLeaves:   useRaw,
	})
	return protoRoot, verifyErr
}
// TestSizeBasedSplit tests the case where calls to read are smaller than
// the chunk size. It runs testSizeBasedSplit for both leaf formats.
func TestSizeBasedSplit(t *testing.T) {
runBothSubtests(t, testSizeBasedSplit)
}
// testSizeBasedSplit checks file consistency with size-based splitters
// of 512 and 4096 bytes, including one input whose length is not a
// multiple of the chunk size. Skipped in -short mode.
func testSizeBasedSplit(t *testing.T, rawLeaves UseRawLeaves) {
	if testing.Short() {
		t.SkipNow()
	}

	smallChunks := chunker.SizeSplitterGen(512)
	testFileConsistency(t, smallChunks, 32*512, rawLeaves)

	largeChunks := chunker.SizeSplitterGen(4096)
	testFileConsistency(t, largeChunks, 32*4096, rawLeaves)

	// Uneven offset
	testFileConsistency(t, largeChunks, 31*4095, rawLeaves)
}
// dup returns a freshly allocated copy of b. The result never aliases
// b's backing array and is non-nil even when b is empty or nil.
func dup(b []byte) []byte {
	return append(make([]byte, 0, len(b)), b...)
}
// testFileConsistency imports nbytes of random data as a trickle DAG
// using the splitter produced by bs, reads the DAG back in full and
// fails the test unless the bytes read equal the bytes written.
func testFileConsistency(t *testing.T, bs chunker.SplitterGen, nbytes int, rawLeaves UseRawLeaves) {
	want := make([]byte, nbytes)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, bs(src), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	got, err := ioutil.ReadAll(reader)
	if err != nil {
		t.Fatal(err)
	}

	if err := arrComp(got, want); err != nil {
		t.Fatal(err)
	}
}
// TestBuilderConsistency runs testBuilderConsistency for both leaf formats.
func TestBuilderConsistency(t *testing.T) {
runBothSubtests(t, testBuilderConsistency)
}
// testBuilderConsistency imports 100000 random bytes with the default
// splitter as a trickle DAG, reads the DAG back and checks that the
// output matches the input byte for byte.
func testBuilderConsistency(t *testing.T, rawLeaves UseRawLeaves) {
	nbytes := 100000
	buf := new(bytes.Buffer)
	// A short or failed copy would otherwise only show up later as a
	// confusing data mismatch, so fail right here.
	if _, err := io.CopyN(buf, u.NewTimeSeededRand(), int64(nbytes)); err != nil {
		t.Fatal(err)
	}

	// Keep an independent copy: the splitter consumes buf.
	should := dup(buf.Bytes())
	dagserv := mdtest.Mock()
	nd, err := buildTestDag(dagserv, chunker.DefaultSplitter(buf), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	r, err := uio.NewDagReader(context.Background(), nd, dagserv)
	if err != nil {
		t.Fatal(err)
	}

	out, err := ioutil.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}

	if err := arrComp(out, should); err != nil {
		t.Fatal(err)
	}
}
// arrComp compares two byte slices. It returns nil when they are
// equal, an error naming both lengths when the lengths differ, or an
// error naming the first index at which the contents differ.
func arrComp(a, b []byte) error {
	if la, lb := len(a), len(b); la != lb {
		return fmt.Errorf("arrays differ in length. %d != %d", la, lb)
	}

	for idx := 0; idx < len(a); idx++ {
		if a[idx] != b[idx] {
			return fmt.Errorf("arrays differ at index: %d", idx)
		}
	}

	return nil
}
// TestIndirectBlocks runs testIndirectBlocks for both leaf formats.
func TestIndirectBlocks(t *testing.T) {
runBothSubtests(t, testIndirectBlocks)
}
// testIndirectBlocks imports 1 MiB of random data in 512-byte chunks
// and checks that reading the DAG back yields the original bytes.
func testIndirectBlocks(t *testing.T, rawLeaves UseRawLeaves) {
	newSplitter := chunker.SizeSplitterGen(512)

	want := make([]byte, 1024*1024)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, newSplitter(src), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	reader, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	got, err := ioutil.ReadAll(reader)
	if err != nil {
		t.Fatal(err)
	}

	if !bytes.Equal(got, want) {
		t.Fatal("Not equal!")
	}
}
// TestSeekingBasic runs testSeekingBasic for both leaf formats.
func TestSeekingBasic(t *testing.T) {
runBothSubtests(t, testSeekingBasic)
}
// testSeekingBasic builds a 10 KiB file from 512-byte chunks, seeks to
// offset 4000 from the start and checks that the rest of the file reads
// back correctly from there.
func testSeekingBasic(t *testing.T, rawLeaves UseRawLeaves) {
	size := int64(10 * 1024)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, chunker.NewSizeSplitter(src, 512), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	rs, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	start := int64(4000)
	pos, err := rs.Seek(start, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	if pos != start {
		t.Fatal("Failed to seek to correct offset")
	}

	got, err := ioutil.ReadAll(rs)
	if err != nil {
		t.Fatal(err)
	}

	if err := arrComp(got, want[start:]); err != nil {
		t.Fatal(err)
	}
}
// TestSeekToBegin runs testSeekToBegin for both leaf formats.
func TestSeekToBegin(t *testing.T) {
runBothSubtests(t, testSeekToBegin)
}
// testSeekToBegin builds a 10 KiB file from 500-byte chunks, consumes
// the first 4096 bytes, seeks back to offset 0 and checks that the
// whole file then reads back correctly.
func testSeekToBegin(t *testing.T, rawLeaves UseRawLeaves) {
	size := int64(10 * 1024)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, chunker.NewSizeSplitter(src, 500), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	rs, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	copied, err := io.CopyN(ioutil.Discard, rs, 1024*4)
	if err != nil {
		t.Fatal(err)
	}
	if copied != 4096 {
		t.Fatal("Copy didnt copy enough bytes")
	}

	pos, err := rs.Seek(0, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	if pos != 0 {
		t.Fatal("Failed to seek to beginning")
	}

	got, err := ioutil.ReadAll(rs)
	if err != nil {
		t.Fatal(err)
	}

	if err := arrComp(got, want); err != nil {
		t.Fatal(err)
	}
}
// TestSeekToAlmostBegin runs testSeekToAlmostBegin for both leaf formats.
func TestSeekToAlmostBegin(t *testing.T) {
runBothSubtests(t, testSeekToAlmostBegin)
}
// testSeekToAlmostBegin builds a 10 KiB file from 500-byte chunks,
// consumes the first 4096 bytes, seeks back to offset 1 and checks that
// everything from the second byte on reads back correctly.
func testSeekToAlmostBegin(t *testing.T, rawLeaves UseRawLeaves) {
	size := int64(10 * 1024)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, chunker.NewSizeSplitter(src, 500), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	rs, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	copied, err := io.CopyN(ioutil.Discard, rs, 1024*4)
	if err != nil {
		t.Fatal(err)
	}
	if copied != 4096 {
		t.Fatal("Copy didnt copy enough bytes")
	}

	pos, err := rs.Seek(1, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	if pos != 1 {
		t.Fatal("Failed to seek to almost beginning")
	}

	got, err := ioutil.ReadAll(rs)
	if err != nil {
		t.Fatal(err)
	}

	if err := arrComp(got, want[1:]); err != nil {
		t.Fatal(err)
	}
}
// TestSeekEnd runs testSeekEnd for both leaf formats.
func TestSeekEnd(t *testing.T) {
runBothSubtests(t, testSeekEnd)
}
// testSeekEnd builds a 50 KiB file from 500-byte chunks and checks that
// seeking to offset 0 relative to the end reports the file size.
func testSeekEnd(t *testing.T, rawLeaves UseRawLeaves) {
	size := int64(50 * 1024)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, chunker.NewSizeSplitter(src, 500), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	rs, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	pos, err := rs.Seek(0, io.SeekEnd)
	if err != nil {
		t.Fatal(err)
	}
	if pos != size {
		t.Fatal("Failed to seek to end")
	}
}
// TestSeekEndSingleBlockFile runs testSeekEndSingleBlockFile for both
// leaf formats.
func TestSeekEndSingleBlockFile(t *testing.T) {
runBothSubtests(t, testSeekEndSingleBlockFile)
}
// testSeekEndSingleBlockFile builds a 100-byte file with a 5000-byte
// chunk size (so the data fits in one chunk) and checks that seeking to
// offset 0 relative to the end reports the file size.
func testSeekEndSingleBlockFile(t *testing.T, rawLeaves UseRawLeaves) {
	size := int64(100)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, chunker.NewSizeSplitter(src, 5000), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	rs, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	pos, err := rs.Seek(0, io.SeekEnd)
	if err != nil {
		t.Fatal(err)
	}
	if pos != size {
		t.Fatal("Failed to seek to end")
	}
}
// TestSeekingStress runs testSeekingStress for both leaf formats.
func TestSeekingStress(t *testing.T) {
runBothSubtests(t, testSeekingStress)
}
// testSeekingStress builds a 1 MiB file from 1000-byte chunks and then,
// 50 times, seeks to a random offset and reads everything from there to
// the end of the file with a single Read call, comparing the result
// with the expected tail of the data.
func testSeekingStress(t *testing.T, rawLeaves UseRawLeaves) {
	size := int64(1024 * 1024)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, chunker.NewSizeSplitter(src, 1000), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	rs, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	scratch := make([]byte, size)
	for round := 0; round < 50; round++ {
		offset := mrand.Intn(int(size))
		remaining := int(size) - offset

		pos, err := rs.Seek(int64(offset), io.SeekStart)
		if err != nil {
			t.Fatal(err)
		}
		if pos != int64(offset) {
			t.Fatal("Seek failed to move to correct position")
		}

		got, err := rs.Read(scratch[:remaining])
		if err != nil {
			t.Fatal(err)
		}
		if got != remaining {
			t.Fatal("Failed to read enough bytes")
		}

		if err := arrComp(scratch[:remaining], want[offset:offset+remaining]); err != nil {
			t.Fatal(err)
		}
	}
}
// TestSeekingConsistency runs testSeekingConsistency for both leaf formats.
func TestSeekingConsistency(t *testing.T) {
runBothSubtests(t, testSeekingConsistency)
}
// testSeekingConsistency builds a 128 KiB file from 500-byte chunks and
// reads it back in 4096-byte windows from the last window to the first,
// seeking before every read. The reassembled buffer must equal the
// original data.
func testSeekingConsistency(t *testing.T, rawLeaves UseRawLeaves) {
	const window = 4096

	size := int64(128 * 1024)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)
	src := bytes.NewReader(want)

	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, chunker.NewSizeSplitter(src, 500), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	rs, err := uio.NewDagReader(context.Background(), root, dserv)
	if err != nil {
		t.Fatal(err)
	}

	got := make([]byte, size)
	for coff := size - window; coff >= 0; coff -= window {
		t.Log(coff)

		pos, err := rs.Seek(coff, io.SeekStart)
		if err != nil {
			t.Fatal(err)
		}
		if pos != coff {
			t.Fatal("wasnt able to seek to the right position")
		}

		nread, err := rs.Read(got[coff : coff+window])
		if err != nil {
			t.Fatal(err)
		}
		if nread != window {
			t.Fatal("didnt read the correct number of bytes")
		}
	}

	if err := arrComp(got, want); err != nil {
		t.Fatal(err)
	}
}
// TestAppend runs testAppend for both leaf formats.
func TestAppend(t *testing.T) {
runBothSubtests(t, testAppend)
}
// testAppend builds a trickle DAG from the first half of 128 KiB of
// random data, appends the second half with Append, verifies the
// trickle structure of the result and checks that the whole file reads
// back correctly.
func testAppend(t *testing.T, rawLeaves UseRawLeaves) {
	size := int64(128 * 1024)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)
	half := size / 2

	// Reader for half the bytes
	firstHalf := bytes.NewReader(want[:half])
	dserv := mdtest.Mock()
	root, err := buildTestDag(dserv, chunker.NewSizeSplitter(firstHalf, 500), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	params := &h.DagBuilderParams{
		Dagserv:   dserv,
		Maxlinks:  h.DefaultLinksPerBlock,
		RawLeaves: bool(rawLeaves),
	}

	secondHalf := bytes.NewReader(want[half:])
	ctx := context.Background()
	appended, err := Append(ctx, root, params.New(chunker.NewSizeSplitter(secondHalf, 500)))
	if err != nil {
		t.Fatal(err)
	}

	verifyErr := VerifyTrickleDagStructure(appended, VerifyParams{
		Getter:      dserv,
		Direct:      params.Maxlinks,
		LayerRepeat: layerRepeat,
		RawLeaves:   bool(rawLeaves),
	})
	if verifyErr != nil {
		t.Fatal(verifyErr)
	}

	reader, err := uio.NewDagReader(ctx, appended, dserv)
	if err != nil {
		t.Fatal(err)
	}

	got, err := ioutil.ReadAll(reader)
	if err != nil {
		t.Fatal(err)
	}

	if err := arrComp(got, want); err != nil {
		t.Fatal(err)
	}
}
// TestMultipleAppends appends one byte at a time to an empty file.
// It runs testMultipleAppends for both leaf formats.
func TestMultipleAppends(t *testing.T) {
runBothSubtests(t, testMultipleAppends)
}
// testMultipleAppends starts from a DAG built from an empty reader and
// calls Append 1000 times with one byte each, always passing the same
// starting node `nd`. After every call it verifies the trickle
// structure (4 direct links) and checks that the file reads back as the
// prefix appended so far.
func testMultipleAppends(t *testing.T, rawLeaves UseRawLeaves) {
	dserv := mdtest.Mock()

	// TODO: fix small size appends and make this number bigger
	size := int64(1000)
	want := make([]byte, size)
	u.NewTimeSeededRand().Read(want)

	emptySrc := bytes.NewReader(nil)
	nd, err := buildTestDag(dserv, chunker.NewSizeSplitter(emptySrc, 500), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	params := &h.DagBuilderParams{
		Dagserv:   dserv,
		Maxlinks:  4,
		RawLeaves: bool(rawLeaves),
	}

	newSplitter := chunker.SizeSplitterGen(500)
	ctx := context.Background()

	for pos := 0; pos < len(want); pos++ {
		oneByte := bytes.NewReader(want[pos : pos+1])
		appended, err := Append(ctx, nd, params.New(newSplitter(oneByte)))
		if err != nil {
			t.Fatal(err)
		}

		verifyErr := VerifyTrickleDagStructure(appended, VerifyParams{
			Getter:      dserv,
			Direct:      params.Maxlinks,
			LayerRepeat: layerRepeat,
			RawLeaves:   bool(rawLeaves),
		})
		if verifyErr != nil {
			t.Fatal(verifyErr)
		}

		reader, err := uio.NewDagReader(ctx, appended, dserv)
		if err != nil {
			t.Fatal(err)
		}

		got, err := ioutil.ReadAll(reader)
		if err != nil {
			t.Fatal(err)
		}

		if err := arrComp(got, want[:pos+1]); err != nil {
			t.Fatal(err)
		}
	}
}
// TestAppendSingleBytesToEmpty starts from a hand-built empty file node
// and appends the two bytes of "AB" one Append call at a time (4 direct
// links, protobuf leaves), then checks that the file reads back as
// "AB".
func TestAppendSingleBytesToEmpty(t *testing.T) {
	ds := mdtest.Mock()

	data := []byte("AB")

	// An empty UnixFS file: no data, zero file size.
	nd := new(merkledag.ProtoNode)
	nd.SetData(ft.FilePBData(nil, 0))

	dbp := &h.DagBuilderParams{
		Dagserv:  ds,
		Maxlinks: 4,
	}

	spl := chunker.SizeSplitterGen(500)

	ctx := context.Background()
	nnode, err := Append(ctx, nd, dbp.New(spl(bytes.NewReader(data[:1]))))
	if err != nil {
		t.Fatal(err)
	}

	nnode, err = Append(ctx, nnode, dbp.New(spl(bytes.NewReader(data[1:]))))
	if err != nil {
		t.Fatal(err)
	}

	fread, err := uio.NewDagReader(ctx, nnode, ds)
	if err != nil {
		t.Fatal(err)
	}

	out, err := ioutil.ReadAll(fread)
	if err != nil {
		t.Fatal(err)
	}

	// Report through the test log instead of printing to stdout, so the
	// values only show up on failure or with -v.
	t.Logf("read %v, expected %v", out, data)
	if err := arrComp(out, data); err != nil {
		t.Fatal(err)
	}
}

View File

@ -1,366 +0,0 @@
// Package trickle allows to build trickle DAGs.
// In this type of DAG, non-leaf nodes are first filled
// with data leaves, and then incorporate "layers" of subtrees
// as additional links.
//
// Each layer is a trickle sub-tree and is limited by an increasing
// maximum depth. Thus, a node's first layer
// can only hold leaves (depth 1) but subsequent layers can grow deeper.
// By default, this module places 4 nodes per layer (that is, 4 subtrees
// of the same maximum depth before increasing it).
//
// Trickle DAGs are very good for sequentially reading data, as the
// first data leaves are directly reachable from the root and those
// coming next are always nearby. They are
// suited for things like streaming applications.
package trickle
import (
"context"
"errors"
"fmt"
dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// layerRepeat specifies how many times to append a child tree of a
// given depth. Higher values increase the width of a given node, which
// improves seek speeds.
// It is also the divisor used to derive a subtree's depth from its
// link index (see trickleDepthInfo and verifyTDagRec).
const layerRepeat = 4
// Layout consumes all the data of the provided DagBuilderHelper and
// arranges it as a new DAG in the trickle format (see the package
// description for details). The root is filled with no depth limit,
// added through the helper, and the helper is closed before the stored
// root is returned.
func Layout(db *h.DagBuilderHelper) (ipld.Node, error) {
	root := db.NewUnixfsNode()

	err := fillTrickleRec(db, root, -1)
	if err != nil {
		return nil, err
	}

	stored, err := db.Add(root)
	if err != nil {
		return nil, err
	}

	if closeErr := db.Close(); closeErr != nil {
		return nil, closeErr
	}

	return stored, nil
}
// fillTrickleRec creates a trickle (sub-)tree with an optional maximum specified depth
// in the case maxDepth is greater than zero, or with unlimited depth otherwise
// (where the DAG builder will signal the end of data to end the function).
//
// The node is first filled through db.FillNodeLayer; after that, groups of
// layerRepeat child subtrees are appended, each group one level deeper
// than the previous one, until the depth limit is hit or db.Done() reports
// that the data is exhausted.
func fillTrickleRec(db *h.DagBuilderHelper, node *h.UnixfsNode, maxDepth int) error {
// Always do this, even in the base case
if err := db.FillNodeLayer(node); err != nil {
return err
}
// depth is the maximum depth handed to the subtrees of the current group.
for depth := 1; ; depth++ {
// Apply depth limit only if the parameter is set (> 0).
if maxDepth > 0 && depth == maxDepth {
return nil
}
for layer := 0; layer < layerRepeat; layer++ {
// Out of data: the tree is complete as it stands.
if db.Done() {
return nil
}
nextChild := db.NewUnixfsNode()
// Fill the child completely before linking it into the parent.
if err := fillTrickleRec(db, nextChild, depth); err != nil {
return err
}
if err := node.AddChild(nextChild, db); err != nil {
return err
}
}
}
}
// Append appends the data in `db` to the dag, using the Trickledag format.
//
// basen must be a *dag.ProtoNode, otherwise dag.ErrNotProtobuf is returned.
// The node is wrapped (not copied) in a UnixfsNode, so the appended data is
// added to basen itself. When no other error occurred, db is closed before
// returning and a Close error becomes the returned error.
func Append(ctx context.Context, basen ipld.Node, db *h.DagBuilderHelper) (out ipld.Node, errOut error) {
base, ok := basen.(*dag.ProtoNode)
if !ok {
return nil, dag.ErrNotProtobuf
}
// Close the builder on success only; errOut is the named result so the
// deferred function can replace it with the Close error.
defer func() {
if errOut == nil {
if err := db.Close(); err != nil {
errOut = err
}
}
}()
// Convert to unixfs node for working with easily
ufsn, err := h.NewUnixfsNodeFromDag(base)
if err != nil {
return nil, err
}
// Get depth of this 'tree'
n, layerProgress := trickleDepthInfo(ufsn, db.Maxlinks())
if n == 0 {
// If direct blocks not filled...
if err := db.FillNodeLayer(ufsn); err != nil {
return nil, err
}
if db.Done() {
return ufsn.GetDagNode()
}
// If continuing, our depth has increased by one
n++
}
// Last child in this node may not be a full tree, let's fill it up
if err := appendFillLastChild(ctx, ufsn, n-1, layerProgress, db); err != nil {
return nil, err
}
// after appendFillLastChild, our depth is now increased by one
if !db.Done() {
n++
}
// Now, continue filling out tree like normal
for i := n; !db.Done(); i++ {
for j := 0; j < layerRepeat && !db.Done(); j++ {
next := db.NewUnixfsNode()
err := fillTrickleRec(db, next, i)
if err != nil {
return nil, err
}
err = ufsn.AddChild(next, db)
if err != nil {
return nil, err
}
}
}
return ufsn.GetDagNode()
}
// appendFillLastChild takes an incomplete trickledag node (incomplete meaning not full) and
// fills it out to the specified depth with blocks from the given DagBuilderHelper.
//
// The last child is fetched, extended through appendRec and re-linked in
// place of the old one. If layerFill is non-zero, the current group of
// subtrees is then topped up to layerRepeat entries (or until the data
// runs out).
func appendFillLastChild(ctx context.Context, ufsn *h.UnixfsNode, depth int, layerFill int, db *h.DagBuilderHelper) error {
// Nothing to extend when the node has no more than Maxlinks children
// (presumably: it only holds direct leaves, no subtrees).
if ufsn.NumChildren() <= db.Maxlinks() {
return nil
}
// Recursive step, grab last child
last := ufsn.NumChildren() - 1
lastChild, err := ufsn.GetChild(ctx, last, db.GetDagServ())
if err != nil {
return err
}
// Fill out last child (may not be full tree)
nchild, err := appendRec(ctx, lastChild, db, depth-1)
if err != nil {
return err
}
// Update changed child in parent node
ufsn.RemoveChild(last, db)
err = ufsn.AddChild(nchild, db)
if err != nil {
return err
}
// Partially filled depth layer
if layerFill != 0 {
for ; layerFill < layerRepeat && !db.Done(); layerFill++ {
next := db.NewUnixfsNode()
err := fillTrickleRec(db, next, depth)
if err != nil {
return err
}
err = ufsn.AddChild(next, db)
if err != nil {
return err
}
}
}
return nil
}
// appendRec is the recursive call for Append: it extends the subtree rooted
// at ufsn with data from db, up to the given depth, and returns the same
// node. It returns immediately when depth is 0 or the data is exhausted.
func appendRec(ctx context.Context, ufsn *h.UnixfsNode, db *h.DagBuilderHelper, depth int) (*h.UnixfsNode, error) {
if depth == 0 || db.Done() {
return ufsn, nil
}
// Get depth of this 'tree'
n, layerProgress := trickleDepthInfo(ufsn, db.Maxlinks())
if n == 0 {
// If direct blocks not filled...
if err := db.FillNodeLayer(ufsn); err != nil {
return nil, err
}
n++
}
// If at correct depth, no need to continue
if n == depth {
return ufsn, nil
}
if err := appendFillLastChild(ctx, ufsn, n, layerProgress, db); err != nil {
return nil, err
}
// after appendFillLastChild, our depth is now increased by one
if !db.Done() {
n++
}
// Now, continue filling out tree like normal
for i := n; i < depth && !db.Done(); i++ {
for j := 0; j < layerRepeat && !db.Done(); j++ {
next := db.NewUnixfsNode()
if err := fillTrickleRec(db, next, i); err != nil {
return nil, err
}
if err := ufsn.AddChild(next, db); err != nil {
return nil, err
}
}
}
return ufsn, nil
}
// trickleDepthInfo derives, from the number of children of node, how
// deep the trickle tree currently is and how far its last group of
// subtrees is filled. With fewer than maxlinks children it returns
// (0, 0). Otherwise the children beyond the first maxlinks are counted
// in groups of layerRepeat: the first result is the number of complete
// groups plus one, the second the number of children in the trailing
// incomplete group.
func trickleDepthInfo(node *h.UnixfsNode, maxlinks int) (int, int) {
	children := node.NumChildren()
	if children >= maxlinks {
		beyondDirect := children - maxlinks
		return beyondDirect/layerRepeat + 1, beyondDirect % layerRepeat
	}
	return 0, 0
}
// VerifyParams is used by VerifyTrickleDagStructure
type VerifyParams struct {
// Getter is used to fetch the children of the nodes being verified.
Getter ipld.NodeGetter
// Direct is the number of leading links of a node that must be leaves.
Direct int
// LayerRepeat is the number of consecutive subtrees sharing the same
// depth. It is used as a divisor, so it must not be zero.
LayerRepeat int
// Prefix, when non-nil, is the CID prefix every node is checked against
// (after adjusting codec, version and hash length per node).
Prefix *cid.Prefix
// RawLeaves states whether leaves are expected to be raw nodes (true)
// or protobuf nodes (false).
RawLeaves bool
}
// VerifyTrickleDagStructure checks that the given dag matches exactly the trickle dag datastructure
// layout. The root is verified with depth -1, i.e. without a depth limit.
func VerifyTrickleDagStructure(nd ipld.Node, p VerifyParams) error {
return verifyTDagRec(nd, -1, p)
}
// verifyTDagRec is the recursive call for verifying the structure of a
// trickledag.
//
// depth is the maximum depth allowed for n: 0 means n must be a leaf, a
// negative value (used for the root) disables the depth limit. The first
// structural violation found is returned as an error.
func verifyTDagRec(n ipld.Node, depth int, p VerifyParams) error {
// Codec expected in the CID prefix; switched to cid.Raw for raw leaves.
codec := cid.DagProtobuf
if depth == 0 {
if len(n.Links()) > 0 {
return errors.New("expected direct block")
}
// zero depth dag is raw data block
switch nd := n.(type) {
case *dag.ProtoNode:
pbn, err := ft.FromBytes(nd.Data())
if err != nil {
return err
}
if pbn.GetType() != ft.TRaw {
return errors.New("expected raw block")
}
if p.RawLeaves {
return errors.New("expected raw leaf, got a protobuf node")
}
case *dag.RawNode:
if !p.RawLeaves {
return errors.New("expected protobuf node as leaf")
}
codec = cid.Raw
default:
return errors.New("expected ProtoNode or RawNode")
}
}
// verify prefix
if p.Prefix != nil {
prefix := n.Cid().Prefix()
expect := *p.Prefix // make a copy
expect.Codec = uint64(codec)
// A raw node with a version-0 prefix is expected to carry version 1.
if codec == cid.Raw && expect.Version == 0 {
expect.Version = 1
}
// -1 means "any length": accept whatever the node actually uses.
if expect.MhLength == -1 {
expect.MhLength = prefix.MhLength
}
if prefix != expect {
return fmt.Errorf("unexpected cid prefix: expected: %v; got %v", expect, prefix)
}
}
// Leaves are fully verified at this point.
if depth == 0 {
return nil
}
nd, ok := n.(*dag.ProtoNode)
if !ok {
return errors.New("expected ProtoNode")
}
// Verify this is a branch node
pbn, err := ft.FromBytes(nd.Data())
if err != nil {
return err
}
if pbn.GetType() != ft.TFile {
return fmt.Errorf("expected file as branch node, got: %s", pbn.GetType())
}
if len(pbn.Data) > 0 {
return errors.New("branch node should not have data")
}
for i := 0; i < len(nd.Links()); i++ {
child, err := nd.Links()[i].GetNode(context.TODO(), p.Getter)
if err != nil {
return err
}
if i < p.Direct {
// Direct blocks
err := verifyTDagRec(child, 0, p)
if err != nil {
return err
}
} else {
// Recursive trickle dags
// Every group of p.LayerRepeat links shares one depth, starting at 1.
rdepth := ((i - p.Direct) / p.LayerRepeat) + 1
// A child must be shallower than its parent (when the parent is limited).
if rdepth >= depth && depth > 0 {
return errors.New("child dag was too deep")
}
err := verifyTDagRec(child, rdepth, p)
if err != nil {
return err
}
}
}
return nil
}

View File

@ -1,39 +0,0 @@
package io
import (
"bytes"
"context"
)
// BufDagReader implements a DagReader that reads from a byte slice
// using a bytes.Reader. It is used for RawNodes.
// Read, Seek and WriteTo are provided by the embedded *bytes.Reader.
type BufDagReader struct {
*bytes.Reader
}
// NewBufDagReader wraps the given byte slice in a BufDagReader
// positioned at its start. BufDagReader is used to read RawNodes.
func NewBufDagReader(b []byte) *BufDagReader {
	rd := new(BufDagReader)
	rd.Reader = bytes.NewReader(b)
	return rd
}
// Compile-time check that *BufDagReader satisfies the DagReader interface.
var _ DagReader = (*BufDagReader)(nil)
// Close is a nop; it always returns nil.
func (*BufDagReader) Close() error {
return nil
}
// CtxReadFull reads the slice onto b with a single Read call on the
// embedded bytes.Reader. The context is not consulted.
func (rd *BufDagReader) CtxReadFull(ctx context.Context, b []byte) (int, error) {
return rd.Read(b)
}
// Size reports the size of the underlying buffer, as given by the
// embedded bytes.Reader. A negative size is treated as an impossible
// state and panics.
func (rd *BufDagReader) Size() uint64 {
	if size := rd.Reader.Size(); size >= 0 {
		return uint64(size)
	}
	panic("size smaller than 0 (impossible!!)")
}

View File

@ -1,79 +0,0 @@
package io
import (
"context"
"errors"
"io"
mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
ftpb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// Common errors returned by NewDagReader.
var (
// ErrIsDir is returned for directory and HAMT shard nodes.
ErrIsDir = errors.New("this dag node is a directory")
// ErrCantReadSymlinks is returned for symlink nodes.
ErrCantReadSymlinks = errors.New("cannot currently read symlinks")
// ErrUnkownNodeType is returned for nodes that are neither raw nor
// protobuf nodes. (The misspelling is part of the exported name.)
ErrUnkownNodeType = errors.New("unknown node type")
)
// A DagReader provides read-only read and seek access to a unixfs file.
// Different implementations of readers are used for the different
// types of unixfs/protobuf-encoded nodes.
type DagReader interface {
ReadSeekCloser
// Size returns the size of the data being read.
Size() uint64
// CtxReadFull reads into the given slice, taking a context.
CtxReadFull(context.Context, []byte) (int, error)
}
// A ReadSeekCloser implements interfaces to read, copy, seek and close.
// It is the union of io.Reader, io.Seeker, io.Closer and io.WriterTo.
type ReadSeekCloser interface {
io.Reader
io.Seeker
io.Closer
io.WriterTo
}
// NewDagReader returns a reader over the data represented by the given
// node, fetching further nodes through the passed in NodeGetter.
//
// Raw nodes are read straight from their data. Protobuf nodes are
// decoded as UnixFS nodes first: files and raw UnixFS nodes get a
// PBFileReader, metadata nodes are resolved through their first link
// (which must be a protobuf node), and directories, HAMT shards and
// symlinks are rejected with ErrIsDir / ErrCantReadSymlinks. Any other
// node type yields ErrUnkownNodeType.
func NewDagReader(ctx context.Context, n ipld.Node, serv ipld.NodeGetter) (DagReader, error) {
	if rawNode, isRaw := n.(*mdag.RawNode); isRaw {
		return NewBufDagReader(rawNode.RawData()), nil
	}

	pbNode, isProto := n.(*mdag.ProtoNode)
	if !isProto {
		return nil, ErrUnkownNodeType
	}

	fsNode, err := ft.FSNodeFromBytes(pbNode.Data())
	if err != nil {
		return nil, err
	}

	switch fsNode.Type() {
	case ftpb.Data_File, ftpb.Data_Raw:
		return NewPBFileReader(ctx, pbNode, fsNode, serv), nil

	case ftpb.Data_Directory, ftpb.Data_HAMTShard:
		// Dont allow reading directories
		return nil, ErrIsDir

	case ftpb.Data_Symlink:
		return nil, ErrCantReadSymlinks

	case ftpb.Data_Metadata:
		if len(pbNode.Links()) == 0 {
			return nil, errors.New("incorrectly formatted metadata object")
		}

		target, err := pbNode.Links()[0].GetNode(ctx, serv)
		if err != nil {
			return nil, err
		}

		targetPB, ok := target.(*mdag.ProtoNode)
		if !ok {
			return nil, mdag.ErrNotProtobuf
		}
		return NewDagReader(ctx, targetPB, serv)
	}

	return nil, ft.ErrUnrecognizedType
}

View File

@ -1,347 +0,0 @@
package io
import (
"bytes"
"io"
"io/ioutil"
"math/rand"
"strings"
"testing"
mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
"gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
context "context"
testu "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/test"
)
// TestBasicRead checks that reading a random 1024-byte file back through a
// DagReader yields exactly the bytes that were stored.
func TestBasicRead(t *testing.T) {
	dserv := testu.GetDAGServ()
	want, node := testu.GetRandomNode(t, dserv, 1024, testu.UseProtoBufLeaves)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	reader, err := NewDagReader(ctx, node, dserv)
	if err != nil {
		t.Fatal(err)
	}

	got, err := ioutil.ReadAll(reader)
	if err != nil {
		t.Fatal(err)
	}

	if err := testu.ArrComp(want, got); err != nil {
		t.Fatal(err)
	}
}
// TestSeekAndRead stores the bytes 0..255, then walks backwards through the
// file: for each index it seeks there, verifies the reported offset, reads a
// single byte, and verifies both the value and the advanced offset.
func TestSeekAndRead(t *testing.T) {
	dserv := testu.GetDAGServ()
	inbuf := make([]byte, 256)
	for i := 0; i <= 255; i++ {
		inbuf[i] = byte(i)
	}

	node := testu.GetNode(t, dserv, inbuf, testu.UseProtoBufLeaves)
	ctx, closer := context.WithCancel(context.Background())
	defer closer()

	reader, err := NewDagReader(ctx, node, dserv)
	if err != nil {
		t.Fatal(err)
	}

	for i := 255; i >= 0; i-- {
		// A failed seek must be reported directly instead of surfacing
		// later as a confusing offset mismatch.
		if _, err := reader.Seek(int64(i), io.SeekStart); err != nil {
			t.Fatal(err)
		}

		if off := getOffset(reader); off != int64(i) {
			t.Fatalf("expected offset to be %d after seek, got %d", i, off)
		}

		out := readByte(t, reader)

		if int(out) != i {
			t.Fatalf("read %d at index %d, expected %d", out, i, i)
		}

		if getOffset(reader) != int64(i+1) {
			t.Fatal("expected offset to be increased by one after read")
		}
	}
}
// TestSeekAndReadLarge seeks into the middle of a 20000-byte file, reads 100
// bytes, and then inspects the reader's promise table to confirm that only
// the expected window of child nodes has been preloaded.
func TestSeekAndReadLarge(t *testing.T) {
	dserv := testu.GetDAGServ()

	data := make([]byte, 20000)
	rand.Read(data)
	node := testu.GetNode(t, dserv, data, testu.UseProtoBufLeaves)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	reader, err := NewDagReader(ctx, node, dserv)
	if err != nil {
		t.Fatal(err)
	}

	if _, err = reader.Seek(10000, io.SeekStart); err != nil {
		t.Fatal(err)
	}

	chunk := make([]byte, 100)
	if _, err = io.ReadFull(reader, chunk); err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(chunk, data[10000:10100]) {
		t.Fatal("seeked read failed")
	}

	pbdr := reader.(*PBDagReader)
	var count int
	for idx, promise := range pbdr.promises {
		inWindow := idx > 20 && idx < 30
		switch {
		case inWindow && promise == nil:
			t.Fatal("expected index to be not nil: ", idx)
		case inWindow:
			count++
		case promise != nil:
			t.Fatal("expected index to be nil: ", idx)
		}
	}

	// -1 because we read some and it cleared one
	if want := preloadSize - 1; count != want {
		t.Fatalf("expected %d preloaded promises, got %d", preloadSize-1, count)
	}
}
// TestReadAndCancel reads the first 100 bytes with a dedicated context,
// cancels that context, and then verifies the remainder of the file can still
// be read through the reader's own context.
func TestReadAndCancel(t *testing.T) {
	dserv := testu.GetDAGServ()

	data := make([]byte, 20000)
	rand.Read(data)
	node := testu.GetNode(t, dserv, data, testu.UseProtoBufLeaves)

	ctx, closer := context.WithCancel(context.Background())
	defer closer()

	reader, err := NewDagReader(ctx, node, dserv)
	if err != nil {
		t.Fatal(err)
	}

	// Separate context used only for the first, explicit-context read.
	readCtx, cancel := context.WithCancel(context.Background())
	head := make([]byte, 100)
	if _, err = reader.CtxReadFull(readCtx, head); err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(head, data[0:100]) {
		t.Fatal("read failed")
	}
	cancel()

	rest, err := ioutil.ReadAll(reader)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(data[100:], rest) {
		t.Fatal("buffers not equal")
	}
}
// TestRelativeSeek exercises io.SeekCurrent and io.SeekEnd. The file holds
// the value i at every fourth byte (offset i*4). The first loop walks forward
// reading one byte and skipping three; the second loop walks backward from
// the end, reading one byte and seeking back five.
func TestRelativeSeek(t *testing.T) {
	dserv := testu.GetDAGServ()
	ctx, closer := context.WithCancel(context.Background())
	defer closer()
	inbuf := make([]byte, 1024)
	for i := 0; i < 256; i++ {
		inbuf[i*4] = byte(i)
	}
	inbuf[1023] = 1 // force the reader to be 1024 bytes
	node := testu.GetNode(t, dserv, inbuf, testu.UseProtoBufLeaves)
	reader, err := NewDagReader(ctx, node, dserv)
	if err != nil {
		t.Fatal(err)
	}
	for i := 0; i < 256; i++ {
		if getOffset(reader) != int64(i*4) {
			t.Fatalf("offset should be %d, was %d", i*4, getOffset(reader))
		}
		out := readByte(t, reader)
		if int(out) != i {
			t.Fatalf("expected to read: %d at %d, read %d", i, getOffset(reader)-1, out)
		}
		// After the last value there is nothing further to skip to.
		if i != 255 {
			_, err := reader.Seek(3, io.SeekCurrent)
			if err != nil {
				t.Fatal(err)
			}
		}
	}
	// A positive offset with io.SeekEnd is expected to land 4 bytes before
	// the end: the next loop asserts an initial offset of 1020.
	_, err = reader.Seek(4, io.SeekEnd)
	if err != nil {
		t.Fatal(err)
	}
	for i := 0; i < 256; i++ {
		if getOffset(reader) != int64(1020-i*4) {
			t.Fatalf("offset should be %d, was %d", 1020-i*4, getOffset(reader))
		}
		out := readByte(t, reader)
		if int(out) != 255-i {
			t.Fatalf("expected to read: %d at %d, read %d", 255-i, getOffset(reader)-1, out)
		}
		// NOTE(review): the result is intentionally unchecked; on the final
		// iteration the target offset would be negative (1 - 5), so this
		// seek is expected to fail.
		reader.Seek(-5, io.SeekCurrent) // seek 4 bytes but we read one byte every time so 5 bytes
	}
}
// TestTypeFailures verifies that NewDagReader refuses to create readers for
// directory and symlink nodes, returning ErrIsDir and ErrCantReadSymlinks
// respectively.
func TestTypeFailures(t *testing.T) {
	dserv := testu.GetDAGServ()
	ctx, closer := context.WithCancel(context.Background())
	defer closer()

	node := unixfs.EmptyDirNode()
	if _, err := NewDagReader(ctx, node, dserv); err != ErrIsDir {
		t.Fatalf("expected to get %v, got %v", ErrIsDir, err)
	}

	data, err := unixfs.SymlinkData("/somelink")
	if err != nil {
		t.Fatal(err)
	}

	node = mdag.NodeWithData(data)
	if _, err := NewDagReader(ctx, node, dserv); err != ErrCantReadSymlinks {
		t.Fatalf("expected to get %v, got %v", ErrCantReadSymlinks, err)
	}
}
// TestBadPBData verifies that NewDagReader fails on a protobuf node whose
// data is not a valid unixfs message.
func TestBadPBData(t *testing.T) {
	dserv := testu.GetDAGServ()
	ctx, closer := context.WithCancel(context.Background())
	defer closer()

	node := mdag.NodeWithData([]byte{42})
	if _, err := NewDagReader(ctx, node, dserv); err == nil {
		t.Fatal("expected error, got nil")
	}
}
// TestMetadataNode checks both sides of metadata handling: a metadata node
// without links must be rejected as incorrectly formatted, while one linking
// to a file must read back that file's contents.
func TestMetadataNode(t *testing.T) {
	ctx, closer := context.WithCancel(context.Background())
	defer closer()

	dserv := testu.GetDAGServ()
	rdata, rnode := testu.GetRandomNode(t, dserv, 512, testu.UseProtoBufLeaves)
	err := dserv.Add(ctx, rnode)
	if err != nil {
		t.Fatal(err)
	}

	data, err := unixfs.BytesForMetadata(&unixfs.Metadata{
		MimeType: "text",
		Size:     125,
	})
	if err != nil {
		t.Fatal(err)
	}
	node := mdag.NodeWithData(data)

	// No links yet: the reader must refuse the node.
	_, err = NewDagReader(ctx, node, dserv)
	if err == nil {
		t.Fatal("expected an error")
	}
	if !strings.Contains(err.Error(), "incorrectly formatted") {
		t.Fatal("expected different error")
	}

	// Attach the real content; a failure here would otherwise show up as a
	// misleading error from NewDagReader below.
	if err := node.AddNodeLink("", rnode); err != nil {
		t.Fatal(err)
	}

	reader, err := NewDagReader(ctx, node, dserv)
	if err != nil {
		t.Fatal(err)
	}
	readdata, err := ioutil.ReadAll(reader)
	if err != nil {
		t.Fatal(err)
	}
	if err := testu.ArrComp(rdata, readdata); err != nil {
		t.Fatal(err)
	}
}
// TestWriteTo checks that WriteTo streams the complete file contents into
// the destination writer.
func TestWriteTo(t *testing.T) {
	dserv := testu.GetDAGServ()
	inbuf, node := testu.GetRandomNode(t, dserv, 1024, testu.UseProtoBufLeaves)
	ctx, closer := context.WithCancel(context.Background())
	defer closer()

	reader, err := NewDagReader(ctx, node, dserv)
	if err != nil {
		t.Fatal(err)
	}

	outbuf := new(bytes.Buffer)
	// Surface a failing WriteTo directly instead of as a content mismatch.
	if _, err := reader.WriteTo(outbuf); err != nil {
		t.Fatal(err)
	}

	err = testu.ArrComp(inbuf, outbuf.Bytes())
	if err != nil {
		t.Fatal(err)
	}
}
// TestReaderSzie checks that the reader reports the size of the file it was
// created for.
func TestReaderSzie(t *testing.T) {
	const want = int64(1024)

	dserv := testu.GetDAGServ()
	_, node := testu.GetRandomNode(t, dserv, want, testu.UseProtoBufLeaves)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	reader, err := NewDagReader(ctx, node, dserv)
	if err != nil {
		t.Fatal(err)
	}

	if got := reader.Size(); got != uint64(want) {
		t.Fatal("wrong reader size")
	}
}
// readByte reads exactly one byte from reader and returns it, failing the
// test if the read errors or returns a different number of bytes. The error
// is checked first so the underlying cause is reported rather than a generic
// byte-count message.
func readByte(t testing.TB, reader DagReader) byte {
	out := make([]byte, 1)
	c, err := reader.Read(out)
	if err != nil {
		t.Fatal(err)
	}
	if c != 1 {
		t.Fatal("reader should have read just one byte")
	}
	return out[0]
}
// getOffset returns the reader's current position by performing a zero-byte
// relative seek. It panics if the seek fails.
func getOffset(reader DagReader) int64 {
	pos, err := reader.Seek(0, io.SeekCurrent)
	if err == nil {
		return pos
	}
	panic("failed to retrieve offset: " + err.Error())
}

View File

@ -1,257 +0,0 @@
package io
import (
"context"
"fmt"
"os"
mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
format "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
hamt "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/hamt"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// UseHAMTSharding is a global flag that signifies whether or not to use the
// HAMT sharding scheme for directory creation. NewDirectory consults it on
// every call.
var UseHAMTSharding = false

// DefaultShardWidth is the default value used for hamt sharding width.
var DefaultShardWidth = 256
// Directory defines a UnixFS directory. It is used for creating, reading and
// editing directories. It allows to work with different directory schemes,
// like the basic or the HAMT implementation.
//
// It just allows to perform explicit edits on a single directory, working with
// directory trees is out of its scope, they are managed by the MFS layer
// (which is the main consumer of this interface).
type Directory interface {
	// SetPrefix sets the CID prefix of the root node.
	SetPrefix(*cid.Prefix)
	// AddChild adds a (name, key) pair to the root node.
	AddChild(context.Context, string, ipld.Node) error
	// ForEachLink applies the given function to Links in the directory.
	ForEachLink(context.Context, func(*ipld.Link) error) error
	// Links returns all the links in the directory node.
	Links(context.Context) ([]*ipld.Link, error)
	// Find returns the root node of the file named 'name' within this directory.
	// In the case of HAMT-directories, it will traverse the tree.
	Find(context.Context, string) (ipld.Node, error)
	// RemoveChild removes the child with the given name.
	RemoveChild(context.Context, string) error
	// GetNode returns the root of this directory.
	GetNode() (ipld.Node, error)
	// GetPrefix returns the CID Prefix used.
	GetPrefix() *cid.Prefix
}
// TODO: Evaluate removing `dserv` from this layer and providing it in MFS.
// (The functions should in that case add a `DAGService` argument.)

// BasicDirectory is the basic implementation of `Directory`. All the entries
// are stored in a single node.
type BasicDirectory struct {
	// node holds every entry of the directory as a link.
	node *mdag.ProtoNode
	// dserv is used to fetch child nodes (see Find and SwitchToSharding).
	dserv ipld.DAGService
}
// HAMTDirectory is the HAMT implementation of `Directory`.
// (See package `hamt` for more information.)
type HAMTDirectory struct {
	// shard is the root HAMT shard that all directory operations delegate to.
	shard *hamt.Shard
	// dserv is used to resolve links returned by the shard into nodes.
	dserv ipld.DAGService
}
// NewDirectory creates an empty Directory backed by dserv. The implementation
// is chosen by the UseHAMTSharding flag: a HAMTDirectory of width
// DefaultShardWidth when set, otherwise a BasicDirectory.
func NewDirectory(dserv ipld.DAGService) Directory {
	if !UseHAMTSharding {
		return &BasicDirectory{
			node:  format.EmptyDirNode(),
			dserv: dserv,
		}
	}

	shard, err := hamt.NewShard(dserv, DefaultShardWidth)
	if err != nil {
		panic(err) // will only panic if DefaultShardWidth is a bad value
	}
	return &HAMTDirectory{
		shard: shard,
		dserv: dserv,
	}
}
// ErrNotADir implies that the given node was not a unixfs directory.
// NewDirectoryFromNode returns it for non-protobuf nodes and for unixfs nodes
// that are neither a directory nor a HAMT shard.
var ErrNotADir = fmt.Errorf("merkledag node was not a directory or shard")
// NewDirectoryFromNode wraps an existing IPLD node as a Directory backed by
// dserv. Plain unixfs directories become a BasicDirectory working on a copy
// of the node; HAMT shards become a HAMTDirectory. Anything else is rejected
// with ErrNotADir.
func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, error) {
	pbNode, isProto := node.(*mdag.ProtoNode)
	if !isProto {
		return nil, ErrNotADir
	}

	fsNode, err := format.FSNodeFromBytes(pbNode.Data())
	if err != nil {
		return nil, err
	}

	switch fsNode.Type() {
	case format.THAMTShard:
		shard, err := hamt.NewHamtFromDag(dserv, node)
		if err != nil {
			return nil, err
		}
		return &HAMTDirectory{shard: shard, dserv: dserv}, nil

	case format.TDirectory:
		// Work on a copy so edits do not touch the caller's node.
		nodeCopy := pbNode.Copy().(*mdag.ProtoNode)
		return &BasicDirectory{node: nodeCopy, dserv: dserv}, nil

	default:
		return nil, ErrNotADir
	}
}
// SetPrefix implements the `Directory` interface by setting the CID prefix
// on the underlying node.
func (d *BasicDirectory) SetPrefix(prefix *cid.Prefix) {
	d.node.SetPrefix(prefix)
}
// AddChild implements the `Directory` interface. Any existing link called
// `name` is dropped first, so the call adds or replaces the entry pointing
// at `node`.
func (d *BasicDirectory) AddChild(ctx context.Context, name string, node ipld.Node) error {
	// Remove old link (if it existed), don't check a potential `ErrNotFound`.
	_ = d.node.RemoveNodeLink(name)

	return d.node.AddNodeLink(name, node)
}
// ForEachLink implements the `Directory` interface. It calls f on each link
// of the node in order and stops at, and returns, the first error.
func (d *BasicDirectory) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error {
	links := d.node.Links()
	for i := 0; i < len(links); i++ {
		if err := f(links[i]); err != nil {
			return err
		}
	}
	return nil
}
// Links implements the `Directory` interface. It returns the links of the
// underlying node; the error is always nil.
func (d *BasicDirectory) Links(ctx context.Context) ([]*ipld.Link, error) {
	return d.node.Links(), nil
}
// Find implements the `Directory` interface. It looks up the link called
// name and fetches the node it points to from the DAGService. A missing link
// is reported as os.ErrNotExist.
func (d *BasicDirectory) Find(ctx context.Context, name string) (ipld.Node, error) {
	lnk, err := d.node.GetNodeLink(name)
	switch err {
	case nil:
		return d.dserv.Get(ctx, lnk.Cid)
	case mdag.ErrLinkNotFound:
		return nil, os.ErrNotExist
	default:
		return nil, err
	}
}
// RemoveChild implements the `Directory` interface by removing the link
// called name from the underlying node and returning that call's error.
func (d *BasicDirectory) RemoveChild(ctx context.Context, name string) error {
	return d.node.RemoveNodeLink(name)
}
// GetNode implements the `Directory` interface. It returns the underlying
// node itself (not a copy); the error is always nil.
func (d *BasicDirectory) GetNode() (ipld.Node, error) {
	return d.node, nil
}
// GetPrefix implements the `Directory` interface. It returns a pointer to
// the Prefix field of the underlying node.
func (d *BasicDirectory) GetPrefix() *cid.Prefix {
	return &d.node.Prefix
}
// SwitchToSharding builds a HAMT-backed Directory holding the same entries
// as this one. A new shard of DefaultShardWidth inherits the node's CID
// prefix, and every child is fetched from the DAGService and inserted under
// its link name. The first error aborts the conversion.
func (d *BasicDirectory) SwitchToSharding(ctx context.Context) (Directory, error) {
	shard, err := hamt.NewShard(d.dserv, DefaultShardWidth)
	if err != nil {
		return nil, err
	}
	shard.SetPrefix(&d.node.Prefix)

	for _, lnk := range d.node.Links() {
		child, err := d.dserv.Get(ctx, lnk.Cid)
		if err != nil {
			return nil, err
		}
		if err := shard.Set(ctx, lnk.Name, child); err != nil {
			return nil, err
		}
	}

	return &HAMTDirectory{shard: shard, dserv: d.dserv}, nil
}
// SetPrefix implements the `Directory` interface by forwarding the prefix to
// the underlying shard.
func (d *HAMTDirectory) SetPrefix(prefix *cid.Prefix) {
	d.shard.SetPrefix(prefix)
}
// AddChild implements the `Directory` interface by setting name to nd in the
// underlying shard.
func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error {
	return d.shard.Set(ctx, name, nd)
}
// ForEachLink implements the `Directory` interface by delegating the
// iteration to the underlying shard.
func (d *HAMTDirectory) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error {
	return d.shard.ForEachLink(ctx, f)
}
// Links implements the `Directory` interface by enumerating the links of the
// underlying shard.
func (d *HAMTDirectory) Links(ctx context.Context) ([]*ipld.Link, error) {
	return d.shard.EnumLinks(ctx)
}
// Find implements the `Directory` interface. It will traverse the tree.
// The link found by the shard is resolved into a node through the
// directory's DAGService; a lookup error is returned as-is.
func (d *HAMTDirectory) Find(ctx context.Context, name string) (ipld.Node, error) {
	lnk, err := d.shard.Find(ctx, name)
	if err != nil {
		return nil, err
	}
	return lnk.GetNode(ctx, d.dserv)
}
// RemoveChild implements the `Directory` interface by removing name from the
// underlying shard.
func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error {
	return d.shard.Remove(ctx, name)
}
// GetNode implements the `Directory` interface by returning the node
// produced by the underlying shard.
func (d *HAMTDirectory) GetNode() (ipld.Node, error) {
	return d.shard.Node()
}
// GetPrefix implements the `Directory` interface by returning the prefix of
// the underlying shard.
func (d *HAMTDirectory) GetPrefix() *cid.Prefix {
	return d.shard.Prefix()
}

View File

@ -1,158 +0,0 @@
package io
import (
"context"
"fmt"
"testing"
mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
)
// TestEmptyNode checks that a freshly created empty directory node carries
// no links.
func TestEmptyNode(t *testing.T) {
	if links := ft.EmptyDirNode().Links(); len(links) != 0 {
		t.Fatal("empty node should have 0 links")
	}
}
// TestDirectoryGrowth adds 10000 entries that all point at the same empty
// directory node, then verifies the link count, that every link carries the
// expected CID, and that each entry can be found by name.
func TestDirectoryGrowth(t *testing.T) {
	ds := mdtest.Mock()
	dir := NewDirectory(ds)
	ctx := context.Background()

	d := ft.EmptyDirNode()
	// The child must be stored for Find to succeed later, so a failed Add
	// is reported here rather than as a confusing lookup error.
	if err := ds.Add(ctx, d); err != nil {
		t.Fatal(err)
	}

	nelems := 10000

	for i := 0; i < nelems; i++ {
		err := dir.AddChild(ctx, fmt.Sprintf("dir%d", i), d)
		if err != nil {
			t.Fatal(err)
		}
	}

	_, err := dir.GetNode()
	if err != nil {
		t.Fatal(err)
	}

	links, err := dir.Links(ctx)
	if err != nil {
		t.Fatal(err)
	}

	if len(links) != nelems {
		t.Fatal("didnt get right number of elements")
	}

	dirc := d.Cid()

	names := make(map[string]bool)
	for _, l := range links {
		names[l.Name] = true
		if !l.Cid.Equals(dirc) {
			t.Fatal("link wasnt correct")
		}
	}

	for i := 0; i < nelems; i++ {
		dn := fmt.Sprintf("dir%d", i)
		if !names[dn] {
			t.Fatal("didnt find directory: ", dn)
		}

		_, err := dir.Find(context.Background(), dn)
		if err != nil {
			t.Fatal(err)
		}
	}
}
// TestDuplicateAddDir adds the same child under the same name twice and
// expects the directory to end up with a single link.
func TestDuplicateAddDir(t *testing.T) {
	ctx := context.Background()
	ds := mdtest.Mock()
	dir := NewDirectory(ds)
	nd := ft.EmptyDirNode()

	for attempt := 0; attempt < 2; attempt++ {
		if err := dir.AddChild(ctx, "test", nd); err != nil {
			t.Fatal(err)
		}
	}

	lnks, err := dir.Links(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if got := len(lnks); got != 1 {
		t.Fatal("expected only one link")
	}
}
// TestDirBuilder fills a directory with 5000 entries, reloads it from its
// root node with NewDirectoryFromNode, and verifies that the reloaded
// directory exposes every entry.
func TestDirBuilder(t *testing.T) {
	ctx := context.Background()
	ds := mdtest.Mock()
	dir := NewDirectory(ds)

	child := ft.EmptyDirNode()
	if err := ds.Add(ctx, child); err != nil {
		t.Fatal(err)
	}

	count := 5000
	for i := 0; i < count; i++ {
		name := fmt.Sprintf("entry %d", i)
		if err := dir.AddChild(ctx, name, child); err != nil {
			t.Fatal(err)
		}
	}

	dirnd, err := dir.GetNode()
	if err != nil {
		t.Fatal(err)
	}

	links, err := dir.Links(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if len(links) != count {
		t.Fatal("not enough links dawg", len(links), count)
	}

	// Reload the directory from its serialized root and list it again.
	adir, err := NewDirectoryFromNode(ds, dirnd)
	if err != nil {
		t.Fatal(err)
	}
	links, err = adir.Links(ctx)
	if err != nil {
		t.Fatal(err)
	}

	seen := make(map[string]bool)
	for _, lnk := range links {
		seen[lnk.Name] = true
	}
	for i := 0; i < count; i++ {
		if n := fmt.Sprintf("entry %d", i); !seen[n] {
			t.Fatal("COULDNT FIND: ", n)
		}
	}

	if len(links) != count {
		t.Fatal("wrong number of links", len(links), count)
	}
}

View File

@ -1,3 +0,0 @@
// Package io implements convenience objects for working with the ipfs
// unixfs data format.
package io

View File

@ -1,328 +0,0 @@
package io
import (
"context"
"errors"
"fmt"
"io"
mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
ftpb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// PBDagReader provides a way to easily read the data contained in a dag.
// It reads the root node's own data first and then walks the child links in
// order, loading each child into `buf` as the previous one is exhausted.
type PBDagReader struct {
	// serv is used to fetch child nodes.
	serv ipld.NodeGetter
	// UnixFS file (it should be of type `Data_File` or `Data_Raw` only).
	file *ft.FSNode
	// the current data buffer to be read from
	// will either be a bytes.Reader or a child DagReader
	buf ReadSeekCloser
	// NodePromises for each of 'nodes' child links
	promises []*ipld.NodePromise
	// the cid of each child of the current node
	links []*cid.Cid
	// the index of the child link currently being read from
	linkPosition int
	// current offset for the read head within the 'file'
	offset int64
	// Our context; used by Read, WriteTo and Seek, and as the parent context
	// of child readers.
	ctx context.Context
	// context cancel for children
	cancel func()
}
// Compile-time check that *PBDagReader satisfies the DagReader interface.
var _ DagReader = (*PBDagReader)(nil)
// NewPBFileReader constructs a new PBDagReader for the protobuf node n whose
// decoded unixfs representation is file. The reader starts positioned on the
// node's own data and owns a cancellable child of ctx; one promise slot is
// reserved per child link of n.
func NewPBFileReader(ctx context.Context, n *mdag.ProtoNode, file *ft.FSNode, serv ipld.NodeGetter) *PBDagReader {
	readerCtx, cancel := context.WithCancel(ctx)
	childCids := getLinkCids(n)

	dr := &PBDagReader{
		serv:   serv,
		file:   file,
		links:  childCids,
		ctx:    readerCtx,
		cancel: cancel,
	}
	dr.buf = NewBufDagReader(file.Data())
	dr.promises = make([]*ipld.NodePromise, len(childCids))
	return dr
}
// preloadSize is the maximum number of child links requested by a single
// call to preload.
const preloadSize = 10
// preload requests up to preloadSize child nodes starting at link index beg
// (clamped to the number of links) and stores the resulting promises in the
// matching slots of dr.promises.
func (dr *PBDagReader) preload(ctx context.Context, beg int) {
	end := beg + preloadSize
	if n := len(dr.links); end >= n {
		end = n
	}

	slots := dr.promises[beg:]
	fetched := ipld.GetNodes(ctx, dr.serv, dr.links[beg:end])
	copy(slots, fetched)
}
// precalcNextBuf follows the next link in line and loads it from the
// DAGService, setting the next buffer to read from.
//
// Any current buffer is closed and dropped first. io.EOF is returned once
// linkPosition has moved past the last child link. On success linkPosition
// is advanced by one and dr.buf holds a reader for the loaded child.
func (dr *PBDagReader) precalcNextBuf(ctx context.Context) error {
	if dr.buf != nil {
		dr.buf.Close() // Just to make sure
		dr.buf = nil
	}

	if dr.linkPosition >= len(dr.promises) {
		return io.EOF
	}

	// If we drop to <= preloadSize/2 preloading nodes, preload the next 10.
	// The scan stops at the first missing promise and preloads from there.
	for i := dr.linkPosition; i < dr.linkPosition+preloadSize/2 && i < len(dr.promises); i++ {
		// TODO: check if canceled.
		if dr.promises[i] == nil {
			dr.preload(ctx, i)
			break
		}
	}

	// Wait for the current child and release its promise slot.
	nxt, err := dr.promises[dr.linkPosition].Get(ctx)
	dr.promises[dr.linkPosition] = nil
	switch err {
	case nil:
	case context.DeadlineExceeded, context.Canceled:
		// If our own context is done, report that and stop.
		err = ctx.Err()
		if err != nil {
			return ctx.Err()
		}
		// In this case, the context used to *preload* the node has been canceled.
		// We need to retry the load with our context and we might as
		// well preload some extra nodes while we're at it.
		//
		// Note: When using `Read`, this code will never execute as
		// `Read` will use the global context. It only runs if the user
		// explicitly reads with a custom context (e.g., by calling
		// `CtxReadFull`).
		dr.preload(ctx, dr.linkPosition)
		nxt, err = dr.promises[dr.linkPosition].Get(ctx)
		dr.promises[dr.linkPosition] = nil
		if err != nil {
			return err
		}
	default:
		return err
	}

	dr.linkPosition++

	return dr.loadBufNode(nxt)
}
// loadBufNode installs a reader for node as the current buffer. Raw nodes
// and unixfs raw data are served from memory, unixfs files get a nested
// PBDagReader sharing this reader's context, and anything else is an error.
func (dr *PBDagReader) loadBufNode(node ipld.Node) error {
	if raw, isRaw := node.(*mdag.RawNode); isRaw {
		dr.buf = NewBufDagReader(raw.RawData())
		return nil
	}

	pbNode, isProto := node.(*mdag.ProtoNode)
	if !isProto {
		return ErrUnkownNodeType
	}

	fsNode, err := ft.FSNodeFromBytes(pbNode.Data())
	if err != nil {
		return fmt.Errorf("incorrectly formatted protobuf: %s", err)
	}

	switch kind := fsNode.Type(); kind {
	case ftpb.Data_Raw:
		dr.buf = NewBufDagReader(fsNode.Data())
		return nil
	case ftpb.Data_File:
		dr.buf = NewPBFileReader(dr.ctx, pbNode, fsNode, dr.serv)
		return nil
	default:
		return fmt.Errorf("found %s node in unexpected place", kind.String())
	}
}
// getLinkCids returns the CIDs of all links of n, in link order.
func getLinkCids(n ipld.Node) []*cid.Cid {
	links := n.Links()
	cids := make([]*cid.Cid, len(links))
	for i, lnk := range links {
		cids[i] = lnk.Cid
	}
	return cids
}
// Size returns the total length of the data from the DAG structured file,
// as recorded in the unixfs node (FileSize).
func (dr *PBDagReader) Size() uint64 {
	return dr.file.FileSize()
}
// Read reads data from the DAG structured file. It delegates to CtxReadFull
// using the reader's own context.
func (dr *PBDagReader) Read(b []byte) (int, error) {
	return dr.CtxReadFull(dr.ctx, b)
}
// CtxReadFull reads data from the DAG structured file into b, using ctx to
// load any further child nodes that are needed. It keeps reading across
// child boundaries until b is full or an error occurs, and returns the
// number of bytes read so far together with that error. When the file ends
// before b is full, the error is the io.EOF produced by precalcNextBuf.
func (dr *PBDagReader) CtxReadFull(ctx context.Context, b []byte) (int, error) {
	// If no cached buffer, load one
	if dr.buf == nil {
		if err := dr.precalcNextBuf(ctx); err != nil {
			return 0, err
		}
	}

	total := 0
	for {
		// Attempt to fill bytes from cached buffer
		n, err := io.ReadFull(dr.buf, b[total:])
		total += n
		dr.offset += int64(n)
		switch err {
		// io.EOF will happen if dr.buf had nothing more to read (n == 0);
		// io.ErrUnexpectedEOF means the buffer ran out part-way through.
		case io.EOF, io.ErrUnexpectedEOF:
			// do nothing
		case nil:
			return total, nil
		default:
			return total, err
		}

		// if we are not done with the output buffer load next block
		err = dr.precalcNextBuf(ctx)
		if err != nil {
			return total, err
		}
	}
}
// WriteTo streams the remaining file data into w, moving from child to child
// until the links are exhausted. It returns the number of bytes written; the
// end of the file is reported as a nil error.
func (dr *PBDagReader) WriteTo(w io.Writer) (int64, error) {
	// Make sure there is a buffer to start from.
	if dr.buf == nil {
		if err := dr.precalcNextBuf(dr.ctx); err != nil {
			return 0, err
		}
	}

	var written int64
	for {
		// Drain the current buffer into w.
		n, err := dr.buf.WriteTo(w)
		written += n
		dr.offset += n
		if err != nil && err != io.EOF {
			return written, err
		}

		// Move on to the next child; running out of children ends the copy.
		switch err = dr.precalcNextBuf(dr.ctx); err {
		case nil:
			// keep going with the freshly loaded buffer
		case io.EOF:
			return written, nil
		default:
			return written, err
		}
	}
}
// Close closes the reader by cancelling the context created for it in
// NewPBFileReader. It always returns nil.
func (dr *PBDagReader) Close() error {
	dr.cancel()
	return nil
}
// Seek implements io.Seeker, and will seek to a given offset in the file
// interface matches standard unix seek
//
// NOTE(review): for io.SeekEnd the offset is *subtracted* from the file size,
// so a positive offset moves backwards from the end. This differs from the
// io.Seeker convention, where the offset is added.
//
// TODO: check if we can do relative seeks, to reduce the amount of dagreader
// recreations that need to happen.
func (dr *PBDagReader) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case io.SeekStart:
		if offset < 0 {
			return -1, errors.New("invalid offset")
		}
		// Already there: nothing to do.
		if offset == dr.offset {
			return offset, nil
		}

		// left represents the number of bytes remaining to seek to (from beginning)
		left := offset
		// The target lies within the root node's own data.
		if int64(len(dr.file.Data())) >= offset {
			// Close current buf to close potential child dagreader
			if dr.buf != nil {
				dr.buf.Close()
			}
			dr.buf = NewBufDagReader(dr.file.Data()[offset:])

			// start reading links from the beginning
			dr.linkPosition = 0
			dr.offset = offset
			return offset, nil
		}

		// skip past root block data
		left -= int64(len(dr.file.Data()))

		// iterate through links and find where we need to be
		// NOTE(review): if the offset lies beyond every child, no branch
		// below assigns linkPosition and it keeps its previous value.
		for i := 0; i < dr.file.NumChildren(); i++ {
			if dr.file.BlockSize(i) > uint64(left) {
				dr.linkPosition = i
				break
			} else {
				left -= int64(dr.file.BlockSize(i))
			}
		}

		// start sub-block request
		err := dr.precalcNextBuf(dr.ctx)
		if err != nil {
			return 0, err
		}

		// set proper offset within child readseeker
		n, err := dr.buf.Seek(left, io.SeekStart)
		if err != nil {
			return -1, err
		}

		// sanity
		left -= n
		if left != 0 {
			return -1, errors.New("failed to seek properly")
		}
		dr.offset = offset
		return offset, nil
	case io.SeekCurrent:
		// TODO: be smarter here
		// A zero relative seek just reports the current position.
		if offset == 0 {
			return dr.offset, nil
		}

		noffset := dr.offset + offset
		return dr.Seek(noffset, io.SeekStart)
	case io.SeekEnd:
		noffset := int64(dr.file.FileSize()) - offset
		n, err := dr.Seek(noffset, io.SeekStart)

		// Return negative number if we can't figure out the file size. Using io.EOF
		// for this seems to be good(-enough) solution as it's only returned by
		// precalcNextBuf when we step out of file range.
		// This is needed for gateway to function properly
		if err == io.EOF && dr.file.Type() == ftpb.Data_File {
			return -1, nil
		}
		return n, err
	default:
		return 0, errors.New("invalid whence")
	}
}

View File

@ -1,58 +0,0 @@
package io
import (
"context"
dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
hamt "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/hamt"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// ResolveUnixfsOnce resolves a single hop of a path through a graph in a
// unixfs context. This includes handling traversing sharded directories.
//
// For protobuf nodes that decode as a unixfs HAMT shard, names[0] is looked
// up inside the shard. Every other protobuf node (including those that are
// not unixfs at all) is resolved by its direct link called names[0]. In both
// cases the remaining path is names[1:]. Non-protobuf nodes resolve through
// their own ResolveLink.
func ResolveUnixfsOnce(ctx context.Context, ds ipld.NodeGetter, nd ipld.Node, names []string) (*ipld.Link, []string, error) {
	pbNode, isProto := nd.(*dag.ProtoNode)
	if !isProto {
		lnk, rest, err := nd.ResolveLink(names)
		if err != nil {
			return nil, nil, err
		}
		return lnk, rest, nil
	}

	// Only a successfully decoded HAMT shard needs special traversal.
	if upb, err := ft.FromBytes(pbNode.Data()); err == nil && upb.GetType() == ft.THAMTShard {
		rods := dag.NewReadOnlyDagService(ds)
		shard, err := hamt.NewHamtFromDag(rods, pbNode)
		if err != nil {
			return nil, nil, err
		}

		found, err := shard.Find(ctx, names[0])
		if err != nil {
			return nil, nil, err
		}
		return found, names[1:], nil
	}

	// Not a shard (or not unixfs): use standard object traversal code.
	lnk, err := pbNode.GetNodeLink(names[0])
	if err != nil {
		return nil, nil, err
	}
	return lnk, names[1:], nil
}

View File

@ -1,592 +0,0 @@
// Package mod provides DAG modification utilities to, for example,
// insert additional nodes in a unixfs DAG or truncate them.
package mod
import (
"bytes"
"context"
"errors"
"io"
mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
help "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers"
trickle "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/trickle"
uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io"
chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// Common errors returned by the DAG modifier.
var (
	// ErrSeekFail reports that a seek did not end at the requested position.
	ErrSeekFail = errors.New("failed to seek properly")
	// ErrUnrecognizedWhence reports an unsupported `whence` value for a seek.
	ErrUnrecognizedWhence = errors.New("unrecognized whence")
	// ErrNotUnixfs is returned for nodes that are neither protobuf nor raw.
	ErrNotUnixfs = errors.New("dagmodifier only supports unixfs nodes (proto or raw)")
)
// writebufferSize is the buffered-write threshold (2MB): once the write
// buffer grows beyond it, Write flushes via Sync.
var writebufferSize = 1 << 21
// DagModifier is the only struct licensed and able to correctly
// perform surgery on a DAG 'file'
// Dear god, please rename this to something more pleasant
type DagModifier struct {
	// dagserv stores and retrieves the nodes of the DAG being modified.
	dagserv ipld.DAGService
	// curNode is the current root of the file.
	curNode ipld.Node

	// splitter chunks buffered data that is appended past the end of the DAG.
	splitter chunker.SplitterGen
	ctx      context.Context
	// readCancel is invoked by Sync to stop an active reader.
	readCancel func()

	// writeStart is the file offset at which the buffered data begins.
	writeStart uint64
	// curWrOff is advanced by the number of bytes accepted by each Write.
	curWrOff uint64
	// wrBuf holds written data not yet flushed into the DAG; nil when empty.
	wrBuf *bytes.Buffer

	// Prefix and RawLeaves are initialized by NewDagModifier and may be
	// changed by the caller afterwards.
	Prefix    cid.Prefix
	RawLeaves bool

	// read is the active reader, if any; writes and Sync discard it.
	read uio.DagReader
}
// NewDagModifier returns a new DagModifier working on a copy of `from`. The
// Cid prefix for newly created nodes is inherited from the passed in node
// (with the codec forced to DagProtobuf). If the Cid version is not 0, raw
// leaves are also enabled. The Prefix and RawLeaves options can be
// overridden by changing them after the call.
//
// Only protobuf and raw nodes are accepted; anything else yields
// ErrNotUnixfs.
func NewDagModifier(ctx context.Context, from ipld.Node, serv ipld.DAGService, spl chunker.SplitterGen) (*DagModifier, error) {
	if _, isProto := from.(*mdag.ProtoNode); !isProto {
		if _, isRaw := from.(*mdag.RawNode); !isRaw {
			return nil, ErrNotUnixfs
		}
	}

	prefix := from.Cid().Prefix()
	prefix.Codec = cid.DagProtobuf

	dm := &DagModifier{
		curNode:   from.Copy(),
		dagserv:   serv,
		splitter:  spl,
		ctx:       ctx,
		Prefix:    prefix,
		RawLeaves: prefix.Version > 0,
	}
	return dm, nil
}
// WriteAt will modify a dag file in place, writing b at the given offset.
// Data is buffered through Write; the number of bytes accepted and any error
// from Size, expandSparse, Sync or Write are returned.
func (dm *DagModifier) WriteAt(b []byte, offset int64) (int, error) {
	// TODO: this is currently VERY inefficient
	// each write that happens at an offset other than the current one causes a
	// flush to disk, and dag rewrite
	if offset == int64(dm.writeStart) && dm.wrBuf != nil {
		// If we would overwrite the previous write
		if len(b) >= dm.wrBuf.Len() {
			dm.wrBuf.Reset()
		}
	} else if uint64(offset) != dm.curWrOff {
		// Writing somewhere other than the current write position.
		size, err := dm.Size()
		if err != nil {
			return 0, err
		}
		// Writing past the end: pad the gap with zeros first.
		if offset > size {
			err := dm.expandSparse(offset - size)
			if err != nil {
				return 0, err
			}
		}

		// Flush whatever is buffered before moving the write window.
		err = dm.Sync()
		if err != nil {
			return 0, err
		}
		dm.writeStart = uint64(offset)
	}

	return dm.Write(b)
}
// zeroReader is an endless io.Reader that yields only zero bytes.
type zeroReader struct{}

// Read overwrites every byte of b with zero and reports the whole slice as
// read. It never returns an error.
func (zr zeroReader) Read(b []byte) (int, error) {
	n := len(b)
	for i := 0; i < n; i++ {
		b[i] = 0
	}
	return n, nil
}
// expandSparse appends `size` zero bytes to the current node, chunked into
// blocks of 4096 bytes, and adds the resulting node to the DAG service.
// A small blocksize is chosen to aid in deduplication.
func (dm *DagModifier) expandSparse(size int64) error {
	zeros := io.LimitReader(zeroReader{}, size)
	splitter := chunker.NewSizeSplitter(zeros, 4096)

	grown, err := dm.appendData(dm.curNode, splitter)
	if err != nil {
		return err
	}
	return dm.dagserv.Add(dm.ctx, grown)
}
// Write appends b to the pending write buffer, continuing at the current
// write offset. Any active reader is dropped, and the buffer is flushed with
// Sync once it exceeds writebufferSize. It returns the number of bytes
// buffered.
func (dm *DagModifier) Write(b []byte) (int, error) {
	// A write invalidates whatever reader was in use.
	dm.read = nil

	if dm.wrBuf == nil {
		dm.wrBuf = new(bytes.Buffer)
	}

	n, err := dm.wrBuf.Write(b)
	if err != nil {
		return n, err
	}
	dm.curWrOff += uint64(n)

	if dm.wrBuf.Len() <= writebufferSize {
		return n, nil
	}
	if err := dm.Sync(); err != nil {
		return n, err
	}
	return n, nil
}
// Size returns the size of the file, taking unflushed writes into account:
// if the buffered data extends beyond the stored file size, the end of the
// buffer (writeStart plus buffered length) is reported instead.
func (dm *DagModifier) Size() (int64, error) {
	stored, err := fileSize(dm.curNode)
	if err != nil {
		return 0, err
	}
	size := int64(stored)

	if dm.wrBuf != nil {
		if pendingEnd := int64(dm.wrBuf.Len()) + int64(dm.writeStart); pendingEnd > size {
			return pendingEnd, nil
		}
	}
	return size, nil
}
// fileSize returns the file size recorded for n: the unixfs Filesize for
// protobuf nodes, or the length of the raw data for raw nodes. Other node
// types yield ErrNotUnixfs.
func fileSize(n ipld.Node) (uint64, error) {
	if raw, isRaw := n.(*mdag.RawNode); isRaw {
		return uint64(len(raw.RawData())), nil
	}

	pbNode, isProto := n.(*mdag.ProtoNode)
	if !isProto {
		return 0, ErrNotUnixfs
	}

	fsData, err := ft.FromBytes(pbNode.Data())
	if err != nil {
		return 0, err
	}
	return fsData.GetFilesize(), nil
}
// Sync writes changes to this dag to disk. Buffered data is first written
// over the existing DAG starting at writeStart; whatever remains in the
// buffer afterwards is appended past the end. On success writeStart is
// advanced by the number of bytes that were buffered and the buffer is
// released.
func (dm *DagModifier) Sync() error {
	// No buffer? Nothing to do
	if dm.wrBuf == nil {
		return nil
	}

	// If we have an active reader, kill it
	if dm.read != nil {
		dm.read = nil
		dm.readCancel()
	}

	// Number of bytes we're going to write
	// (captured now because the buffer is consumed below)
	buflen := dm.wrBuf.Len()

	// overwrite existing dag nodes
	thisc, err := dm.modifyDag(dm.curNode, dm.writeStart)
	if err != nil {
		return err
	}

	// Reload the rewritten root from the DAG service.
	dm.curNode, err = dm.dagserv.Get(dm.ctx, thisc)
	if err != nil {
		return err
	}

	// need to write past end of current dag
	if dm.wrBuf.Len() > 0 {
		dm.curNode, err = dm.appendData(dm.curNode, dm.splitter(dm.wrBuf))
		if err != nil {
			return err
		}

		err = dm.dagserv.Add(dm.ctx, dm.curNode)
		if err != nil {
			return err
		}
	}

	dm.writeStart += uint64(buflen)

	dm.wrBuf = nil
	return nil
}
// modifyDag writes the data in 'dm.wrBuf' over the data in node 'n' starting
// at 'offset' and returns the cid of the rewritten node.
//
// Only bytes that already exist in the dag are overwritten: each leaf reads
// from dm.wrBuf into a slice bounded by the leaf's current data, so data that
// would extend the file stays in dm.wrBuf for the caller to append.
func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (*cid.Cid, error) {
// If we've reached a leaf node.
if len(n.Links()) == 0 {
switch nd0 := n.(type) {
case *mdag.ProtoNode:
f, err := ft.FromBytes(nd0.Data())
if err != nil {
return nil, err
}
// Overwrite the decoded payload in place; Read copies at most
// len(f.Data[offset:]) bytes. io.EOF only means the buffer is drained.
_, err = dm.wrBuf.Read(f.Data[offset:])
if err != nil && err != io.EOF {
return nil, err
}
// Update newly written node..
b, err := proto.Marshal(f)
if err != nil {
return nil, err
}
// Build a fresh node carrying the original node's cid prefix and store it.
nd := new(mdag.ProtoNode)
nd.SetData(b)
nd.SetPrefix(&nd0.Prefix)
err = dm.dagserv.Add(dm.ctx, nd)
if err != nil {
return nil, err
}
return nd.Cid(), nil
case *mdag.RawNode:
origData := nd0.RawData()
// NOTE: within this case 'bytes' is a local slice that shadows the bytes
// package, and the 'n' declared below shadows the node parameter.
bytes := make([]byte, len(origData))
// copy orig data up to offset
copy(bytes, origData[:offset])
// copy in new data
n, err := dm.wrBuf.Read(bytes[offset:])
if err != nil && err != io.EOF {
return nil, err
}
// copy remaining data
offsetPlusN := int(offset) + n
if offsetPlusN < len(origData) {
copy(bytes[offsetPlusN:], origData[offsetPlusN:])
}
nd, err := mdag.NewRawNodeWPrefix(bytes, nd0.Cid().Prefix())
if err != nil {
return nil, err
}
err = dm.dagserv.Add(dm.ctx, nd)
if err != nil {
return nil, err
}
return nd.Cid(), nil
}
}
// Internal node (or a leaf of an unsupported type): it must be a protobuf
// node so that its unixfs block sizes can be consulted.
node, ok := n.(*mdag.ProtoNode)
if !ok {
return nil, ErrNotUnixfs
}
f, err := ft.FromBytes(node.Data())
if err != nil {
return nil, err
}
// cur is the file offset at which child i begins.
var cur uint64
for i, bs := range f.GetBlocksizes() {
// We found the correct child to write into
if cur+bs > offset {
child, err := node.Links()[i].GetNode(dm.ctx, dm.dagserv)
if err != nil {
return nil, err
}
// The child sees the offset relative to its own start.
k, err := dm.modifyDag(child, offset-cur)
if err != nil {
return nil, err
}
node.Links()[i].Cid = k
// Recache serialized node
_, err = node.EncodeProtobuf(true)
if err != nil {
return nil, err
}
if dm.wrBuf.Len() == 0 {
// No more bytes to write!
break
}
// Data remains: continue at the start of the next child.
offset = cur + bs
}
cur += bs
}
// Persist this node with its updated links.
err = dm.dagserv.Add(dm.ctx, node)
return node.Cid(), err
}
// appendData appends the chunks produced by the given splitter to the end of
// the dag rooted at nd via trickle.Append and returns that call's result.
// Nodes that are neither protobuf nor raw nodes are rejected with
// ErrNotUnixfs.
func (dm *DagModifier) appendData(nd ipld.Node, spl chunker.Splitter) (ipld.Node, error) {
	switch nd.(type) {
	case *mdag.ProtoNode, *mdag.RawNode:
		// supported root types
	default:
		return nil, ErrNotUnixfs
	}

	params := &help.DagBuilderParams{
		Dagserv:   dm.dagserv,
		Maxlinks:  help.DefaultLinksPerBlock,
		Prefix:    &dm.Prefix,
		RawLeaves: dm.RawLeaves,
	}
	return trickle.Append(dm.ctx, nd, params.New(spl))
}
// Read reads data from this dag starting at the current offset and advances
// the offset by the number of bytes read.
func (dm *DagModifier) Read(b []byte) (int, error) {
	if err := dm.readPrep(); err != nil {
		return 0, err
	}

	nread, err := dm.read.Read(b)
	dm.curWrOff += uint64(nread)
	return nread, err
}
// readPrep flushes pending writes and makes sure dm.read holds a reader over
// the current node, positioned at the current offset. An existing reader is
// reused as is.
func (dm *DagModifier) readPrep() error {
	if err := dm.Sync(); err != nil {
		return err
	}
	if dm.read != nil {
		return nil
	}

	ctx, cancel := context.WithCancel(dm.ctx)
	reader, err := uio.NewDagReader(ctx, dm.curNode, dm.dagserv)
	if err != nil {
		cancel()
		return err
	}

	want := int64(dm.curWrOff)
	got, err := reader.Seek(want, io.SeekStart)
	switch {
	case err != nil:
		cancel()
		return err
	case got != want:
		cancel()
		return ErrSeekFail
	}

	dm.readCancel = cancel
	dm.read = reader
	return nil
}
// CtxReadFull reads data from this dag starting at the current offset, passing
// ctx to the underlying reader, and advances the offset by the number of bytes
// read.
func (dm *DagModifier) CtxReadFull(ctx context.Context, b []byte) (int, error) {
	if err := dm.readPrep(); err != nil {
		return 0, err
	}

	nread, err := dm.read.CtxReadFull(ctx, b)
	dm.curWrOff += uint64(nread)
	return nread, err
}
// GetNode flushes pending writes and returns a copy of the modified DAG node.
func (dm *DagModifier) GetNode() (ipld.Node, error) {
	if err := dm.Sync(); err != nil {
		return nil, err
	}
	return dm.curNode.Copy(), nil
}
// HasChanges reports whether there are unflushed changes to this dag, i.e.
// whether a write buffer currently exists.
func (dm *DagModifier) HasChanges() bool {
	unflushed := dm.wrBuf != nil
	return unflushed
}
// Seek modifies the offset according to whence. See unixfs/io for valid whence
// values.
//
// Pending writes are flushed first. Seeking past the current end of the file
// expands it with zeros up to the new offset. A target position before the
// start of the file is rejected with ErrSeekFail, and an unknown whence with
// ErrUnrecognizedWhence. On success the new absolute offset is returned.
func (dm *DagModifier) Seek(offset int64, whence int) (int64, error) {
	if err := dm.Sync(); err != nil {
		return 0, err
	}

	fisize, err := dm.Size()
	if err != nil {
		return 0, err
	}

	// Compute the target as a signed value so that a position before the
	// start of the file is detectable instead of wrapping around in uint64.
	var newoffset int64
	switch whence {
	case io.SeekCurrent:
		newoffset = int64(dm.curWrOff) + offset
	case io.SeekStart:
		newoffset = offset
	case io.SeekEnd:
		// NOTE(review): the offset is subtracted from the end, so a positive
		// offset moves backwards. This differs from the io.Seeker convention
		// (end + offset) and is kept as is for existing callers.
		newoffset = fisize - offset
	default:
		return 0, ErrUnrecognizedWhence
	}
	if newoffset < 0 {
		return 0, ErrSeekFail
	}

	if newoffset > fisize {
		if err := dm.expandSparse(newoffset - fisize); err != nil {
			return 0, err
		}
	}
	dm.curWrOff = uint64(newoffset)
	dm.writeStart = uint64(newoffset)

	// Keep an already open reader in step with the new position.
	if dm.read != nil {
		if _, err := dm.read.Seek(offset, whence); err != nil {
			return 0, err
		}
	}

	return newoffset, nil
}
// Truncate resizes the file to 'size' bytes. Pending writes are flushed first.
// A size equal to the current one is a no-op, a larger size pads the file with
// zeros, and a smaller size replaces the current node with a truncated one.
func (dm *DagModifier) Truncate(size int64) error {
	if err := dm.Sync(); err != nil {
		return err
	}

	current, err := dm.Size()
	if err != nil {
		return err
	}

	switch {
	case size == current:
		return nil
	case size > current:
		// Truncate can also be used to expand the file.
		return dm.expandSparse(size - current)
	}

	shrunk, err := dagTruncate(dm.ctx, dm.curNode, uint64(size), dm.dagserv)
	if err != nil {
		return err
	}
	if err := dm.dagserv.Add(dm.ctx, shrunk); err != nil {
		return err
	}

	dm.curNode = shrunk
	return nil
}
// dagTruncate truncates the given node to 'size' and returns the modified Node.
//
// A protobuf leaf is modified in place (its data is replaced) and returned; a
// raw leaf yields a new raw node. For an internal node, the child containing
// the cut point is truncated recursively, every later link is dropped, and the
// unixfs block sizes are rebuilt to match. The internal node passed in is
// itself mutated and returned.
func dagTruncate(ctx context.Context, n ipld.Node, size uint64, ds ipld.DAGService) (ipld.Node, error) {
if len(n.Links()) == 0 {
switch nd := n.(type) {
case *mdag.ProtoNode:
// TODO: this can likely be done without marshaling and remarshaling
pbn, err := ft.FromBytes(nd.Data())
if err != nil {
return nil, err
}
// NOTE(review): slicing to 'size' assumes size <= len(pbn.Data); the
// visible callers only shrink, confirm for any other caller.
nd.SetData(ft.WrapData(pbn.Data[:size]))
return nd, nil
case *mdag.RawNode:
return mdag.NewRawNodeWPrefix(nd.RawData()[:size], nd.Cid().Prefix())
}
}
nd, ok := n.(*mdag.ProtoNode)
if !ok {
return nil, ErrNotUnixfs
}
// cur is the file offset at which the current child begins; end is the index
// of the child that gets truncated; modified is that child's new version.
var cur uint64
end := 0
var modified ipld.Node
ndata, err := ft.FSNodeFromBytes(nd.Data())
if err != nil {
return nil, err
}
// Reset the block sizes of the node to adjust them
// with the new values of the truncated children.
ndata.RemoveAllBlockSizes()
for i, lnk := range nd.Links() {
child, err := lnk.GetNode(ctx, ds)
if err != nil {
return nil, err
}
childsize, err := fileSize(child)
if err != nil {
return nil, err
}
// found the child we want to cut
if size < cur+childsize {
nchild, err := dagTruncate(ctx, child, size-cur, ds)
if err != nil {
return nil, err
}
ndata.AddBlockSize(size - cur)
modified = nchild
end = i
break
}
// Child lies entirely before the cut point: keep it unchanged.
cur += childsize
ndata.AddBlockSize(childsize)
}
// NOTE(review): if no child contains the cut point (size >= total size),
// 'modified' is still nil here and 'end' is 0 - confirm callers never do that.
err = ds.Add(ctx, modified)
if err != nil {
return nil, err
}
// Keep the links before the truncated child, then re-add that child with an
// empty link name.
nd.SetLinks(nd.Links()[:end])
err = nd.AddNodeLink("", modified)
if err != nil {
return nil, err
}
d, err := ndata.GetBytes()
if err != nil {
return nil, err
}
// Save the new block sizes to the original node.
nd.SetData(d)
// invalidate cache and recompute serialized data
_, err = nd.EncodeProtobuf(true)
if err != nil {
return nil, err
}
return nd, nil
}

View File

@ -1,803 +0,0 @@
package mod
import (
"context"
"fmt"
"io"
"io/ioutil"
"testing"
h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers"
trickle "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/trickle"
uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io"
testu "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/test"
u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util"
)
// testModWrite writes 'size' random bytes at offset 'beg' through dm, applies
// the same change to orig (growing it when needed), verifies the dag against
// it and returns the updated expected content.
func testModWrite(t *testing.T, beg, size uint64, orig []byte, dm *DagModifier, opts testu.NodeOpts) []byte {
	payload := make([]byte, size)
	u.NewTimeSeededRand().Read(payload)

	if end := size + beg; end > uint64(len(orig)) {
		orig = append(orig, make([]byte, end-uint64(len(orig)))...)
	}
	copy(orig[beg:], payload)

	written, err := dm.WriteAt(payload, int64(beg))
	if err != nil {
		t.Fatal(err)
	}
	if written != int(size) {
		t.Fatalf("Mod length not correct! %d != %d", written, size)
	}

	verifyNode(t, orig, dm, opts)
	return orig
}
// verifyNode checks that the node held by dm is a well-formed trickle dag for
// the given options and that reading it back yields exactly orig.
func verifyNode(t *testing.T, orig []byte, dm *DagModifier, opts testu.NodeOpts) {
	root, err := dm.GetNode()
	if err != nil {
		t.Fatal(err)
	}

	params := trickle.VerifyParams{
		Getter:      dm.dagserv,
		Direct:      h.DefaultLinksPerBlock,
		LayerRepeat: 4,
		Prefix:      &opts.Prefix,
		RawLeaves:   opts.RawLeavesUsed,
	}
	if err := trickle.VerifyTrickleDagStructure(root, params); err != nil {
		t.Fatal(err)
	}

	reader, err := uio.NewDagReader(context.Background(), root, dm.dagserv)
	if err != nil {
		t.Fatal(err)
	}
	got, err := ioutil.ReadAll(reader)
	if err != nil {
		t.Fatal(err)
	}

	if err := testu.ArrComp(got, orig); err != nil {
		t.Fatal(err)
	}
}
// runAllSubtests runs tfunc once per predefined node option set, each as its
// own named subtest.
func runAllSubtests(t *testing.T, tfunc func(*testing.T, testu.NodeOpts)) {
	variants := []struct {
		name string
		opts testu.NodeOpts
	}{
		{"opts=ProtoBufLeaves", testu.UseProtoBufLeaves},
		{"opts=RawLeaves", testu.UseRawLeaves},
		{"opts=CidV1", testu.UseCidV1},
		{"opts=Blake2b256", testu.UseBlake2b256},
	}
	for _, v := range variants {
		v := v
		t.Run(v.name, func(t *testing.T) { tfunc(t, v.opts) })
	}
}
func TestDagModifierBasic(t *testing.T) {
	runAllSubtests(t, testDagModifierBasic)
}

// testDagModifierBasic overwrites, extends and appends to a 50000-byte random
// file and checks the content after every step and the final reported size.
func testDagModifierBasic(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	content, root := testu.GetRandomNode(t, ds, 50000, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	// Within zero block
	t.Log("Testing mod within zero block")
	content = testModWrite(t, 15, 60, content, dagmod, opts)

	// Within bounds of existing file
	t.Log("Testing mod within bounds of existing multiblock file.")
	content = testModWrite(t, 1000, 4000, content, dagmod, opts)

	// Extend bounds
	t.Log("Testing mod that extends file.")
	content = testModWrite(t, 49500, 4000, content, dagmod, opts)

	// "Append"
	t.Log("Testing pure append")
	_ = testModWrite(t, uint64(len(content)), 3000, content, dagmod, opts)

	// Verify reported length
	final, err := dagmod.GetNode()
	if err != nil {
		t.Fatal(err)
	}
	got, err := fileSize(final)
	if err != nil {
		t.Fatal(err)
	}
	if want := uint64(50000 + 3500 + 3000); got != want {
		t.Fatalf("Final reported size is incorrect [%d != %d]", got, want)
	}
}
func TestMultiWrite(t *testing.T) {
	runAllSubtests(t, testMultiWrite)
}

// testMultiWrite fills an empty file one byte at a time with WriteAt and
// checks the reported size after every write.
func testMultiWrite(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, ds, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	data := make([]byte, 4000)
	u.NewTimeSeededRand().Read(data)

	for pos := range data {
		wrote, err := dagmod.WriteAt(data[pos:pos+1], int64(pos))
		if err != nil {
			t.Fatal(err)
		}
		if wrote != 1 {
			t.Fatal("Somehow wrote the wrong number of bytes! (n != 1)")
		}

		size, err := dagmod.Size()
		if err != nil {
			t.Fatal(err)
		}
		if size != int64(pos+1) {
			t.Fatal("Size was reported incorrectly")
		}
	}

	verifyNode(t, data, dagmod, opts)
}
func TestMultiWriteAndFlush(t *testing.T) {
	runAllSubtests(t, testMultiWriteAndFlush)
}

// testMultiWriteAndFlush writes 20 bytes one at a time, syncing after each
// write, and verifies the resulting dag.
func testMultiWriteAndFlush(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, ds, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	data := make([]byte, 20)
	u.NewTimeSeededRand().Read(data)

	for pos := range data {
		wrote, err := dagmod.WriteAt(data[pos:pos+1], int64(pos))
		if err != nil {
			t.Fatal(err)
		}
		if wrote != 1 {
			t.Fatal("Somehow wrote the wrong number of bytes! (n != 1)")
		}
		if err := dagmod.Sync(); err != nil {
			t.Fatal(err)
		}
	}

	verifyNode(t, data, dagmod, opts)
}
func TestWriteNewFile(t *testing.T) {
	runAllSubtests(t, testWriteNewFile)
}

// testWriteNewFile writes 2000 random bytes into an empty file with a single
// Write call and verifies the resulting dag.
func testWriteNewFile(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, ds, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	payload := make([]byte, 2000)
	u.NewTimeSeededRand().Read(payload)

	wrote, err := dagmod.Write(payload)
	if err != nil {
		t.Fatal(err)
	}
	if wrote != len(payload) {
		t.Fatal("Wrote wrong amount")
	}

	verifyNode(t, payload, dagmod, opts)
}
func TestMultiWriteCoal(t *testing.T) {
	runAllSubtests(t, testMultiWriteCoal)
}

// testMultiWriteCoal repeatedly rewrites a growing prefix of the data from
// offset 0 (1 byte, then 2, ... up to 1000) and verifies the final dag.
func testMultiWriteCoal(t *testing.T, opts testu.NodeOpts) {
	dserv := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, dserv, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, dserv, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	data := make([]byte, 1000)
	u.NewTimeSeededRand().Read(data)

	for i := 0; i < len(data); i++ {
		n, err := dagmod.WriteAt(data[:i+1], 0)
		if err != nil {
			fmt.Println("FAIL AT ", i)
			t.Fatal(err)
		}
		if n != i+1 {
			// Report the actual and expected counts; each iteration writes
			// i+1 bytes, not 1.
			t.Fatalf("Somehow wrote the wrong number of bytes! (%d != %d)", n, i+1)
		}
	}

	verifyNode(t, data, dagmod, opts)
}
func TestLargeWriteChunks(t *testing.T) {
	runAllSubtests(t, testLargeWriteChunks)
}

// testLargeWriteChunks writes 10000000 random bytes in 1000-byte WriteAt calls,
// then reads the whole file back from the start and compares it.
func testLargeWriteChunks(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, ds, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	const (
		wrsize   = 1000
		datasize = 10000000
	)

	data := make([]byte, datasize)
	u.NewTimeSeededRand().Read(data)

	for off := 0; off+wrsize <= datasize; off += wrsize {
		wrote, err := dagmod.WriteAt(data[off:off+wrsize], int64(off))
		if err != nil {
			t.Fatal(err)
		}
		if wrote != wrsize {
			t.Fatal("failed to write buffer")
		}
	}

	if _, err := dagmod.Seek(0, io.SeekStart); err != nil {
		t.Fatal(err)
	}

	out, err := ioutil.ReadAll(dagmod)
	if err != nil {
		t.Fatal(err)
	}
	if err := testu.ArrComp(out, data); err != nil {
		t.Fatal(err)
	}
}
func TestDagTruncate(t *testing.T) {
	runAllSubtests(t, testDagTruncate)
}

// testDagTruncate shrinks a 50000-byte random file to 12345, 10 and 0 bytes,
// checking the reported size each time and the content after the first cut.
func testDagTruncate(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	content, root := testu.GetRandomNode(t, ds, 50000, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	// truncateTo truncates the file and checks the size reported afterwards.
	truncateTo := func(want int64) {
		if err := dagmod.Truncate(want); err != nil {
			t.Fatal(err)
		}
		got, err := dagmod.Size()
		if err != nil {
			t.Fatal(err)
		}
		if got != want {
			t.Fatal("size was incorrect!")
		}
	}

	truncateTo(12345)

	if _, err := dagmod.Seek(0, io.SeekStart); err != nil {
		t.Fatal(err)
	}
	out, err := ioutil.ReadAll(dagmod)
	if err != nil {
		t.Fatal(err)
	}
	if err := testu.ArrComp(out, content[:12345]); err != nil {
		t.Fatal(err)
	}

	truncateTo(10)
	truncateTo(0)
}
// TestDagTruncateSameSize checks that truncating a DAG to its current size
// (i.e., doing nothing) leaves the DAG, and therefore its hash, unchanged.
func TestDagTruncateSameSize(t *testing.T) {
	runAllSubtests(t, testDagTruncateSameSize)
}

func testDagTruncateSameSize(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	_, root := testu.GetRandomNode(t, ds, 50000, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	// Copied from `TestDagTruncate`.

	size, err := dagmod.Size()
	if err != nil {
		t.Fatal(err)
	}
	if err := dagmod.Truncate(size); err != nil {
		t.Fatal(err)
	}

	after, err := dagmod.GetNode()
	if err != nil {
		t.Fatal(err)
	}
	if !after.Cid().Equals(root.Cid()) {
		t.Fatal("the node has been modified!")
	}
}
func TestSparseWrite(t *testing.T) {
	runAllSubtests(t, testSparseWrite)
}

// testSparseWrite writes 2500 random bytes at offset 2500 of an empty file and
// expects the first 2500 bytes to read back as zeros.
func testSparseWrite(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, ds, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	expected := make([]byte, 5000)
	tail := expected[2500:]
	u.NewTimeSeededRand().Read(tail)

	wrote, err := dagmod.WriteAt(tail, 2500)
	if err != nil {
		t.Fatal(err)
	}
	if wrote != 2500 {
		t.Fatal("incorrect write amount")
	}

	if _, err := dagmod.Seek(0, io.SeekStart); err != nil {
		t.Fatal(err)
	}
	out, err := ioutil.ReadAll(dagmod)
	if err != nil {
		t.Fatal(err)
	}
	if err := testu.ArrComp(out, expected); err != nil {
		t.Fatal(err)
	}
}
func TestSeekPastEndWrite(t *testing.T) {
	runAllSubtests(t, testSeekPastEndWrite)
}

// testSeekPastEndWrite seeks to offset 2500 of an empty file, writes 2500
// random bytes there and expects the skipped region to read back as zeros.
func testSeekPastEndWrite(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, ds, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	expected := make([]byte, 5000)
	tail := expected[2500:]
	u.NewTimeSeededRand().Read(tail)

	pos, err := dagmod.Seek(2500, io.SeekStart)
	if err != nil {
		t.Fatal(err)
	}
	if pos != 2500 {
		t.Fatal("failed to seek")
	}

	wrote, err := dagmod.Write(tail)
	if err != nil {
		t.Fatal(err)
	}
	if wrote != 2500 {
		t.Fatal("incorrect write amount")
	}

	if _, err := dagmod.Seek(0, io.SeekStart); err != nil {
		t.Fatal(err)
	}
	out, err := ioutil.ReadAll(dagmod)
	if err != nil {
		t.Fatal(err)
	}
	if err := testu.ArrComp(out, expected); err != nil {
		t.Fatal(err)
	}
}
func TestRelativeSeek(t *testing.T) {
	runAllSubtests(t, testRelativeSeek)
}

// testRelativeSeek alternates one-byte writes with relative seeks of one byte,
// then reads from the modifier and checks every non-zero byte against its
// index.
func testRelativeSeek(t *testing.T, opts testu.NodeOpts) {
	dserv := testu.GetDAGServ()
	n := testu.GetEmptyNode(t, dserv, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	for i := 0; i < 64; i++ {
		// A failed write must fail the test rather than be silently dropped.
		if _, err := dagmod.Write([]byte{byte(i)}); err != nil {
			t.Fatal(err)
		}
		if _, err := dagmod.Seek(1, io.SeekCurrent); err != nil {
			t.Fatal(err)
		}
	}

	// NOTE(review): the read starts at the current offset, which after the
	// loop appears to be the end of the file, so 'out' may be empty and the
	// loop below vacuous - confirm whether a seek to the start was intended.
	out, err := ioutil.ReadAll(dagmod)
	if err != nil {
		t.Fatal(err)
	}

	for i, v := range out {
		if v != 0 && i/2 != int(v) {
			t.Errorf("expected %d, at index %d, got %d", i/2, i, v)
		}
	}
}
func TestInvalidSeek(t *testing.T) {
	runAllSubtests(t, testInvalidSeek)
}

// testInvalidSeek expects Seek with an unknown whence value to fail with
// ErrUnrecognizedWhence.
func testInvalidSeek(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, ds, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	if _, err := dagmod.Seek(10, -10); err != ErrUnrecognizedWhence {
		t.Fatal(err)
	}
}
func TestEndSeek(t *testing.T) {
	runAllSubtests(t, testEndSeek)
}

// testEndSeek writes 100 bytes and checks the offset returned by a zero seek
// relative to the current position, the start and the end of the file.
func testEndSeek(t *testing.T, opts testu.NodeOpts) {
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(t, ds, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	if _, err := dagmod.Write(make([]byte, 100)); err != nil {
		t.Fatal(err)
	}

	steps := []struct {
		whence int
		want   int64
		msg    string
	}{
		{io.SeekCurrent, 100, "expected the relative seek 0 to return current location"},
		{io.SeekStart, 0, "expected the absolute seek to set offset at 0"},
		{io.SeekEnd, 100, "expected the end seek to set offset at end"},
	}
	for _, step := range steps {
		got, err := dagmod.Seek(0, step.whence)
		if err != nil {
			t.Fatal(err)
		}
		if got != step.want {
			t.Fatal(step.msg)
		}
	}
}
func TestReadAndSeek(t *testing.T) {
	runAllSubtests(t, testReadAndSeek)
}

// testReadAndSeek writes the bytes 0..7, reads the first four from the start,
// skips one byte with a relative seek and reads the remaining three.
func testReadAndSeek(t *testing.T, opts testu.NodeOpts) {
	dserv := testu.GetDAGServ()
	n := testu.GetEmptyNode(t, dserv, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	writeBuf := []byte{0, 1, 2, 3, 4, 5, 6, 7}
	// A failed write must fail the test rather than be silently dropped.
	if _, err := dagmod.Write(writeBuf); err != nil {
		t.Fatal(err)
	}

	if !dagmod.HasChanges() {
		t.Fatal("there are changes, this should be true")
	}

	readBuf := make([]byte, 4)
	offset, err := dagmod.Seek(0, io.SeekStart)
	// Check the error first: the offset is meaningless when the seek failed.
	if err != nil {
		t.Fatal(err)
	}
	if offset != 0 {
		t.Fatal("expected offset to be 0")
	}

	// read 0,1,2,3
	c, err := dagmod.Read(readBuf)
	if err != nil {
		t.Fatal(err)
	}
	if c != 4 {
		t.Fatalf("expected length of 4 got %d", c)
	}

	for i := byte(0); i < 4; i++ {
		if readBuf[i] != i {
			t.Fatalf("wrong value %d [at index %d]", readBuf[i], i)
		}
	}

	// skip 4
	_, err = dagmod.Seek(1, io.SeekCurrent)
	if err != nil {
		t.Fatalf("error: %s, offset %d, reader offset %d", err, dagmod.curWrOff, getOffset(dagmod.read))
	}

	// read 5,6,7
	readBuf = make([]byte, 3)
	c, err = dagmod.Read(readBuf)
	if err != nil {
		t.Fatal(err)
	}
	if c != 3 {
		t.Fatalf("expected length of 3 got %d", c)
	}

	for i := byte(0); i < 3; i++ {
		if readBuf[i] != i+5 {
			t.Fatalf("wrong value %d [at index %d]", readBuf[i], i)
		}
	}
}
func TestCtxRead(t *testing.T) {
	runAllSubtests(t, testCtxRead)
}

// testCtxRead writes the bytes 0..7, seeks back to the start and checks that
// CtxReadFull returns the first four of them.
func testCtxRead(t *testing.T, opts testu.NodeOpts) {
	dserv := testu.GetDAGServ()
	n := testu.GetEmptyNode(t, dserv, opts)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512))
	if err != nil {
		t.Fatal(err)
	}
	if opts.ForceRawLeaves {
		dagmod.RawLeaves = true
	}

	_, err = dagmod.Write([]byte{0, 1, 2, 3, 4, 5, 6, 7})
	if err != nil {
		t.Fatal(err)
	}
	// A failed seek would make the read below start at the wrong offset.
	if _, err := dagmod.Seek(0, io.SeekStart); err != nil {
		t.Fatal(err)
	}

	readBuf := make([]byte, 4)
	_, err = dagmod.CtxReadFull(ctx, readBuf)
	if err != nil {
		t.Fatal(err)
	}

	err = testu.ArrComp(readBuf, []byte{0, 1, 2, 3})
	if err != nil {
		t.Fatal(err)
	}
	// TODO(Kubuxu): context cancel case, I will do it after I figure out dagreader tests,
	// because this is exactly the same.
}
// BenchmarkDagmodWrite measures sequential 4096-byte writes into an initially
// empty file. Setup and random data generation run with the timer stopped.
func BenchmarkDagmodWrite(b *testing.B) {
	b.StopTimer()
	ds := testu.GetDAGServ()
	root := testu.GetEmptyNode(b, ds, testu.UseProtoBufLeaves)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	const wrsize = 4096

	dagmod, err := NewDagModifier(ctx, root, ds, testu.SizeSplitterGen(512))
	if err != nil {
		b.Fatal(err)
	}

	buf := make([]byte, b.N*wrsize)
	u.NewTimeSeededRand().Read(buf)
	b.StartTimer()
	b.SetBytes(int64(wrsize))

	for i := 0; i < b.N; i++ {
		chunk := buf[i*wrsize : (i+1)*wrsize]
		wrote, err := dagmod.Write(chunk)
		if err != nil {
			b.Fatal(err)
		}
		if wrote != wrsize {
			b.Fatal("Wrote bad size")
		}
	}
}
// getOffset returns the reader's current position, obtained with a zero-length
// relative seek. It panics when that seek fails.
func getOffset(reader uio.DagReader) int64 {
	pos, err := reader.Seek(0, io.SeekCurrent)
	if err == nil {
		return pos
	}
	panic("failed to retrieve offset: " + err.Error())
}

View File

@ -1,8 +0,0 @@
# Build rules for the protobuf sources of this directory.
# NOTE(review): $(d) is presumably set to the current directory by
# mk/header.mk - confirm there.
include mk/header.mk
# Every .proto file found directly in $(d).
PB_$(d) = $(wildcard $(d)/*.proto)
# The generated Go file for each .proto file (same name, .pb.go suffix).
TGTS_$(d) = $(PB_$(d):.proto=.pb.go)
# Registering the generated files in DEPS_GO is disabled.
#DEPS_GO += $(TGTS_$(d))
include mk/footer.mk

View File

@ -1,147 +0,0 @@
// Code generated by protoc-gen-gogo.
// source: unixfs.proto
// DO NOT EDIT!
/*
Package unixfs_pb is a generated protocol buffer package.
It is generated from these files:
unixfs.proto
It has these top-level messages:
Data
Metadata
*/
package unixfs_pb
import proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto"
import fmt "fmt"
import math "math"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// Data_DataType is the Go representation of the DataType enum declared inside
// the Data message of unixfs.proto.
type Data_DataType int32
// Enum values, numbered as in unixfs.proto.
const (
Data_Raw Data_DataType = 0
Data_Directory Data_DataType = 1
Data_File Data_DataType = 2
Data_Metadata Data_DataType = 3
Data_Symlink Data_DataType = 4
Data_HAMTShard Data_DataType = 5
)
// Data_DataType_name maps an enum number to its name.
var Data_DataType_name = map[int32]string{
0: "Raw",
1: "Directory",
2: "File",
3: "Metadata",
4: "Symlink",
5: "HAMTShard",
}
// Data_DataType_value maps an enum name to its number.
var Data_DataType_value = map[string]int32{
"Raw": 0,
"Directory": 1,
"File": 2,
"Metadata": 3,
"Symlink": 4,
"HAMTShard": 5,
}
// Enum returns a pointer to a fresh copy of x, for use in pointer-typed
// message fields such as Data.Type.
func (x Data_DataType) Enum() *Data_DataType {
p := new(Data_DataType)
*p = x
return p
}
// String returns the name of x as looked up in Data_DataType_name.
func (x Data_DataType) String() string {
return proto.EnumName(Data_DataType_name, int32(x))
}
// UnmarshalJSON decodes a JSON enum value into x using Data_DataType_value.
func (x *Data_DataType) UnmarshalJSON(data []byte) error {
value, err := proto.UnmarshalJSONEnum(Data_DataType_value, data, "Data_DataType")
if err != nil {
return err
}
*x = Data_DataType(value)
return nil
}
// Data is the Go representation of the Data message of unixfs.proto. Optional
// scalar fields are pointers so that an unset field can be told apart from a
// zero value; the Get* accessors return the zero value for unset fields and
// are safe to call on a nil *Data.
type Data struct {
Type *Data_DataType `protobuf:"varint,1,req,name=Type,enum=unixfs.pb.Data_DataType" json:"Type,omitempty"`
Data []byte `protobuf:"bytes,2,opt,name=Data" json:"Data,omitempty"`
Filesize *uint64 `protobuf:"varint,3,opt,name=filesize" json:"filesize,omitempty"`
Blocksizes []uint64 `protobuf:"varint,4,rep,name=blocksizes" json:"blocksizes,omitempty"`
HashType *uint64 `protobuf:"varint,5,opt,name=hashType" json:"hashType,omitempty"`
Fanout *uint64 `protobuf:"varint,6,opt,name=fanout" json:"fanout,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
// Reset clears m to the empty message.
func (m *Data) Reset() { *m = Data{} }
// String returns the compact text form of m.
func (m *Data) String() string { return proto.CompactTextString(m) }
// ProtoMessage marks Data as a protobuf message type.
func (*Data) ProtoMessage() {}
// GetType returns the Type field, or Data_Raw when it is unset.
func (m *Data) GetType() Data_DataType {
if m != nil && m.Type != nil {
return *m.Type
}
return Data_Raw
}
// GetData returns the Data field, or nil when m is nil.
func (m *Data) GetData() []byte {
if m != nil {
return m.Data
}
return nil
}
// GetFilesize returns the Filesize field, or 0 when it is unset.
func (m *Data) GetFilesize() uint64 {
if m != nil && m.Filesize != nil {
return *m.Filesize
}
return 0
}
// GetBlocksizes returns the Blocksizes field, or nil when m is nil.
func (m *Data) GetBlocksizes() []uint64 {
if m != nil {
return m.Blocksizes
}
return nil
}
// GetHashType returns the HashType field, or 0 when it is unset.
func (m *Data) GetHashType() uint64 {
if m != nil && m.HashType != nil {
return *m.HashType
}
return 0
}
// GetFanout returns the Fanout field, or 0 when it is unset.
func (m *Data) GetFanout() uint64 {
if m != nil && m.Fanout != nil {
return *m.Fanout
}
return 0
}
// Metadata is the Go representation of the Metadata message of unixfs.proto.
type Metadata struct {
MimeType *string `protobuf:"bytes,1,opt,name=MimeType" json:"MimeType,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
// Reset clears m to the empty message.
func (m *Metadata) Reset() { *m = Metadata{} }
// String returns the compact text form of m.
func (m *Metadata) String() string { return proto.CompactTextString(m) }
// ProtoMessage marks Metadata as a protobuf message type.
func (*Metadata) ProtoMessage() {}
// GetMimeType returns the MimeType field, or "" when it is unset or m is nil.
func (m *Metadata) GetMimeType() string {
if m != nil && m.MimeType != nil {
return *m.MimeType
}
return ""
}
// init registers the message types and the DataType enum of this package with
// the proto runtime under their "unixfs.pb" names.
func init() {
proto.RegisterType((*Data)(nil), "unixfs.pb.Data")
proto.RegisterType((*Metadata)(nil), "unixfs.pb.Metadata")
proto.RegisterEnum("unixfs.pb.Data_DataType", Data_DataType_name, Data_DataType_value)
}

View File

@ -1,24 +0,0 @@
package unixfs.pb;
// Data is the payload stored in a unixfs node.
message Data {
// DataType identifies the kind of unixfs node.
enum DataType {
Raw = 0;
Directory = 1;
File = 2;
Metadata = 3;
Symlink = 4;
HAMTShard = 5;
}
// Type is the only required field.
required DataType Type = 1;
// Data carries the node's own bytes, if any.
optional bytes Data = 2;
// filesize is the total size recorded for the file.
optional uint64 filesize = 3;
// blocksizes holds one size per child block.
// NOTE(review): presumably in link order - confirm against the writers.
repeated uint64 blocksizes = 4;
optional uint64 hashType = 5;
optional uint64 fanout = 6;
}
// Metadata carries an optional MIME type string.
message Metadata {
optional string MimeType = 1;
}

View File

@ -1,135 +0,0 @@
package testu
import (
"bytes"
"context"
"fmt"
"io"
"io/ioutil"
"testing"
mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
mdagmock "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test"
ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers"
trickle "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/trickle"
u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util"
mh "gx/ipfs/QmPnFwZ2JXKnXgMw8CdBPxn7FWh6LLdjUjxV1fKHuJnkr8/go-multihash"
chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker"
cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid"
ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format"
)
// SizeSplitterGen returns a splitter generator that wraps any reader in a
// fixed-size splitter producing chunks of 'size' bytes.
func SizeSplitterGen(size int64) chunker.SplitterGen {
	gen := func(r io.Reader) chunker.Splitter {
		return chunker.NewSizeSplitter(r, size)
	}
	return gen
}
// GetDAGServ returns a new mock DAGService for use in tests.
func GetDAGServ() ipld.DAGService {
	var ds ipld.DAGService = mdagmock.Mock()
	return ds
}
// NodeOpts is used by GetNode, GetEmptyNode and GetRandomNode to select how
// the test node is built.
type NodeOpts struct {
// Prefix is the cid prefix handed to the dag builder.
Prefix cid.Prefix
// ForceRawLeaves if true will force the use of raw leaves
ForceRawLeaves bool
// RawLeavesUsed is true if raw leaves are either implicitly or explicitly enabled
RawLeavesUsed bool
}
// Predefined NodeOpts covering the node layouts exercised by the tests.
// UseBlake2b256 is filled in by init.
var (
	UseProtoBufLeaves = NodeOpts{Prefix: mdag.V0CidPrefix()}
	UseRawLeaves      = NodeOpts{Prefix: mdag.V0CidPrefix(), ForceRawLeaves: true, RawLeavesUsed: true}
	UseCidV1          = NodeOpts{Prefix: mdag.V1CidPrefix(), RawLeavesUsed: true}
	UseBlake2b256     NodeOpts
)

// init derives UseBlake2b256 from UseCidV1, switching the multihash type to
// blake2b-256 and setting the hash length to -1.
func init() {
	opts := UseCidV1
	opts.Prefix.MhType = mh.Names["blake2b-256"]
	opts.Prefix.MhLength = -1
	UseBlake2b256 = opts
}
// GetNode builds a unixfs file node holding the given data, laid out as a
// trickle dag with 500-byte chunks, and returns its root. A build failure
// aborts the test through t.Fatal.
func GetNode(t testing.TB, dserv ipld.DAGService, data []byte, opts NodeOpts) ipld.Node {
	params := h.DagBuilderParams{
		Dagserv:   dserv,
		Maxlinks:  h.DefaultLinksPerBlock,
		Prefix:    &opts.Prefix,
		RawLeaves: opts.RawLeavesUsed,
	}

	splitter := SizeSplitterGen(500)(bytes.NewReader(data))
	root, err := trickle.Layout(params.New(splitter))
	if err != nil {
		t.Fatal(err)
	}
	return root
}
// GetEmptyNode returns a unixfs file node built from zero bytes of data.
func GetEmptyNode(t testing.TB, dserv ipld.DAGService, opts NodeOpts) ipld.Node {
	empty := []byte{}
	return GetNode(t, dserv, empty, opts)
}
// GetRandomNode generates 'size' random bytes, builds a unixfs file node from
// them and returns both the bytes and the node.
func GetRandomNode(t testing.TB, dserv ipld.DAGService, size int64, opts NodeOpts) ([]byte, ipld.Node) {
	content, err := ioutil.ReadAll(io.LimitReader(u.NewTimeSeededRand(), size))
	if err != nil {
		t.Fatal(err)
	}
	return content, GetNode(t, dserv, content, opts)
}
// ArrComp compares two byte slices. It returns nil when they are identical,
// and otherwise an error naming either the differing lengths or the first
// index at which the contents differ.
func ArrComp(a, b []byte) error {
	if la, lb := len(a), len(b); la != lb {
		return fmt.Errorf("arrays differ in length. %d != %d", la, lb)
	}
	for i := 0; i < len(a); i++ {
		if a[i] != b[i] {
			return fmt.Errorf("arrays differ at index: %d", i)
		}
	}
	return nil
}
// PrintDag pretty-prints the dag rooted at nd to stdout, one node per line
// with its file size, type and number of block sizes, indenting children one
// level deeper than their parent. It panics when a node cannot be decoded or
// fetched, or when a child is not a protobuf node.
func PrintDag(nd *mdag.ProtoNode, ds ipld.DAGService, indent int) {
	fsn, err := ft.FromBytes(nd.Data())
	if err != nil {
		panic(err)
	}

	// pad writes the indentation for the current depth.
	pad := func() {
		for i := 0; i < indent; i++ {
			fmt.Print(" ")
		}
	}

	links := nd.Links()
	hasChildren := len(links) > 0

	pad()
	fmt.Printf("{size = %d, type = %s, children = %d", fsn.GetFilesize(), fsn.GetType().String(), len(fsn.GetBlocksizes()))
	if hasChildren {
		fmt.Println()
	}

	for _, lnk := range links {
		child, err := lnk.GetNode(context.Background(), ds)
		if err != nil {
			panic(err)
		}
		PrintDag(child.(*mdag.ProtoNode), ds, indent+1)
	}

	if hasChildren {
		pad()
	}
	fmt.Println("}")
}

View File

@ -1,305 +0,0 @@
// Package unixfs implements a data format for files in the IPFS filesystem. It
// is not the only format in ipfs, but it is the one that the filesystem
// assumes.
package unixfs
import (
"errors"
proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto"
dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag"
pb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb"
)
// Shorthands for the protobuf node types, so that users of this package can
// name a node type without referring to the pb package.
const (
TRaw = pb.Data_Raw
TFile = pb.Data_File
TDirectory = pb.Data_Directory
TMetadata = pb.Data_Metadata
TSymlink = pb.Data_Symlink
THAMTShard = pb.Data_HAMTShard
)
// Common errors returned by this package.
var (
ErrMalformedFileFormat = errors.New("malformed data in file format")
ErrUnrecognizedType = errors.New("unrecognized node type")
)
// FromBytes decodes a byte slice into a protobuf Data message. It returns a
// nil message together with the error when decoding fails.
func FromBytes(data []byte) (*pb.Data, error) {
	var msg pb.Data
	if err := proto.Unmarshal(data, &msg); err != nil {
		return nil, err
	}
	return &msg, nil
}
// FilePBData builds a protobuf message of type File carrying the given data
// and total file size, and returns its marshaled bytes.
func FilePBData(data []byte, totalsize uint64) []byte {
	typ := pb.Data_File
	msg := &pb.Data{
		Type:     &typ,
		Data:     data,
		Filesize: proto.Uint64(totalsize),
	}

	encoded, err := proto.Marshal(msg)
	if err != nil {
		// Marshal only fails when a required field is missing, and every
		// required field is set above. Reaching this point means the message
		// definition changed without this function being updated, which is a
		// programmer error.
		panic(err)
	}
	return encoded
}
// FolderPBData returns the marshaled bytes of a pb.Data message whose type is
// Data_Directory and which carries no other fields.
//
// It panics if marshaling fails.
func FolderPBData() []byte {
	dirType := pb.Data_Directory
	msg := &pb.Data{Type: &dirType}

	encoded, err := proto.Marshal(msg)
	if err != nil {
		// Not expected to happen: the message is fully specified above.
		panic(err)
	}
	return encoded
}
// WrapData wraps b in a pb.Data message of type Data_Raw, records len(b) as
// the Filesize, and returns the marshaled bytes.
//
// It panics if marshaling fails.
func WrapData(b []byte) []byte {
	rawType := pb.Data_Raw
	msg := &pb.Data{
		Type:     &rawType,
		Data:     b,
		Filesize: proto.Uint64(uint64(len(b))),
	}

	encoded, err := proto.Marshal(msg)
	if err != nil {
		// Not expected to happen: the message is fully specified above.
		panic(err)
	}
	return encoded
}
// SymlinkData returns the marshaled bytes of a pb.Data message of type
// Data_Symlink whose payload is the given path. No Filesize is set.
// On a marshaling failure it returns nil bytes and the error.
func SymlinkData(path string) ([]byte, error) {
	linkType := pb.Data_Symlink
	msg := &pb.Data{
		Type: &linkType,
		Data: []byte(path),
	}

	encoded, err := proto.Marshal(msg)
	if err != nil {
		return nil, err
	}
	return encoded, nil
}
// UnwrapData decodes data as a pb.Data message and returns its Data payload.
// On a decoding failure it returns nil bytes and the error.
func UnwrapData(data []byte) ([]byte, error) {
	msg := &pb.Data{}
	if err := proto.Unmarshal(data, msg); err != nil {
		return nil, err
	}
	return msg.GetData(), nil
}
// DataSize decodes data as a pb.Data message and reports the size of the
// content it describes:
//
//   - Data_Raw: the length of the embedded payload.
//   - Data_File: the recorded Filesize.
//   - Data_Directory: an error, since directories have no data size.
//   - any other type: an "unrecognized node data type" error.
//
// A decoding failure is returned as-is with a size of 0.
func DataSize(data []byte) (uint64, error) {
	msg := &pb.Data{}
	if err := proto.Unmarshal(data, msg); err != nil {
		return 0, err
	}

	switch kind := msg.GetType(); kind {
	case pb.Data_Raw:
		return uint64(len(msg.GetData())), nil
	case pb.Data_File:
		return msg.GetFilesize(), nil
	case pb.Data_Directory:
		return 0, errors.New("can't get data size of directory")
	}
	return 0, errors.New("unrecognized node data type")
}
// An FSNode represents a filesystem object using the UnixFS specification.
// It is a thin wrapper around a pb.Data message; all accessors and mutators
// on FSNode read from or write to that embedded message.
//
// The `NewFSNode` constructor should be used instead of just calling `new(FSNode)`
// to guarantee that the required (`Type` and `Filesize`) fields in the `format`
// structure are initialized before marshaling (in `GetBytes()`).
type FSNode struct {
// UnixFS format defined as a protocol buffers message. Held by value, so
// a zero FSNode has a zero pb.Data with nil Type and Filesize.
format pb.Data
}
// FSNodeFromBytes decodes the protobuf message in b into a new FSNode.
// On a decoding failure it returns a nil node together with the error.
func FSNodeFromBytes(b []byte) (*FSNode, error) {
	node := &FSNode{}
	if err := proto.Unmarshal(b, &node.format); err != nil {
		return nil, err
	}
	return node, nil
}
// NewFSNode returns a new FSNode of the given `dataType`.
//
// The `Type` field is set here because it is required and has no `Set()`
// accessor; without it the `Marshal()` call in `GetBytes()` would fail
// (`required field "Type" not set`).
//
// The `Filesize` pointer is also initialized (to zero) so it is never nil at
// marshaling time. It is not a required field, but initializing it keeps the
// resulting hashes backwards compatible with previous `go-ipfs` versions:
// otherwise a node created without data or children (as in `NewLeaf()`)
// would be serialized with `Filesize` left at nil.
func NewFSNode(dataType pb.Data_DataType) *FSNode {
	node := &FSNode{}
	node.format.Type = &dataType

	// A zero delta leaves the value at 0 but allocates the `Filesize` pointer.
	node.UpdateFilesize(0)
	return node
}
// AddBlockSize appends s as the size of the next child block of this node
// and grows the node's `Filesize` by the same amount.
func (n *FSNode) AddBlockSize(s uint64) {
	sizes := &n.format.Blocksizes
	*sizes = append(*sizes, s)
	n.UpdateFilesize(int64(s))
}
// RemoveBlockSize drops the size of the child block at index i and shrinks
// the node's `Filesize` by that amount. The index is not bounds checked, so
// an out-of-range i panics.
func (n *FSNode) RemoveBlockSize(i int) {
	sizes := n.format.Blocksizes
	removed := sizes[i]

	n.UpdateFilesize(-int64(removed))
	n.format.Blocksizes = append(sizes[:i], sizes[i+1:]...)
}
// BlockSize returns the size of the child block at index `i`.
// The index is not bounds checked, so an out-of-range `i` panics.
// TODO: Evaluate if this function should be bounds checking.
func (n *FSNode) BlockSize(i int) uint64 {
	sizes := n.format.Blocksizes
	return sizes[i]
}
// RemoveAllBlockSizes discards every child block size of this node and
// resets `Filesize` to the length of the node's own `Data`.
func (n *FSNode) RemoveAllBlockSizes() {
	ownDataLen := uint64(len(n.Data()))
	n.format.Filesize = proto.Uint64(ownDataLen)
	n.format.Blocksizes = []uint64{}
}
// GetBytes serializes the node's internal `format` as a protobuf message and
// returns the result of the marshal call unchanged.
func (n *FSNode) GetBytes() ([]byte, error) {
	msg := &n.format
	return proto.Marshal(msg)
}
// FileSize returns the `Filesize` recorded in the internal `format`, i.e.
// the total size of this tree: the data held in this node plus the sizes of
// all its children.
func (n *FSNode) FileSize() uint64 {
	total := n.format.GetFilesize()
	return total
}
// NumChildren returns how many child block sizes are recorded on this node.
func (n *FSNode) NumChildren() int {
	childSizes := n.format.Blocksizes
	return len(childSizes)
}
// Data returns the `Data` payload stored in the internal `format`.
func (n *FSNode) Data() []byte {
	payload := n.format.GetData()
	return payload
}
// SetData replaces the `Data` field of the internal `format` with newData
// and adjusts `Filesize` by the difference in length between the new and
// the previous payload.
func (n *FSNode) SetData(newData []byte) {
	previousLen := len(n.Data())
	delta := len(newData) - previousLen

	n.UpdateFilesize(int64(delta))
	n.format.Data = newData
}
// UpdateFilesize adjusts the `Filesize` field of the internal `format` by the
// signed amount `filesizeDiff`, allocating the field if it was nil. The
// result is not range checked.
// TODO: Add assert to check for `Filesize` > 0?
func (n *FSNode) UpdateFilesize(filesizeDiff int64) {
	current := int64(n.format.GetFilesize())
	updated := uint64(current + filesizeDiff)
	n.format.Filesize = proto.Uint64(updated)
}
// Type returns the `Type` stored in the internal `format`.
func (n *FSNode) Type() pb.Data_DataType {
	kind := n.format.GetType()
	return kind
}
// Metadata is used to store additional FSNode information.
type Metadata struct {
// MimeType is serialized by Bytes() as the MimeType of a pb.Metadata message.
MimeType string
// Size is written by BytesForMetadata() into the Filesize field of the
// wrapping pb.Data message; it is not part of the output of Bytes().
Size uint64
}
// MetadataFromBytes unmarshals a protobuf Data message into Metadata.
// The provided slice should have been encoded with BytesForMetadata().
//
// It returns an error if b cannot be decoded as a pb.Data message, if that
// message is not of type Data_Metadata, or if its payload cannot be decoded
// as a pb.Metadata message.
func MetadataFromBytes(b []byte) (*Metadata, error) {
	pbd := new(pb.Data)
	if err := proto.Unmarshal(b, pbd); err != nil {
		return nil, err
	}
	if pbd.GetType() != pb.Data_Metadata {
		return nil, errors.New("incorrect node type")
	}

	pbm := new(pb.Metadata)
	if err := proto.Unmarshal(pbd.GetData(), pbm); err != nil {
		return nil, err
	}

	return &Metadata{
		MimeType: pbm.GetMimeType(),
		// BytesForMetadata stores Size in the wrapper's Filesize field, so
		// read it back from there; otherwise Size is lost on a round trip.
		Size: pbd.GetFilesize(),
	}, nil
}
// Bytes serializes m as a pb.Metadata protobuf message. Only MimeType is
// included; Size is not part of this message.
func (m *Metadata) Bytes() ([]byte, error) {
	msg := &pb.Metadata{MimeType: &m.MimeType}
	return proto.Marshal(msg)
}
// BytesForMetadata wraps the given Metadata in a protobuf message of Data
// type whose DataType is Data_Metadata and whose Filesize is m.Size. The
// wrapped payload is the result of calling m.Bytes(); if that call fails,
// nil bytes and its error are returned.
func BytesForMetadata(m *Metadata) ([]byte, error) {
	metaType := pb.Data_Metadata
	wrapper := &pb.Data{
		Type:     &metaType,
		Filesize: proto.Uint64(m.Size),
	}

	inner, err := m.Bytes()
	if err != nil {
		return nil, err
	}
	wrapper.Data = inner

	return proto.Marshal(wrapper)
}
// EmptyDirNode returns a new ProtoNode whose data is the serialized UnixFS
// directory message produced by FolderPBData.
func EmptyDirNode() *dag.ProtoNode {
	dirData := FolderPBData()
	return dag.NodeWithData(dirData)
}

View File

@ -1,160 +0,0 @@
package unixfs
import (
"bytes"
"testing"
proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto"
pb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb"
)
// TestFSNode builds a file node with 16 child blocks, removes the last one,
// attaches 128 bytes of data, and checks that the child count and the sizes
// reported before and after a serialization round trip are consistent.
func TestFSNode(t *testing.T) {
	const (
		blockSize   = 100
		addedBlocks = 16
		keptBlocks  = addedBlocks - 1
		dataLen     = 128
		wantSize    = blockSize*keptBlocks + dataLen
	)

	node := NewFSNode(TFile)
	for added := 0; added < addedBlocks; added++ {
		node.AddBlockSize(blockSize)
	}
	node.RemoveBlockSize(keptBlocks)
	node.SetData(make([]byte, dataLen))

	encoded, err := node.GetBytes()
	if err != nil {
		t.Fatal(err)
	}

	// The serialized form must be a decodable pb.Data message.
	decoded := new(pb.Data)
	if err := proto.Unmarshal(encoded, decoded); err != nil {
		t.Fatal(err)
	}

	size, err := DataSize(encoded)
	if err != nil {
		t.Fatal(err)
	}

	if node.NumChildren() != keptBlocks {
		t.Fatal("Wrong number of child nodes")
	}
	if size != wantSize {
		t.Fatal("Datasize calculations incorrect!")
	}

	reloaded, err := FSNodeFromBytes(encoded)
	if err != nil {
		t.Fatal(err)
	}
	if reloaded.FileSize() != wantSize {
		t.Fatal("fsNode FileSize calculations incorrect")
	}
}
// TestPBdataTools exercises the protobuf helper functions: WrapData,
// UnwrapData, FromBytes, FilePBData, FolderPBData, SymlinkData and DataSize.
//
// Every returned error is checked before the accompanying value is
// inspected, so a decoding failure is reported as such rather than being
// masked by a misleading assertion message.
func TestPBdataTools(t *testing.T) {
	raw := []byte{0x00, 0x01, 0x02, 0x17, 0xA1}
	rawPB := WrapData(raw)

	// Raw data: the reported size must match the wrapped payload.
	pbDataSize, err := DataSize(rawPB)
	if err != nil {
		t.Fatal(err)
	}
	if len(raw) != int(pbDataSize) {
		t.Fatal("WrapData changes the size of data.")
	}

	rawPBBytes, err := UnwrapData(rawPB)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(raw, rawPBBytes) {
		t.Fatal("Unwrap failed to produce the correct wrapped data.")
	}

	rawPBdata, err := FromBytes(rawPB)
	if err != nil {
		t.Fatal(err)
	}
	if rawPBdata.GetType() != TRaw {
		t.Fatal("WrapData does not create pb.Data_Raw!")
	}

	// File: DataSize reports the declared total size, not the payload length.
	catFile := []byte("Mr_Meowgie.gif")
	catPBfile := FilePBData(catFile, 17)
	catSize, err := DataSize(catPBfile)
	if err != nil {
		t.Fatal(err)
	}
	if catSize != 17 {
		t.Fatal("FilePBData is the wrong size.")
	}

	// Directory: decodes as a directory and has no data size.
	dirPB := FolderPBData()
	dir, err := FromBytes(dirPB)
	if err != nil {
		t.Fatal(err)
	}
	if dir.GetType() != TDirectory {
		t.Fatal("FolderPBData does not create a directory!")
	}
	if _, dirErr := DataSize(dirPB); dirErr == nil {
		t.Fatal("DataSize didn't throw an error when taking the size of a directory.")
	}

	// Symlink: decodes as a symlink and has no data size.
	catSym, err := SymlinkData("/ipfs/adad123123/meowgie.gif")
	if err != nil {
		t.Fatal(err)
	}
	catSymPB, err := FromBytes(catSym)
	if err != nil {
		t.Fatal(err)
	}
	if catSymPB.GetType() != TSymlink {
		t.Fatal("Failed to make a Symlink.")
	}
	if _, sizeErr := DataSize(catSym); sizeErr == nil {
		t.Fatal("DataSize didn't throw an error when taking the size of a Symlink.")
	}
}
// TestMetadata checks that a Metadata value can be serialized on its own,
// wrapped with BytesForMetadata, and decoded again by MetadataFromBytes with
// its MimeType intact.
func TestMetadata(t *testing.T) {
	const wantMime = "audio/aiff"
	original := &Metadata{MimeType: wantMime, Size: 12345}

	if _, err := original.Bytes(); err != nil {
		t.Fatal(err)
	}

	encoded, err := BytesForMetadata(original)
	if err != nil {
		t.Fatal(err)
	}

	decoded, err := MetadataFromBytes(encoded)
	if err != nil {
		t.Fatal(err)
	}
	if decoded.MimeType != wantMime {
		t.Fatal("Metadata does not Marshal and Unmarshal properly!")
	}
}