diff --git a/Rules.mk b/Rules.mk index 1703b5468..8178dd96a 100644 --- a/Rules.mk +++ b/Rules.mk @@ -47,9 +47,6 @@ ifneq ($(filter coverage% clean distclean,$(MAKECMDGOALS)),) include $(dir)/Rules.mk endif -dir := unixfs/pb -include $(dir)/Rules.mk - dir := pin/internal/pb include $(dir)/Rules.mk diff --git a/unixfs/archive/archive.go b/unixfs/archive/archive.go deleted file mode 100644 index 9e02aed15..000000000 --- a/unixfs/archive/archive.go +++ /dev/null @@ -1,108 +0,0 @@ -// Package archive provides utilities to archive and compress a [Unixfs] DAG. -package archive - -import ( - "bufio" - "compress/gzip" - "context" - "io" - "path" - - tar "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/archive/tar" - uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io" - - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// DefaultBufSize is the buffer size for gets. for now, 1MB, which is ~4 blocks. -// TODO: does this need to be configurable? -var DefaultBufSize = 1048576 - -type identityWriteCloser struct { - w io.Writer -} - -func (i *identityWriteCloser) Write(p []byte) (int, error) { - return i.w.Write(p) -} - -func (i *identityWriteCloser) Close() error { - return nil -} - -// DagArchive is equivalent to `ipfs getdag $hash | maybe_tar | maybe_gzip` -func DagArchive(ctx context.Context, nd ipld.Node, name string, dag ipld.DAGService, archive bool, compression int) (io.Reader, error) { - - cleaned := path.Clean(name) - _, filename := path.Split(cleaned) - - // need to connect a writer to a reader - piper, pipew := io.Pipe() - checkErrAndClosePipe := func(err error) bool { - if err != nil { - pipew.CloseWithError(err) - return true - } - return false - } - - // use a buffered writer to parallelize task - bufw := bufio.NewWriterSize(pipew, DefaultBufSize) - - // compression determines whether to use gzip compression. - maybeGzw, err := newMaybeGzWriter(bufw, compression) - if checkErrAndClosePipe(err) { - return nil, err - } - - closeGzwAndPipe := func() { - if err := maybeGzw.Close(); checkErrAndClosePipe(err) { - return - } - if err := bufw.Flush(); checkErrAndClosePipe(err) { - return - } - pipew.Close() // everything seems to be ok. - } - - if !archive && compression != gzip.NoCompression { - // the case when the node is a file - dagr, err := uio.NewDagReader(ctx, nd, dag) - if checkErrAndClosePipe(err) { - return nil, err - } - - go func() { - if _, err := dagr.WriteTo(maybeGzw); checkErrAndClosePipe(err) { - return - } - closeGzwAndPipe() // everything seems to be ok - }() - } else { - // the case for 1. archive, and 2. 
not archived and not compressed, in which tar is used anyway as a transport format - - // construct the tar writer - w, err := tar.NewWriter(ctx, dag, maybeGzw) - if checkErrAndClosePipe(err) { - return nil, err - } - - go func() { - // write all the nodes recursively - if err := w.WriteNode(nd, filename); checkErrAndClosePipe(err) { - return - } - w.Close() // close tar writer - closeGzwAndPipe() // everything seems to be ok - }() - } - - return piper, nil -} - -func newMaybeGzWriter(w io.Writer, compression int) (io.WriteCloser, error) { - if compression != gzip.NoCompression { - return gzip.NewWriterLevel(w, compression) - } - return &identityWriteCloser{w}, nil -} diff --git a/unixfs/archive/tar/writer.go b/unixfs/archive/tar/writer.go deleted file mode 100644 index bddec4d48..000000000 --- a/unixfs/archive/tar/writer.go +++ /dev/null @@ -1,143 +0,0 @@ -// Package tar provides functionality to write a unixfs merkledag -// as a tar archive. -package tar - -import ( - "archive/tar" - "context" - "fmt" - "io" - "path" - "time" - - mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io" - upb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb" - - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// Writer is a utility structure that helps to write -// unixfs merkledag nodes as a tar archive format. -// It wraps any io.Writer. -type Writer struct { - Dag ipld.DAGService - TarW *tar.Writer - - ctx context.Context -} - -// NewWriter wraps given io.Writer. -func NewWriter(ctx context.Context, dag ipld.DAGService, w io.Writer) (*Writer, error) { - return &Writer{ - Dag: dag, - TarW: tar.NewWriter(w), - ctx: ctx, - }, nil -} - -func (w *Writer) writeDir(nd *mdag.ProtoNode, fpath string) error { - dir, err := uio.NewDirectoryFromNode(w.Dag, nd) - if err != nil { - return err - } - if err := writeDirHeader(w.TarW, fpath); err != nil { - return err - } - - return dir.ForEachLink(w.ctx, func(l *ipld.Link) error { - child, err := w.Dag.Get(w.ctx, l.Cid) - if err != nil { - return err - } - npath := path.Join(fpath, l.Name) - return w.WriteNode(child, npath) - }) -} - -func (w *Writer) writeFile(nd *mdag.ProtoNode, fsNode *ft.FSNode, fpath string) error { - if err := writeFileHeader(w.TarW, fpath, fsNode.FileSize()); err != nil { - return err - } - - dagr := uio.NewPBFileReader(w.ctx, nd, fsNode, w.Dag) - if _, err := dagr.WriteTo(w.TarW); err != nil { - return err - } - w.TarW.Flush() - return nil -} - -// WriteNode adds a node to the archive. 
-func (w *Writer) WriteNode(nd ipld.Node, fpath string) error { - switch nd := nd.(type) { - case *mdag.ProtoNode: - fsNode, err := ft.FSNodeFromBytes(nd.Data()) - if err != nil { - return err - } - - switch fsNode.Type() { - case upb.Data_Metadata: - fallthrough - case upb.Data_Directory, upb.Data_HAMTShard: - return w.writeDir(nd, fpath) - case upb.Data_Raw: - fallthrough - case upb.Data_File: - return w.writeFile(nd, fsNode, fpath) - case upb.Data_Symlink: - return writeSymlinkHeader(w.TarW, string(fsNode.Data()), fpath) - default: - return ft.ErrUnrecognizedType - } - case *mdag.RawNode: - if err := writeFileHeader(w.TarW, fpath, uint64(len(nd.RawData()))); err != nil { - return err - } - - if _, err := w.TarW.Write(nd.RawData()); err != nil { - return err - } - w.TarW.Flush() - return nil - default: - return fmt.Errorf("nodes of type %T are not supported in unixfs", nd) - } -} - -// Close closes the tar writer. -func (w *Writer) Close() error { - return w.TarW.Close() -} - -func writeDirHeader(w *tar.Writer, fpath string) error { - return w.WriteHeader(&tar.Header{ - Name: fpath, - Typeflag: tar.TypeDir, - Mode: 0777, - ModTime: time.Now(), - // TODO: set mode, dates, etc. when added to unixFS - }) -} - -func writeFileHeader(w *tar.Writer, fpath string, size uint64) error { - return w.WriteHeader(&tar.Header{ - Name: fpath, - Size: int64(size), - Typeflag: tar.TypeReg, - Mode: 0644, - ModTime: time.Now(), - // TODO: set mode, dates, etc. when added to unixFS - }) -} - -func writeSymlinkHeader(w *tar.Writer, target, fpath string) error { - return w.WriteHeader(&tar.Header{ - Name: fpath, - Linkname: target, - Mode: 0777, - Typeflag: tar.TypeSymlink, - }) -} diff --git a/unixfs/hamt/hamt.go b/unixfs/hamt/hamt.go deleted file mode 100644 index e1d2750eb..000000000 --- a/unixfs/hamt/hamt.go +++ /dev/null @@ -1,530 +0,0 @@ -// Package hamt implements a Hash Array Mapped Trie over ipfs merkledag nodes. -// It is implemented mostly as described in the wikipedia article on HAMTs, -// however the table size is variable (usually 256 in our usages) as opposed to -// 32 as suggested in the article. The hash function used is currently -// Murmur3, but this value is configurable (the datastructure reports which -// hash function its using). -// -// The one algorithmic change we implement that is not mentioned in the -// wikipedia article is the collapsing of empty shards. -// Given the following tree: ( '[' = shards, '{' = values ) -// [ 'A' ] -> [ 'B' ] -> { "ABC" } -// | L-> { "ABD" } -// L-> { "ASDF" } -// If we simply removed "ABC", we would end up with a tree where shard 'B' only -// has a single child. This causes two issues, the first, is that now we have -// an extra lookup required to get to "ABD". The second issue is that now we -// have a tree that contains only "ABD", but is not the same tree that we would -// get by simply inserting "ABD" into a new tree. To address this, we always -// check for empty shard nodes upon deletion and prune them to maintain a -// consistent tree, independent of insertion order. 
-package hamt - -import ( - "context" - "fmt" - "os" - - dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - format "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - upb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb" - - bitfield "gx/ipfs/QmTbBs3Y3u5F69XNJzdnnc6SP5GKgcXxCDzx6w8m6piVRT/go-bitfield" - cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid" - proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" - "gx/ipfs/QmfJHywXQu98UeZtGJBQrPAR6AtmDjjbe3qjTo9piXHPnx/murmur3" -) - -const ( - // HashMurmur3 is the multiformats identifier for Murmur3 - HashMurmur3 uint64 = 0x22 -) - -// A Shard represents the HAMT. It should be initialized with NewShard(). -type Shard struct { - nd *dag.ProtoNode - - bitfield bitfield.Bitfield - - children []child - - tableSize int - tableSizeLg2 int - - prefix *cid.Prefix - hashFunc uint64 - - prefixPadStr string - maxpadlen int - - dserv ipld.DAGService -} - -// child can either be another shard, or a leaf node value -type child interface { - Link() (*ipld.Link, error) - Label() string -} - -// NewShard creates a new, empty HAMT shard with the given size. -func NewShard(dserv ipld.DAGService, size int) (*Shard, error) { - ds, err := makeShard(dserv, size) - if err != nil { - return nil, err - } - - ds.nd = new(dag.ProtoNode) - ds.hashFunc = HashMurmur3 - return ds, nil -} - -func makeShard(ds ipld.DAGService, size int) (*Shard, error) { - lg2s, err := logtwo(size) - if err != nil { - return nil, err - } - maxpadding := fmt.Sprintf("%X", size-1) - return &Shard{ - tableSizeLg2: lg2s, - prefixPadStr: fmt.Sprintf("%%0%dX", len(maxpadding)), - maxpadlen: len(maxpadding), - bitfield: bitfield.NewBitfield(size), - tableSize: size, - dserv: ds, - }, nil -} - -// NewHamtFromDag creates new a HAMT shard from the given DAG. 
-func NewHamtFromDag(dserv ipld.DAGService, nd ipld.Node) (*Shard, error) { - pbnd, ok := nd.(*dag.ProtoNode) - if !ok { - return nil, dag.ErrNotProtobuf - } - - pbd, err := format.FromBytes(pbnd.Data()) - if err != nil { - return nil, err - } - - if pbd.GetType() != upb.Data_HAMTShard { - return nil, fmt.Errorf("node was not a dir shard") - } - - if pbd.GetHashType() != HashMurmur3 { - return nil, fmt.Errorf("only murmur3 supported as hash function") - } - - ds, err := makeShard(dserv, int(pbd.GetFanout())) - if err != nil { - return nil, err - } - - ds.nd = pbnd.Copy().(*dag.ProtoNode) - ds.children = make([]child, len(pbnd.Links())) - ds.bitfield.SetBytes(pbd.GetData()) - ds.hashFunc = pbd.GetHashType() - ds.prefix = &ds.nd.Prefix - - return ds, nil -} - -// SetPrefix sets the CID Prefix -func (ds *Shard) SetPrefix(prefix *cid.Prefix) { - ds.prefix = prefix -} - -// Prefix gets the CID Prefix, may be nil if unset -func (ds *Shard) Prefix() *cid.Prefix { - return ds.prefix -} - -// Node serializes the HAMT structure into a merkledag node with unixfs formatting -func (ds *Shard) Node() (ipld.Node, error) { - out := new(dag.ProtoNode) - out.SetPrefix(ds.prefix) - - cindex := 0 - // TODO: optimized 'for each set bit' - for i := 0; i < ds.tableSize; i++ { - if !ds.bitfield.Bit(i) { - continue - } - - ch := ds.children[cindex] - if ch != nil { - clnk, err := ch.Link() - if err != nil { - return nil, err - } - - err = out.AddRawLink(ds.linkNamePrefix(i)+ch.Label(), clnk) - if err != nil { - return nil, err - } - } else { - // child unloaded, just copy in link with updated name - lnk := ds.nd.Links()[cindex] - label := lnk.Name[ds.maxpadlen:] - - err := out.AddRawLink(ds.linkNamePrefix(i)+label, lnk) - if err != nil { - return nil, err - } - } - cindex++ - } - - typ := upb.Data_HAMTShard - data, err := proto.Marshal(&upb.Data{ - Type: &typ, - Fanout: proto.Uint64(uint64(ds.tableSize)), - HashType: proto.Uint64(HashMurmur3), - Data: ds.bitfield.Bytes(), - }) - if err != nil { - return nil, err - } - - out.SetData(data) - - err = ds.dserv.Add(context.TODO(), out) - if err != nil { - return nil, err - } - - return out, nil -} - -type shardValue struct { - key string - val *ipld.Link -} - -// Link returns a link to this node -func (sv *shardValue) Link() (*ipld.Link, error) { - return sv.val, nil -} - -func (sv *shardValue) Label() string { - return sv.key -} - -func hash(val []byte) []byte { - h := murmur3.New64() - h.Write(val) - return h.Sum(nil) -} - -// Label for Shards is the empty string, this is used to differentiate them from -// value entries -func (ds *Shard) Label() string { - return "" -} - -// Set sets 'name' = nd in the HAMT -func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error { - hv := &hashBits{b: hash([]byte(name))} - err := ds.dserv.Add(ctx, nd) - if err != nil { - return err - } - - lnk, err := ipld.MakeLink(nd) - if err != nil { - return err - } - lnk.Name = ds.linkNamePrefix(0) + name - - return ds.modifyValue(ctx, hv, name, lnk) -} - -// Remove deletes the named entry if it exists, this operation is idempotent. 
-func (ds *Shard) Remove(ctx context.Context, name string) error { - hv := &hashBits{b: hash([]byte(name))} - return ds.modifyValue(ctx, hv, name, nil) -} - -// Find searches for a child node by 'name' within this hamt -func (ds *Shard) Find(ctx context.Context, name string) (*ipld.Link, error) { - hv := &hashBits{b: hash([]byte(name))} - - var out *ipld.Link - err := ds.getValue(ctx, hv, name, func(sv *shardValue) error { - out = sv.val - return nil - }) - if err != nil { - return nil, err - } - - return out, nil -} - -// getChild returns the i'th child of this shard. If it is cached in the -// children array, it will return it from there. Otherwise, it loads the child -// node from disk. -func (ds *Shard) getChild(ctx context.Context, i int) (child, error) { - if i >= len(ds.children) || i < 0 { - return nil, fmt.Errorf("invalid index passed to getChild (likely corrupt bitfield)") - } - - if len(ds.children) != len(ds.nd.Links()) { - return nil, fmt.Errorf("inconsistent lengths between children array and Links array") - } - - c := ds.children[i] - if c != nil { - return c, nil - } - - return ds.loadChild(ctx, i) -} - -// loadChild reads the i'th child node of this shard from disk and returns it -// as a 'child' interface -func (ds *Shard) loadChild(ctx context.Context, i int) (child, error) { - lnk := ds.nd.Links()[i] - if len(lnk.Name) < ds.maxpadlen { - return nil, fmt.Errorf("invalid link name '%s'", lnk.Name) - } - - var c child - if len(lnk.Name) == ds.maxpadlen { - nd, err := lnk.GetNode(ctx, ds.dserv) - if err != nil { - return nil, err - } - cds, err := NewHamtFromDag(ds.dserv, nd) - if err != nil { - return nil, err - } - - c = cds - } else { - lnk2 := *lnk - c = &shardValue{ - key: lnk.Name[ds.maxpadlen:], - val: &lnk2, - } - } - - ds.children[i] = c - return c, nil -} - -func (ds *Shard) setChild(i int, c child) { - ds.children[i] = c -} - -// Link returns a merklelink to this shard node -func (ds *Shard) Link() (*ipld.Link, error) { - nd, err := ds.Node() - if err != nil { - return nil, err - } - - err = ds.dserv.Add(context.TODO(), nd) - if err != nil { - return nil, err - } - - return ipld.MakeLink(nd) -} - -func (ds *Shard) insertChild(idx int, key string, lnk *ipld.Link) error { - if lnk == nil { - return os.ErrNotExist - } - - i := ds.indexForBitPos(idx) - ds.bitfield.SetBit(idx) - - lnk.Name = ds.linkNamePrefix(idx) + key - sv := &shardValue{ - key: key, - val: lnk, - } - - ds.children = append(ds.children[:i], append([]child{sv}, ds.children[i:]...)...) 
- ds.nd.SetLinks(append(ds.nd.Links()[:i], append([]*ipld.Link{nil}, ds.nd.Links()[i:]...)...)) - return nil -} - -func (ds *Shard) rmChild(i int) error { - if i < 0 || i >= len(ds.children) || i >= len(ds.nd.Links()) { - return fmt.Errorf("hamt: attempted to remove child with out of range index") - } - - copy(ds.children[i:], ds.children[i+1:]) - ds.children = ds.children[:len(ds.children)-1] - - copy(ds.nd.Links()[i:], ds.nd.Links()[i+1:]) - ds.nd.SetLinks(ds.nd.Links()[:len(ds.nd.Links())-1]) - - return nil -} - -func (ds *Shard) getValue(ctx context.Context, hv *hashBits, key string, cb func(*shardValue) error) error { - idx := hv.Next(ds.tableSizeLg2) - if ds.bitfield.Bit(int(idx)) { - cindex := ds.indexForBitPos(idx) - - child, err := ds.getChild(ctx, cindex) - if err != nil { - return err - } - - switch child := child.(type) { - case *Shard: - return child.getValue(ctx, hv, key, cb) - case *shardValue: - if child.key == key { - return cb(child) - } - } - } - - return os.ErrNotExist -} - -// EnumLinks collects all links in the Shard. -func (ds *Shard) EnumLinks(ctx context.Context) ([]*ipld.Link, error) { - var links []*ipld.Link - err := ds.ForEachLink(ctx, func(l *ipld.Link) error { - links = append(links, l) - return nil - }) - return links, err -} - -// ForEachLink walks the Shard and calls the given function. -func (ds *Shard) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error { - return ds.walkTrie(ctx, func(sv *shardValue) error { - lnk := sv.val - lnk.Name = sv.key - - return f(lnk) - }) -} - -func (ds *Shard) walkTrie(ctx context.Context, cb func(*shardValue) error) error { - for idx := range ds.children { - c, err := ds.getChild(ctx, idx) - if err != nil { - return err - } - - switch c := c.(type) { - case *shardValue: - if err := cb(c); err != nil { - return err - } - - case *Shard: - if err := c.walkTrie(ctx, cb); err != nil { - return err - } - default: - return fmt.Errorf("unexpected child type: %#v", c) - } - } - return nil -} - -func (ds *Shard) modifyValue(ctx context.Context, hv *hashBits, key string, val *ipld.Link) error { - idx := hv.Next(ds.tableSizeLg2) - - if !ds.bitfield.Bit(idx) { - return ds.insertChild(idx, key, val) - } - - cindex := ds.indexForBitPos(idx) - - child, err := ds.getChild(ctx, cindex) - if err != nil { - return err - } - - switch child := child.(type) { - case *Shard: - err := child.modifyValue(ctx, hv, key, val) - if err != nil { - return err - } - - if val == nil { - switch len(child.children) { - case 0: - // empty sub-shard, prune it - // Note: this shouldnt normally ever happen - // in the event of another implementation creates flawed - // structures, this will help to normalize them. 
- ds.bitfield.UnsetBit(idx) - return ds.rmChild(cindex) - case 1: - nchild, ok := child.children[0].(*shardValue) - if ok { - // sub-shard with a single value element, collapse it - ds.setChild(cindex, nchild) - } - return nil - } - } - - return nil - case *shardValue: - if child.key == key { - // value modification - if val == nil { - ds.bitfield.UnsetBit(idx) - return ds.rmChild(cindex) - } - - child.val = val - return nil - } - - if val == nil { - return os.ErrNotExist - } - - // replace value with another shard, one level deeper - ns, err := NewShard(ds.dserv, ds.tableSize) - if err != nil { - return err - } - ns.prefix = ds.prefix - chhv := &hashBits{ - b: hash([]byte(child.key)), - consumed: hv.consumed, - } - - err = ns.modifyValue(ctx, hv, key, val) - if err != nil { - return err - } - - err = ns.modifyValue(ctx, chhv, child.key, child.val) - if err != nil { - return err - } - - ds.setChild(cindex, ns) - return nil - default: - return fmt.Errorf("unexpected type for child: %#v", child) - } -} - -// indexForBitPos returns the index within the collapsed array corresponding to -// the given bit in the bitset. The collapsed array contains only one entry -// per bit set in the bitfield, and this function is used to map the indices. -func (ds *Shard) indexForBitPos(bp int) int { - return ds.bitfield.OnesBefore(bp) -} - -// linkNamePrefix takes in the bitfield index of an entry and returns its hex prefix -func (ds *Shard) linkNamePrefix(idx int) string { - return fmt.Sprintf(ds.prefixPadStr, idx) -} diff --git a/unixfs/hamt/hamt_stress_test.go b/unixfs/hamt/hamt_stress_test.go deleted file mode 100644 index 6dff0c811..000000000 --- a/unixfs/hamt/hamt_stress_test.go +++ /dev/null @@ -1,291 +0,0 @@ -package hamt - -import ( - "context" - "fmt" - "math/rand" - "os" - "testing" - "time" - - mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -func getNames(prefix string, count int) []string { - out := make([]string, count) - for i := 0; i < count; i++ { - out[i] = fmt.Sprintf("%s%d", prefix, i) - } - return out -} - -const ( - opAdd = iota - opDel - opFind -) - -type testOp struct { - Op int - Val string -} - -func stringArrToSet(arr []string) map[string]bool { - out := make(map[string]bool) - for _, s := range arr { - out[s] = true - } - return out -} - -// generate two different random sets of operations to result in the same -// ending directory (same set of entries at the end) and execute each of them -// in turn, then compare to ensure the output is the same on each. 
-func TestOrderConsistency(t *testing.T) { - seed := time.Now().UnixNano() - t.Logf("using seed = %d", seed) - ds := mdtest.Mock() - - shardWidth := 1024 - - keep := getNames("good", 4000) - temp := getNames("tempo", 6000) - - ops := genOpSet(seed, keep, temp) - s, err := executeOpSet(t, ds, shardWidth, ops) - if err != nil { - t.Fatal(err) - } - - err = validateOpSetCompletion(t, s, keep, temp) - if err != nil { - t.Fatal(err) - } - - ops2 := genOpSet(seed+1000, keep, temp) - s2, err := executeOpSet(t, ds, shardWidth, ops2) - if err != nil { - t.Fatal(err) - } - - err = validateOpSetCompletion(t, s2, keep, temp) - if err != nil { - t.Fatal(err) - } - - nd, err := s.Node() - if err != nil { - t.Fatal(err) - } - - nd2, err := s2.Node() - if err != nil { - t.Fatal(err) - } - - k := nd.Cid() - k2 := nd2.Cid() - - if !k.Equals(k2) { - t.Fatal("got different results: ", k, k2) - } -} - -func validateOpSetCompletion(t *testing.T, s *Shard, keep, temp []string) error { - ctx := context.TODO() - for _, n := range keep { - _, err := s.Find(ctx, n) - if err != nil { - return fmt.Errorf("couldnt find %s: %s", n, err) - } - } - - for _, n := range temp { - _, err := s.Find(ctx, n) - if err != os.ErrNotExist { - return fmt.Errorf("expected not to find: %s", err) - } - } - - return nil -} - -func executeOpSet(t *testing.T, ds ipld.DAGService, width int, ops []testOp) (*Shard, error) { - ctx := context.TODO() - s, err := NewShard(ds, width) - if err != nil { - return nil, err - } - - e := ft.EmptyDirNode() - ds.Add(ctx, e) - - for _, o := range ops { - switch o.Op { - case opAdd: - err := s.Set(ctx, o.Val, e) - if err != nil { - return nil, fmt.Errorf("inserting %s: %s", o.Val, err) - } - case opDel: - err := s.Remove(ctx, o.Val) - if err != nil { - return nil, fmt.Errorf("deleting %s: %s", o.Val, err) - } - case opFind: - _, err := s.Find(ctx, o.Val) - if err != nil { - return nil, fmt.Errorf("finding %s: %s", o.Val, err) - } - } - } - - return s, nil -} - -func genOpSet(seed int64, keep, temp []string) []testOp { - tempset := stringArrToSet(temp) - - allnames := append(keep, temp...) 
- shuffle(seed, allnames) - - var todel []string - - var ops []testOp - - for { - n := len(allnames) + len(todel) - if n == 0 { - return ops - } - - rn := rand.Intn(n) - - if rn < len(allnames) { - next := allnames[0] - allnames = allnames[1:] - ops = append(ops, testOp{ - Op: opAdd, - Val: next, - }) - - if tempset[next] { - todel = append(todel, next) - } - } else { - shuffle(seed+100, todel) - next := todel[0] - todel = todel[1:] - - ops = append(ops, testOp{ - Op: opDel, - Val: next, - }) - } - } -} - -// executes the given op set with a repl to allow easier debugging -/*func debugExecuteOpSet(ds node.DAGService, width int, ops []testOp) (*Shard, error) { - - s, err := NewShard(ds, width) - if err != nil { - return nil, err - } - - e := ft.EmptyDirNode() - ds.Add(e) - ctx := context.TODO() - - run := 0 - - opnames := map[int]string{ - opAdd: "add", - opDel: "del", - } - -mainloop: - for i := 0; i < len(ops); i++ { - o := ops[i] - - fmt.Printf("Op %d: %s %s\n", i, opnames[o.Op], o.Val) - for run == 0 { - cmd := readCommand() - parts := strings.Split(cmd, " ") - switch parts[0] { - case "": - run = 1 - case "find": - _, err := s.Find(ctx, parts[1]) - if err == nil { - fmt.Println("success") - } else { - fmt.Println(err) - } - case "run": - if len(parts) > 1 { - n, err := strconv.Atoi(parts[1]) - if err != nil { - panic(err) - } - - run = n - } else { - run = -1 - } - case "lookop": - for k = 0; k < len(ops); k++ { - if ops[k].Val == parts[1] { - fmt.Printf(" Op %d: %s %s\n", k, opnames[ops[k].Op], parts[1]) - } - } - case "restart": - var err error - s, err = NewShard(ds, width) - if err != nil { - panic(err) - } - i = -1 - continue mainloop - case "print": - nd, err := s.Node() - if err != nil { - panic(err) - } - printDag(ds, nd.(*dag.ProtoNode), 0) - } - } - run-- - - switch o.Op { - case opAdd: - err := s.Set(ctx, o.Val, e) - if err != nil { - return nil, fmt.Errorf("inserting %s: %s", o.Val, err) - } - case opDel: - fmt.Println("deleting: ", o.Val) - err := s.Remove(ctx, o.Val) - if err != nil { - return nil, fmt.Errorf("deleting %s: %s", o.Val, err) - } - case opFind: - _, err := s.Find(ctx, o.Val) - if err != nil { - return nil, fmt.Errorf("finding %s: %s", o.Val, err) - } - } - } - - return s, nil -} - -func readCommand() string { - fmt.Print("> ") - scan := bufio.NewScanner(os.Stdin) - scan.Scan() - return scan.Text() -}*/ diff --git a/unixfs/hamt/hamt_test.go b/unixfs/hamt/hamt_test.go deleted file mode 100644 index 2c00cfec4..000000000 --- a/unixfs/hamt/hamt_test.go +++ /dev/null @@ -1,610 +0,0 @@ -package hamt - -import ( - "context" - "fmt" - "math/rand" - "os" - "sort" - "testing" - "time" - - "github.com/ipfs/go-ipfs/dagutils" - dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -func shuffle(seed int64, arr []string) { - r := rand.New(rand.NewSource(seed)) - for i := 0; i < len(arr); i++ { - a := r.Intn(len(arr)) - b := r.Intn(len(arr)) - arr[a], arr[b] = arr[b], arr[a] - } -} - -func makeDir(ds ipld.DAGService, size int) ([]string, *Shard, error) { - return makeDirWidth(ds, size, 256) -} - -func makeDirWidth(ds ipld.DAGService, size, width int) ([]string, *Shard, error) { - ctx := context.Background() - - s, _ := NewShard(ds, width) - - var dirs []string - for i := 0; i < size; i++ { - dirs = append(dirs, 
fmt.Sprintf("DIRNAME%d", i)) - } - - shuffle(time.Now().UnixNano(), dirs) - - for i := 0; i < len(dirs); i++ { - nd := ft.EmptyDirNode() - ds.Add(ctx, nd) - err := s.Set(ctx, dirs[i], nd) - if err != nil { - return nil, nil, err - } - } - - return dirs, s, nil -} - -func assertLink(s *Shard, name string, found bool) error { - _, err := s.Find(context.Background(), name) - switch err { - case os.ErrNotExist: - if found { - return err - } - - return nil - case nil: - if found { - return nil - } - - return fmt.Errorf("expected not to find link named %s", name) - default: - return err - } -} - -func assertSerializationWorks(ds ipld.DAGService, s *Shard) error { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - nd, err := s.Node() - if err != nil { - return err - } - - nds, err := NewHamtFromDag(ds, nd) - if err != nil { - return err - } - - linksA, err := s.EnumLinks(ctx) - if err != nil { - return err - } - - linksB, err := nds.EnumLinks(ctx) - if err != nil { - return err - } - - if len(linksA) != len(linksB) { - return fmt.Errorf("links arrays are different sizes") - } - - for i, a := range linksA { - b := linksB[i] - if a.Name != b.Name { - return fmt.Errorf("links names mismatch") - } - - if a.Cid.String() != b.Cid.String() { - return fmt.Errorf("link hashes dont match") - } - - if a.Size != b.Size { - return fmt.Errorf("link sizes not the same") - } - } - - return nil -} - -func TestBasicSet(t *testing.T) { - ds := mdtest.Mock() - for _, w := range []int{128, 256, 512, 1024, 2048, 4096} { - t.Run(fmt.Sprintf("BasicSet%d", w), func(t *testing.T) { - names, s, err := makeDirWidth(ds, 1000, w) - if err != nil { - t.Fatal(err) - } - ctx := context.Background() - - for _, d := range names { - _, err := s.Find(ctx, d) - if err != nil { - t.Fatal(err) - } - } - }) - } -} - -func TestDirBuilding(t *testing.T) { - ds := mdtest.Mock() - _, _ = NewShard(ds, 256) - - _, s, err := makeDir(ds, 200) - if err != nil { - t.Fatal(err) - } - - nd, err := s.Node() - if err != nil { - t.Fatal(err) - } - - //printDag(ds, nd, 0) - - k := nd.Cid() - - if k.String() != "QmY89TkSEVHykWMHDmyejSWFj9CYNtvzw4UwnT9xbc4Zjc" { - t.Fatalf("output didnt match what we expected (got %s)", k.String()) - } -} - -func TestShardReload(t *testing.T) { - ds := mdtest.Mock() - _, _ = NewShard(ds, 256) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - _, s, err := makeDir(ds, 200) - if err != nil { - t.Fatal(err) - } - - nd, err := s.Node() - if err != nil { - t.Fatal(err) - } - - nds, err := NewHamtFromDag(ds, nd) - if err != nil { - t.Fatal(err) - } - - lnks, err := nds.EnumLinks(ctx) - if err != nil { - t.Fatal(err) - } - - if len(lnks) != 200 { - t.Fatal("not enough links back") - } - - _, err = nds.Find(ctx, "DIRNAME50") - if err != nil { - t.Fatal(err) - } - - // Now test roundtrip marshal with no operations - - nds, err = NewHamtFromDag(ds, nd) - if err != nil { - t.Fatal(err) - } - - ond, err := nds.Node() - if err != nil { - t.Fatal(err) - } - - outk := ond.Cid() - ndk := nd.Cid() - - if !outk.Equals(ndk) { - printDiff(ds, nd.(*dag.ProtoNode), ond.(*dag.ProtoNode)) - t.Fatal("roundtrip serialization failed") - } -} - -func TestRemoveElems(t *testing.T) { - ds := mdtest.Mock() - dirs, s, err := makeDir(ds, 500) - if err != nil { - t.Fatal(err) - } - ctx := context.Background() - - for i := 0; i < 100; i++ { - err := s.Remove(ctx, fmt.Sprintf("NOTEXIST%d", rand.Int())) - if err != os.ErrNotExist { - t.Fatal("shouldnt be able to remove things that don't exist") - } - } - 
- for _, d := range dirs { - _, err := s.Find(ctx, d) - if err != nil { - t.Fatal(err) - } - } - - shuffle(time.Now().UnixNano(), dirs) - - for _, d := range dirs { - err := s.Remove(ctx, d) - if err != nil { - t.Fatal(err) - } - } - - nd, err := s.Node() - if err != nil { - t.Fatal(err) - } - - if len(nd.Links()) > 0 { - t.Fatal("shouldnt have any links here") - } - - err = s.Remove(ctx, "doesnt exist") - if err != os.ErrNotExist { - t.Fatal("expected error does not exist") - } -} - -func TestSetAfterMarshal(t *testing.T) { - ds := mdtest.Mock() - _, s, err := makeDir(ds, 300) - if err != nil { - t.Fatal(err) - } - ctx := context.Background() - - nd, err := s.Node() - if err != nil { - t.Fatal(err) - } - - nds, err := NewHamtFromDag(ds, nd) - if err != nil { - t.Fatal(err) - } - - empty := ft.EmptyDirNode() - for i := 0; i < 100; i++ { - err := nds.Set(ctx, fmt.Sprintf("moredirs%d", i), empty) - if err != nil { - t.Fatal(err) - } - } - - links, err := nds.EnumLinks(ctx) - if err != nil { - t.Fatal(err) - } - - if len(links) != 400 { - t.Fatal("expected 400 links") - } - - err = assertSerializationWorks(ds, nds) - if err != nil { - t.Fatal(err) - } -} - -func TestDuplicateAddShard(t *testing.T) { - ds := mdtest.Mock() - dir, _ := NewShard(ds, 256) - nd := new(dag.ProtoNode) - ctx := context.Background() - - err := dir.Set(ctx, "test", nd) - if err != nil { - t.Fatal(err) - } - - err = dir.Set(ctx, "test", nd) - if err != nil { - t.Fatal(err) - } - - lnks, err := dir.EnumLinks(ctx) - if err != nil { - t.Fatal(err) - } - - if len(lnks) != 1 { - t.Fatal("expected only one link") - } -} - -func TestLoadFailsFromNonShard(t *testing.T) { - ds := mdtest.Mock() - nd := ft.EmptyDirNode() - - _, err := NewHamtFromDag(ds, nd) - if err == nil { - t.Fatal("expected dir shard creation to fail when given normal directory") - } - - nd = new(dag.ProtoNode) - - _, err = NewHamtFromDag(ds, nd) - if err == nil { - t.Fatal("expected dir shard creation to fail when given normal directory") - } -} - -func TestFindNonExisting(t *testing.T) { - ds := mdtest.Mock() - _, s, err := makeDir(ds, 100) - if err != nil { - t.Fatal(err) - } - ctx := context.Background() - - for i := 0; i < 200; i++ { - _, err := s.Find(ctx, fmt.Sprintf("notfound%d", i)) - if err != os.ErrNotExist { - t.Fatal("expected ErrNotExist") - } - } -} - -func TestRemoveElemsAfterMarshal(t *testing.T) { - ds := mdtest.Mock() - dirs, s, err := makeDir(ds, 30) - if err != nil { - t.Fatal(err) - } - ctx := context.Background() - - sort.Strings(dirs) - - err = s.Remove(ctx, dirs[0]) - if err != nil { - t.Fatal(err) - } - - out, err := s.Find(ctx, dirs[0]) - if err == nil { - t.Fatal("expected error, got: ", out) - } - - nd, err := s.Node() - if err != nil { - t.Fatal(err) - } - - nds, err := NewHamtFromDag(ds, nd) - if err != nil { - t.Fatal(err) - } - - _, err = nds.Find(ctx, dirs[0]) - if err == nil { - t.Fatal("expected not to find ", dirs[0]) - } - - for _, d := range dirs[1:] { - _, err := nds.Find(ctx, d) - if err != nil { - t.Fatal("could not find expected link after unmarshaling") - } - } - - for _, d := range dirs[1:] { - err := nds.Remove(ctx, d) - if err != nil { - t.Fatal(err) - } - } - - links, err := nds.EnumLinks(ctx) - if err != nil { - t.Fatal(err) - } - - if len(links) != 0 { - t.Fatal("expected all links to be removed") - } - - err = assertSerializationWorks(ds, nds) - if err != nil { - t.Fatal(err) - } -} - -func TestBitfieldIndexing(t *testing.T) { - ds := mdtest.Mock() - s, _ := NewShard(ds, 256) - - set := func(i int) { - 
s.bitfield.SetBit(i) - } - - assert := func(i int, val int) { - if s.indexForBitPos(i) != val { - t.Fatalf("expected index %d to be %d", i, val) - } - } - - assert(50, 0) - set(4) - set(5) - set(60) - - assert(10, 2) - set(3) - assert(10, 3) - assert(1, 0) - - assert(100, 4) - set(50) - assert(45, 3) - set(100) - assert(100, 5) -} - -// test adding a sharded directory node as the child of another directory node. -// if improperly implemented, the parent hamt may assume the child is a part of -// itself. -func TestSetHamtChild(t *testing.T) { - ctx := context.Background() - - ds := mdtest.Mock() - s, _ := NewShard(ds, 256) - - e := ft.EmptyDirNode() - ds.Add(ctx, e) - - err := s.Set(ctx, "bar", e) - if err != nil { - t.Fatal(err) - } - - snd, err := s.Node() - if err != nil { - t.Fatal(err) - } - - _, ns, err := makeDir(ds, 50) - if err != nil { - t.Fatal(err) - } - - err = ns.Set(ctx, "foo", snd) - if err != nil { - t.Fatal(err) - } - - nsnd, err := ns.Node() - if err != nil { - t.Fatal(err) - } - - hs, err := NewHamtFromDag(ds, nsnd) - if err != nil { - t.Fatal(err) - } - - err = assertLink(hs, "bar", false) - if err != nil { - t.Fatal(err) - } - - err = assertLink(hs, "foo", true) - if err != nil { - t.Fatal(err) - } -} - -func printDiff(ds ipld.DAGService, a, b *dag.ProtoNode) { - diff, err := dagutils.Diff(context.TODO(), ds, a, b) - if err != nil { - panic(err) - } - - for _, d := range diff { - fmt.Println(d) - } -} - -func BenchmarkHAMTWalk(b *testing.B) { - ctx := context.Background() - - ds := mdtest.Mock() - sh, _ := NewShard(ds, 256) - nd, err := sh.Node() - if err != nil { - b.Fatal(err) - } - - err = ds.Add(ctx, nd) - if err != nil { - b.Fatal(err) - } - ds.Add(ctx, ft.EmptyDirNode()) - - s, err := NewHamtFromDag(ds, nd) - if err != nil { - b.Fatal(err) - } - - for j := 0; j < 1000; j++ { - err = s.Set(ctx, fmt.Sprintf("%d", j), ft.EmptyDirNode()) - if err != nil { - b.Fatal(err) - } - } - - for i := 0; i < b.N; i++ { - cnt := 0 - err = s.ForEachLink(ctx, func(l *ipld.Link) error { - cnt++ - return nil - }) - if err != nil { - b.Fatal(err) - } - if cnt < 1000 { - b.Fatal("expected 100 children") - } - } -} - -func BenchmarkHAMTSet(b *testing.B) { - ctx := context.Background() - - ds := mdtest.Mock() - sh, _ := NewShard(ds, 256) - nd, err := sh.Node() - if err != nil { - b.Fatal(err) - } - - err = ds.Add(ctx, nd) - if err != nil { - b.Fatal(err) - } - ds.Add(ctx, ft.EmptyDirNode()) - - for i := 0; i < b.N; i++ { - s, err := NewHamtFromDag(ds, nd) - if err != nil { - b.Fatal(err) - } - - err = s.Set(context.TODO(), fmt.Sprint(i), ft.EmptyDirNode()) - if err != nil { - b.Fatal(err) - } - - out, err := s.Node() - if err != nil { - b.Fatal(err) - } - - nd = out - } -} - -func TestHamtBadSize(t *testing.T) { - _, err := NewShard(nil, 7) - if err == nil { - t.Fatal("should have failed to construct hamt with bad size") - } -} diff --git a/unixfs/hamt/util.go b/unixfs/hamt/util.go deleted file mode 100644 index 5f684a21a..000000000 --- a/unixfs/hamt/util.go +++ /dev/null @@ -1,52 +0,0 @@ -package hamt - -import ( - "fmt" - "math/bits" -) - -// hashBits is a helper that allows the reading of the 'next n bits' as an integer. 
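// Editor's note, not part of the deleted file: a minimal usage sketch of the
// hashBits helper defined just below, mirroring how Shard.getValue and
// Shard.modifyValue in hamt.go (earlier in this diff) consume it. The hash()
// function is the murmur3 helper shown in hamt.go; the 8-bit read assumes the
// default 256-wide shard (logtwo(256) == 8).
//
//	hv := &hashBits{b: hash([]byte("entry-name"))} // murmur3 hash of the entry name
//	idx := hv.Next(8)                              // consume 8 bits -> child slot 0..255 at this level
//	// idx is the bit position checked/set in the shard's bitfield for this level;
//	// deeper levels call Next again on the same hashBits value.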
-type hashBits struct { - b []byte - consumed int -} - -func mkmask(n int) byte { - return (1 << uint(n)) - 1 -} - -// Next returns the next 'i' bits of the hashBits value as an integer -func (hb *hashBits) Next(i int) int { - curbi := hb.consumed / 8 - leftb := 8 - (hb.consumed % 8) - - curb := hb.b[curbi] - if i == leftb { - out := int(mkmask(i) & curb) - hb.consumed += i - return out - } else if i < leftb { - a := curb & mkmask(leftb) // mask out the high bits we don't want - b := a & ^mkmask(leftb-i) // mask out the low bits we don't want - c := b >> uint(leftb-i) // shift whats left down - hb.consumed += i - return int(c) - } else { - out := int(mkmask(leftb) & curb) - out <<= uint(i - leftb) - hb.consumed += leftb - out += hb.Next(i - leftb) - return out - } -} - -func logtwo(v int) (int, error) { - if v <= 0 { - return 0, fmt.Errorf("hamt size should be a power of two") - } - lg2 := bits.TrailingZeros(uint(v)) - if 1<= 0; coff -= 4096 { - t.Log(coff) - n, err := rs.Seek(coff, io.SeekStart) - if err != nil { - t.Fatal(err) - } - if n != coff { - t.Fatal("wasnt able to seek to the right position") - } - nread, err := rs.Read(out[coff : coff+4096]) - if err != nil { - t.Fatal(err) - } - if nread != 4096 { - t.Fatal("didnt read the correct number of bytes") - } - } - - err = arrComp(out, should) - if err != nil { - t.Fatal(err) - } -} diff --git a/unixfs/importer/balanced/builder.go b/unixfs/importer/balanced/builder.go deleted file mode 100644 index fcd87fb8c..000000000 --- a/unixfs/importer/balanced/builder.go +++ /dev/null @@ -1,255 +0,0 @@ -// Package balanced provides methods to build balanced DAGs, which are generalistic -// DAGs in which all leaves (nodes representing chunks of data) are at the same -// distance from the root. Nodes can have only a maximum number of children; to be -// able to store more leaf data nodes balanced DAGs are extended by increasing its -// depth (and having more intermediary nodes). -// -// Internal nodes are always represented by UnixFS nodes (of type `File`) encoded -// inside DAG nodes (see the `go-unixfs` package for details of UnixFS). In -// contrast, leaf nodes with data have multiple possible representations: UnixFS -// nodes as above, raw nodes with just the file data (no format) and Filestore -// nodes (that directly link to the file on disk using a format stored on a raw -// node, see the `go-ipfs/filestore` package for details of Filestore.) -// -// In the case the entire file fits into just one node it will be formatted as a -// (single) leaf node (without parent) with the possible representations already -// mentioned. This is the only scenario where the root can be of a type different -// that the UnixFS node. 
-// -// +-------------+ -// | Root 4 | -// +-------------+ -// | -// +--------------------------+----------------------------+ -// | | -// +-------------+ +-------------+ -// | Node 2 | | Node 5 | -// +-------------+ +-------------+ -// | | -// +-------------+-------------+ +-------------+ -// | | | -// +-------------+ +-------------+ +-------------+ -// | Node 1 | | Node 3 | | Node 6 | -// +-------------+ +-------------+ +-------------+ -// | | | -// +------+------+ +------+------+ +------+ -// | | | | | -// +=========+ +=========+ +=========+ +=========+ +=========+ -// | Chunk 1 | | Chunk 2 | | Chunk 3 | | Chunk 4 | | Chunk 5 | -// +=========+ +=========+ +=========+ +=========+ +=========+ -// -package balanced - -import ( - "errors" - - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers" - - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// Layout builds a balanced DAG layout. In a balanced DAG of depth 1, leaf nodes -// with data are added to a single `root` until the maximum number of links is -// reached. Then, to continue adding more data leaf nodes, a `newRoot` is created -// pointing to the old `root` (which will now become and intermediary node), -// increasing the depth of the DAG to 2. This will increase the maximum number of -// data leaf nodes the DAG can have (`Maxlinks() ^ depth`). The `fillNodeRec` -// function will add more intermediary child nodes to `newRoot` (which already has -// `root` as child) that in turn will have leaf nodes with data added to them. -// After that process is completed (the maximum number of links is reached), -// `fillNodeRec` will return and the loop will be repeated: the `newRoot` created -// will become the old `root` and a new root will be created again to increase the -// depth of the DAG. The process is repeated until there is no more data to add -// (i.e. the DagBuilderHelper’s Done() function returns true). -// -// The nodes are filled recursively, so the DAG is built from the bottom up. Leaf -// nodes are created first using the chunked file data and its size. The size is -// then bubbled up to the parent (internal) node, which aggregates all the sizes of -// its children and bubbles that combined size up to its parent, and so on up to -// the root. This way, a balanced DAG acts like a B-tree when seeking to a byte -// offset in the file the graph represents: each internal node uses the file size -// of its children as an index when seeking. -// -// `Layout` creates a root and hands it off to be filled: -// -// +-------------+ -// | Root 1 | -// +-------------+ -// | -// ( fillNodeRec fills in the ) -// ( chunks on the root. ) -// | -// +------+------+ -// | | -// + - - - - + + - - - - + -// | Chunk 1 | | Chunk 2 | -// + - - - - + + - - - - + -// -// ↓ -// When the root is full but there's more data... -// ↓ -// -// +-------------+ -// | Root 1 | -// +-------------+ -// | -// +------+------+ -// | | -// +=========+ +=========+ + - - - - + -// | Chunk 1 | | Chunk 2 | | Chunk 3 | -// +=========+ +=========+ + - - - - + -// -// ↓ -// ...Layout's job is to create a new root. -// ↓ -// -// +-------------+ -// | Root 2 | -// +-------------+ -// | -// +-------------+ - - - - - - - - + -// | | -// +-------------+ ( fillNodeRec creates the ) -// | Node 1 | ( branch that connects ) -// +-------------+ ( "Root 2" to "Chunk 3." 
) -// | | -// +------+------+ + - - - - -+ -// | | | -// +=========+ +=========+ + - - - - + -// | Chunk 1 | | Chunk 2 | | Chunk 3 | -// +=========+ +=========+ + - - - - + -// -func Layout(db *h.DagBuilderHelper) (ipld.Node, error) { - if db.Done() { - // No data, return just an empty node. - root, err := db.NewLeafNode(nil) - if err != nil { - return nil, err - } - // This works without Filestore support (`ProcessFileStore`). - // TODO: Why? Is there a test case missing? - - return db.AddNodeAndClose(root) - } - - // The first `root` will be a single leaf node with data - // (corner case), after that subsequent `root` nodes will - // always be internal nodes (with a depth > 0) that can - // be handled by the loop. - root, fileSize, err := db.NewLeafDataNode() - if err != nil { - return nil, err - } - - // Each time a DAG of a certain `depth` is filled (because it - // has reached its maximum capacity of `db.Maxlinks()` per node) - // extend it by making it a sub-DAG of a bigger DAG with `depth+1`. - for depth := 1; !db.Done(); depth++ { - - // Add the old `root` as a child of the `newRoot`. - newRoot := db.NewFSNodeOverDag(ft.TFile) - newRoot.AddChild(root, fileSize, db) - - // Fill the `newRoot` (that has the old `root` already as child) - // and make it the current `root` for the next iteration (when - // it will become "old"). - root, fileSize, err = fillNodeRec(db, newRoot, depth) - if err != nil { - return nil, err - } - } - - return db.AddNodeAndClose(root) -} - -// fillNodeRec will "fill" the given internal (non-leaf) `node` with data by -// adding child nodes to it, either leaf data nodes (if `depth` is 1) or more -// internal nodes with higher depth (and calling itself recursively on them -// until *they* are filled with data). The data to fill the node with is -// provided by DagBuilderHelper. -// -// `node` represents a (sub-)DAG root that is being filled. If called recursively, -// it is `nil`, a new node is created. If it has been called from `Layout` (see -// diagram below) it points to the new root (that increases the depth of the DAG), -// it already has a child (the old root). New children will be added to this new -// root, and those children will in turn be filled (calling `fillNodeRec` -// recursively). -// -// +-------------+ -// | `node` | -// | (new root) | -// +-------------+ -// | -// +-------------+ - - - - - - + - - - - - - - - - - - + -// | | | -// +--------------+ + - - - - - + + - - - - - + -// | (old root) | | new child | | | -// +--------------+ + - - - - - + + - - - - - + -// | | | -// +------+------+ + - - + - - - + -// | | | | -// +=========+ +=========+ + - - - - + + - - - - + -// | Chunk 1 | | Chunk 2 | | Chunk 3 | | Chunk 4 | -// +=========+ +=========+ + - - - - + + - - - - + -// -// The `node` to be filled uses the `FSNodeOverDag` abstraction that allows adding -// child nodes without packing/unpacking the UnixFS layer node (having an internal -// `ft.FSNode` cache). -// -// It returns the `ipld.Node` representation of the passed `node` filled with -// children and the `nodeFileSize` with the total size of the file chunk (leaf) -// nodes stored under this node (parent nodes store this to enable efficient -// seeking through the DAG when reading data later). -// -// warning: **children** pinned indirectly, but input node IS NOT pinned. 
-func fillNodeRec(db *h.DagBuilderHelper, node *h.FSNodeOverDag, depth int) (filledNode ipld.Node, nodeFileSize uint64, err error) { - if depth < 1 { - return nil, 0, errors.New("attempt to fillNode at depth < 1") - } - - if node == nil { - node = db.NewFSNodeOverDag(ft.TFile) - } - - // Child node created on every iteration to add to parent `node`. - // It can be a leaf node or another internal node. - var childNode ipld.Node - // File size from the child node needed to update the `FSNode` - // in `node` when adding the child. - var childFileSize uint64 - - // While we have room and there is data available to be added. - for node.NumChildren() < db.Maxlinks() && !db.Done() { - - if depth == 1 { - // Base case: add leaf node with data. - childNode, childFileSize, err = db.NewLeafDataNode() - if err != nil { - return nil, 0, err - } - } else { - // Recursion case: create an internal node to in turn keep - // descending in the DAG and adding child nodes to it. - childNode, childFileSize, err = fillNodeRec(db, nil, depth-1) - if err != nil { - return nil, 0, err - } - } - - err = node.AddChild(childNode, childFileSize, db) - if err != nil { - return nil, 0, err - } - } - - nodeFileSize = node.FileSize() - - // Get the final `dag.ProtoNode` with the `FSNode` data encoded inside. - filledNode, err = node.Commit() - if err != nil { - return nil, 0, err - } - - return filledNode, nodeFileSize, nil -} diff --git a/unixfs/importer/helpers/dagbuilder.go b/unixfs/importer/helpers/dagbuilder.go deleted file mode 100644 index 5ac8ae0ef..000000000 --- a/unixfs/importer/helpers/dagbuilder.go +++ /dev/null @@ -1,458 +0,0 @@ -package helpers - -import ( - "context" - "io" - "os" - - dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - pb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb" - - pi "gx/ipfs/QmSHjPDw8yNgLZ7cBfX7w3Smn7PHwYhNEpd4LHQQxUg35L/go-ipfs-posinfo" - chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker" - cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" - files "gx/ipfs/QmdE4gMduCKCGAcczM2F5ioYDfdeKuPix138wrES1YSr7f/go-ipfs-cmdkit/files" -) - -// DagBuilderHelper wraps together a bunch of objects needed to -// efficiently create unixfs dag trees -type DagBuilderHelper struct { - dserv ipld.DAGService - spl chunker.Splitter - recvdErr error - rawLeaves bool - nextData []byte // the next item to return. - maxlinks int - batch *ipld.Batch - prefix *cid.Prefix - - // Filestore support variables. - // ---------------------------- - // TODO: Encapsulate in `FilestoreNode` (which is basically what they are). - // - // Besides having the path this variable (if set) is used as a flag - // to indicate that Filestore should be used. - fullPath string - stat os.FileInfo - // Keeps track of the current file size added to the DAG (used in - // the balanced builder). It is assumed that the `DagBuilderHelper` - // is not reused to construct another DAG, but a new one (with a - // zero `offset`) is created. - offset uint64 -} - -// DagBuilderParams wraps configuration options to create a DagBuilderHelper -// from a chunker.Splitter. 
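// Editor's note, not part of the deleted file: a hedged sketch of how these
// pieces are typically wired together, using only the names defined in this
// diff (DagBuilderParams, DagBuilderParams.New, helpers.DefaultLinksPerBlock,
// balanced.Layout); chunker.DefaultSplitter is assumed from go-ipfs-chunker,
// and dserv/r stand in for a caller-supplied ipld.DAGService and io.Reader.
//
//	dbp := &helpers.DagBuilderParams{
//		Dagserv:  dserv, // where the DAG blocks are written
//		Maxlinks: helpers.DefaultLinksPerBlock,
//	}
//	db := dbp.New(chunker.DefaultSplitter(r)) // chunk the file data
//	root, err := balanced.Layout(db)          // root ipld.Node of the imported file DAG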
-type DagBuilderParams struct { - // Maximum number of links per intermediate node - Maxlinks int - - // RawLeaves signifies that the importer should use raw ipld nodes as leaves - // instead of using the unixfs TRaw type - RawLeaves bool - - // CID Prefix to use if set - Prefix *cid.Prefix - - // DAGService to write blocks to (required) - Dagserv ipld.DAGService - - // NoCopy signals to the chunker that it should track fileinfo for - // filestore adds - NoCopy bool - - // URL if non-empty (and NoCopy is also true) indicates that the - // file will not be stored in the datastore but instead retrieved - // from this location via the urlstore. - URL string -} - -// New generates a new DagBuilderHelper from the given params and a given -// chunker.Splitter as data source. -func (dbp *DagBuilderParams) New(spl chunker.Splitter) *DagBuilderHelper { - db := &DagBuilderHelper{ - dserv: dbp.Dagserv, - spl: spl, - rawLeaves: dbp.RawLeaves, - prefix: dbp.Prefix, - maxlinks: dbp.Maxlinks, - batch: ipld.NewBatch(context.TODO(), dbp.Dagserv), - } - if fi, ok := spl.Reader().(files.FileInfo); dbp.NoCopy && ok { - db.fullPath = fi.AbsPath() - db.stat = fi.Stat() - } - - if dbp.URL != "" && dbp.NoCopy { - db.fullPath = dbp.URL - } - return db -} - -// prepareNext consumes the next item from the splitter and puts it -// in the nextData field. it is idempotent-- if nextData is full -// it will do nothing. -func (db *DagBuilderHelper) prepareNext() { - // if we already have data waiting to be consumed, we're ready - if db.nextData != nil || db.recvdErr != nil { - return - } - - db.nextData, db.recvdErr = db.spl.NextBytes() - if db.recvdErr == io.EOF { - db.recvdErr = nil - } -} - -// Done returns whether or not we're done consuming the incoming data. -func (db *DagBuilderHelper) Done() bool { - // ensure we have an accurate perspective on data - // as `done` this may be called before `next`. - db.prepareNext() // idempotent - if db.recvdErr != nil { - return false - } - return db.nextData == nil -} - -// Next returns the next chunk of data to be inserted into the dag -// if it returns nil, that signifies that the stream is at an end, and -// that the current building operation should finish. -func (db *DagBuilderHelper) Next() ([]byte, error) { - db.prepareNext() // idempotent - d := db.nextData - db.nextData = nil // signal we've consumed it - if db.recvdErr != nil { - return nil, db.recvdErr - } - return d, nil -} - -// GetDagServ returns the dagservice object this Helper is using -func (db *DagBuilderHelper) GetDagServ() ipld.DAGService { - return db.dserv -} - -// NewUnixfsNode creates a new Unixfs node to represent a file. -func (db *DagBuilderHelper) NewUnixfsNode() *UnixfsNode { - n := &UnixfsNode{ - node: new(dag.ProtoNode), - ufmt: ft.NewFSNode(ft.TFile), - } - n.SetPrefix(db.prefix) - return n -} - -// GetPrefix returns the internal `cid.Prefix` set in the builder. -func (db *DagBuilderHelper) GetPrefix() *cid.Prefix { - return db.prefix -} - -// NewLeaf creates a leaf node filled with data. If rawLeaves is -// defined than a raw leaf will be returned. Otherwise, if data is -// nil the type field will be TRaw (for backwards compatibility), if -// data is defined (but possibly empty) the type field will be TRaw. 
-func (db *DagBuilderHelper) NewLeaf(data []byte) (*UnixfsNode, error) { - if len(data) > BlockSizeLimit { - return nil, ErrSizeLimitExceeded - } - - if db.rawLeaves { - if db.prefix == nil { - return &UnixfsNode{ - rawnode: dag.NewRawNode(data), - raw: true, - }, nil - } - rawnode, err := dag.NewRawNodeWPrefix(data, *db.prefix) - if err != nil { - return nil, err - } - return &UnixfsNode{ - rawnode: rawnode, - raw: true, - }, nil - } - - if data == nil { - return db.NewUnixfsNode(), nil - } - - blk := db.newUnixfsBlock() - blk.SetData(data) - return blk, nil -} - -// NewLeafNode is a variation from `NewLeaf` (see its description) that -// returns an `ipld.Node` instead. -func (db *DagBuilderHelper) NewLeafNode(data []byte) (ipld.Node, error) { - if len(data) > BlockSizeLimit { - return nil, ErrSizeLimitExceeded - } - - if db.rawLeaves { - // Encapsulate the data in a raw node. - if db.prefix == nil { - return dag.NewRawNode(data), nil - } - rawnode, err := dag.NewRawNodeWPrefix(data, *db.prefix) - if err != nil { - return nil, err - } - return rawnode, nil - } - - // Encapsulate the data in UnixFS node (instead of a raw node). - fsNodeOverDag := db.NewFSNodeOverDag(ft.TFile) - fsNodeOverDag.SetFileData(data) - node, err := fsNodeOverDag.Commit() - if err != nil { - return nil, err - } - // TODO: Encapsulate this sequence of calls into a function that - // just returns the final `ipld.Node` avoiding going through - // `FSNodeOverDag`. - // TODO: Using `TFile` for backwards-compatibility, a bug in the - // balanced builder was causing the leaf nodes to be generated - // with this type instead of `TRaw`, the one that should be used - // (like the trickle builder does). - // (See https://github.com/ipfs/go-ipfs/pull/5120.) - - return node, nil -} - -// newUnixfsBlock creates a new Unixfs node to represent a raw data block -func (db *DagBuilderHelper) newUnixfsBlock() *UnixfsNode { - n := &UnixfsNode{ - node: new(dag.ProtoNode), - ufmt: ft.NewFSNode(ft.TRaw), - } - n.SetPrefix(db.prefix) - return n -} - -// FillNodeLayer will add datanodes as children to the give node until -// at most db.indirSize nodes are added. -func (db *DagBuilderHelper) FillNodeLayer(node *UnixfsNode) error { - - // while we have room AND we're not done - for node.NumChildren() < db.maxlinks && !db.Done() { - child, err := db.GetNextDataNode() - if err != nil { - return err - } - - if err := node.AddChild(child, db); err != nil { - return err - } - } - - return nil -} - -// GetNextDataNode builds a UnixFsNode with the data obtained from the -// Splitter, given the constraints (BlockSizeLimit, RawLeaves) specified -// when creating the DagBuilderHelper. -func (db *DagBuilderHelper) GetNextDataNode() (*UnixfsNode, error) { - data, err := db.Next() - if err != nil { - return nil, err - } - - if data == nil { // we're done! - return nil, nil - } - - return db.NewLeaf(data) -} - -// NewLeafDataNode is a variation of `GetNextDataNode` that returns -// an `ipld.Node` instead. It builds the `node` with the data obtained -// from the Splitter and returns it with the `dataSize` (that will be -// used to keep track of the DAG file size). The size of the data is -// computed here because after that it will be hidden by `NewLeafNode` -// inside a generic `ipld.Node` representation. 
-func (db *DagBuilderHelper) NewLeafDataNode() (node ipld.Node, dataSize uint64, err error) { - fileData, err := db.Next() - if err != nil { - return nil, 0, err - } - dataSize = uint64(len(fileData)) - - // Create a new leaf node containing the file chunk data. - node, err = db.NewLeafNode(fileData) - if err != nil { - return nil, 0, err - } - - // Convert this leaf to a `FilestoreNode` if needed. - node = db.ProcessFileStore(node, dataSize) - - return node, dataSize, nil -} - -// ProcessFileStore generates, if Filestore is being used, the -// `FilestoreNode` representation of the `ipld.Node` that -// contains the file data. If Filestore is not being used just -// return the same node to continue with its addition to the DAG. -// -// The `db.offset` is updated at this point (instead of when -// `NewLeafDataNode` is called, both work in tandem but the -// offset is more related to this function). -func (db *DagBuilderHelper) ProcessFileStore(node ipld.Node, dataSize uint64) ipld.Node { - // Check if Filestore is being used. - if db.fullPath != "" { - // Check if the node is actually a raw node (needed for - // Filestore support). - if _, ok := node.(*dag.RawNode); ok { - fn := &pi.FilestoreNode{ - Node: node, - PosInfo: &pi.PosInfo{ - Offset: db.offset, - FullPath: db.fullPath, - Stat: db.stat, - }, - } - - // Update `offset` with the size of the data generated by `db.Next`. - db.offset += dataSize - - return fn - } - } - - // Filestore is not used, return the same `node` argument. - return node -} - -// Add sends a node to the DAGService, and returns it. -func (db *DagBuilderHelper) Add(node *UnixfsNode) (ipld.Node, error) { - dn, err := node.GetDagNode() - if err != nil { - return nil, err - } - - err = db.dserv.Add(context.TODO(), dn) - if err != nil { - return nil, err - } - - return dn, nil -} - -// Maxlinks returns the configured maximum number for links -// for nodes built with this helper. -func (db *DagBuilderHelper) Maxlinks() int { - return db.maxlinks -} - -// Close has the DAGService perform a batch Commit operation. -// It should be called at the end of the building process to make -// sure all data is persisted. -func (db *DagBuilderHelper) Close() error { - return db.batch.Commit() -} - -// AddNodeAndClose adds the last `ipld.Node` from the DAG and -// closes the builder. It returns the same `node` passed as -// argument. -func (db *DagBuilderHelper) AddNodeAndClose(node ipld.Node) (ipld.Node, error) { - err := db.batch.Add(node) - if err != nil { - return nil, err - } - - err = db.Close() - if err != nil { - return nil, err - } - - return node, nil -} - -// FSNodeOverDag encapsulates an `unixfs.FSNode` that will be stored in a -// `dag.ProtoNode`. Instead of just having a single `ipld.Node` that -// would need to be constantly (un)packed to access and modify its -// internal `FSNode` in the process of creating a UnixFS DAG, this -// structure stores an `FSNode` cache to manipulate it (add child nodes) -// directly , and only when the node has reached its final (immutable) state -// (signaled by calling `Commit()`) is it committed to a single (indivisible) -// `ipld.Node`. -// -// It is used mainly for internal (non-leaf) nodes, and for some -// representations of data leaf nodes (that don't use raw nodes or -// Filestore). -// -// It aims to replace the `UnixfsNode` structure which encapsulated too -// many possible node state combinations. -// -// TODO: Revisit the name. 
-type FSNodeOverDag struct { - dag *dag.ProtoNode - file *ft.FSNode -} - -// NewFSNodeOverDag creates a new `dag.ProtoNode` and `ft.FSNode` -// decoupled from one onther (and will continue in that way until -// `Commit` is called), with `fsNodeType` specifying the type of -// the UnixFS layer node (either `File` or `Raw`). -func (db *DagBuilderHelper) NewFSNodeOverDag(fsNodeType pb.Data_DataType) *FSNodeOverDag { - node := new(FSNodeOverDag) - node.dag = new(dag.ProtoNode) - node.dag.SetPrefix(db.GetPrefix()) - - node.file = ft.NewFSNode(fsNodeType) - - return node -} - -// AddChild adds a `child` `ipld.Node` to both node layers. The -// `dag.ProtoNode` creates a link to the child node while the -// `ft.FSNode` stores its file size (that is, not the size of the -// node but the size of the file data that it is storing at the -// UnixFS layer). The child is also stored in the `DAGService`. -func (n *FSNodeOverDag) AddChild(child ipld.Node, fileSize uint64, db *DagBuilderHelper) error { - err := n.dag.AddNodeLink("", child) - if err != nil { - return err - } - - n.file.AddBlockSize(fileSize) - - return db.batch.Add(child) -} - -// Commit unifies (resolves) the cache nodes into a single `ipld.Node` -// that represents them: the `ft.FSNode` is encoded inside the -// `dag.ProtoNode`. -// -// TODO: Evaluate making it read-only after committing. -func (n *FSNodeOverDag) Commit() (ipld.Node, error) { - fileData, err := n.file.GetBytes() - if err != nil { - return nil, err - } - n.dag.SetData(fileData) - - return n.dag, nil -} - -// NumChildren returns the number of children of the `ft.FSNode`. -func (n *FSNodeOverDag) NumChildren() int { - return n.file.NumChildren() -} - -// FileSize returns the `Filesize` attribute from the underlying -// representation of the `ft.FSNode`. -func (n *FSNodeOverDag) FileSize() uint64 { - return n.file.FileSize() -} - -// SetFileData stores the `fileData` in the `ft.FSNode`. It -// should be used only when `FSNodeOverDag` represents a leaf -// node (internal nodes don't carry data, just file sizes). -func (n *FSNodeOverDag) SetFileData(fileData []byte) { - n.file.SetData(fileData) -} diff --git a/unixfs/importer/helpers/helpers.go b/unixfs/importer/helpers/helpers.go deleted file mode 100644 index 312ba67f0..000000000 --- a/unixfs/importer/helpers/helpers.go +++ /dev/null @@ -1,173 +0,0 @@ -package helpers - -import ( - "context" - "fmt" - "os" - - dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - - pi "gx/ipfs/QmSHjPDw8yNgLZ7cBfX7w3Smn7PHwYhNEpd4LHQQxUg35L/go-ipfs-posinfo" - cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// BlockSizeLimit specifies the maximum size an imported block can have. -var BlockSizeLimit = 1048576 // 1 MB - -// rough estimates on expected sizes -var roughLinkBlockSize = 1 << 13 // 8KB -var roughLinkSize = 34 + 8 + 5 // sha256 multihash + size + no name + protobuf framing - -// DefaultLinksPerBlock governs how the importer decides how many links there -// will be per block. 
This calculation is based on expected distributions of: -// * the expected distribution of block sizes -// * the expected distribution of link sizes -// * desired access speed -// For now, we use: -// -// var roughLinkBlockSize = 1 << 13 // 8KB -// var roughLinkSize = 288 // sha256 + framing + name -// var DefaultLinksPerBlock = (roughLinkBlockSize / roughLinkSize) -// -// See calc_test.go -var DefaultLinksPerBlock = roughLinkBlockSize / roughLinkSize - -// ErrSizeLimitExceeded signals that a block is larger than BlockSizeLimit. -var ErrSizeLimitExceeded = fmt.Errorf("object size limit exceeded") - -// UnixfsNode is a struct created to aid in the generation -// of unixfs DAG trees -type UnixfsNode struct { - raw bool - rawnode *dag.RawNode - node *dag.ProtoNode - ufmt *ft.FSNode - posInfo *pi.PosInfo -} - -// NewUnixfsNodeFromDag reconstructs a Unixfs node from a given dag node -func NewUnixfsNodeFromDag(nd *dag.ProtoNode) (*UnixfsNode, error) { - mb, err := ft.FSNodeFromBytes(nd.Data()) - if err != nil { - return nil, err - } - - return &UnixfsNode{ - node: nd, - ufmt: mb, - }, nil -} - -// SetPrefix sets the CID Prefix -func (n *UnixfsNode) SetPrefix(prefix *cid.Prefix) { - n.node.SetPrefix(prefix) -} - -// NumChildren returns the number of children referenced by this UnixfsNode. -func (n *UnixfsNode) NumChildren() int { - return n.ufmt.NumChildren() -} - -// GetChild gets the ith child of this node from the given DAGService. -func (n *UnixfsNode) GetChild(ctx context.Context, i int, ds ipld.DAGService) (*UnixfsNode, error) { - nd, err := n.node.Links()[i].GetNode(ctx, ds) - if err != nil { - return nil, err - } - - pbn, ok := nd.(*dag.ProtoNode) - if !ok { - return nil, dag.ErrNotProtobuf - } - - return NewUnixfsNodeFromDag(pbn) -} - -// AddChild adds the given UnixfsNode as a child of the receiver. -// The passed in DagBuilderHelper is used to store the child node an -// pin it locally so it doesnt get lost. -func (n *UnixfsNode) AddChild(child *UnixfsNode, db *DagBuilderHelper) error { - n.ufmt.AddBlockSize(child.FileSize()) - - childnode, err := child.GetDagNode() - if err != nil { - return err - } - - // Add a link to this node without storing a reference to the memory - // This way, we avoid nodes building up and consuming all of our RAM - err = n.node.AddNodeLink("", childnode) - if err != nil { - return err - } - - err = db.batch.Add(childnode) - - return err -} - -// RemoveChild deletes the child node at the given index. -func (n *UnixfsNode) RemoveChild(index int, dbh *DagBuilderHelper) { - n.ufmt.RemoveBlockSize(index) - n.node.SetLinks(append(n.node.Links()[:index], n.node.Links()[index+1:]...)) -} - -// SetData stores data in this node. -func (n *UnixfsNode) SetData(data []byte) { - n.ufmt.SetData(data) -} - -// FileSize returns the total file size of this tree (including children) -// In the case of raw nodes, it returns the length of the -// raw data. -func (n *UnixfsNode) FileSize() uint64 { - if n.raw { - return uint64(len(n.rawnode.RawData())) - } - return n.ufmt.FileSize() -} - -// SetPosInfo sets information about the offset of the data of this node in a -// filesystem file. -func (n *UnixfsNode) SetPosInfo(offset uint64, fullPath string, stat os.FileInfo) { - n.posInfo = &pi.PosInfo{ - Offset: offset, - FullPath: fullPath, - Stat: stat, - } -} - -// GetDagNode fills out the proper formatting for the unixfs node -// inside of a DAG node and returns the dag node. 
-func (n *UnixfsNode) GetDagNode() (ipld.Node, error) { - nd, err := n.getBaseDagNode() - if err != nil { - return nil, err - } - - if n.posInfo != nil { - if rn, ok := nd.(*dag.RawNode); ok { - return &pi.FilestoreNode{ - Node: rn, - PosInfo: n.posInfo, - }, nil - } - } - - return nd, nil -} - -func (n *UnixfsNode) getBaseDagNode() (ipld.Node, error) { - if n.raw { - return n.rawnode, nil - } - - data, err := n.ufmt.GetBytes() - if err != nil { - return nil, err - } - n.node.SetData(data) - return n.node, nil -} diff --git a/unixfs/importer/importer.go b/unixfs/importer/importer.go deleted file mode 100644 index eeb8090c3..000000000 --- a/unixfs/importer/importer.go +++ /dev/null @@ -1,34 +0,0 @@ -// Package importer implements utilities used to create IPFS DAGs from files -// and readers. -package importer - -import ( - chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" - - bal "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/balanced" - h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers" - trickle "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/trickle" -) - -// BuildDagFromReader creates a DAG given a DAGService and a Splitter -// implementation (Splitters are io.Readers), using a Balanced layout. -func BuildDagFromReader(ds ipld.DAGService, spl chunker.Splitter) (ipld.Node, error) { - dbp := h.DagBuilderParams{ - Dagserv: ds, - Maxlinks: h.DefaultLinksPerBlock, - } - - return bal.Layout(dbp.New(spl)) -} - -// BuildTrickleDagFromReader creates a DAG given a DAGService and a Splitter -// implementation (Splitters are io.Readers), using a Trickle Layout. 
-func BuildTrickleDagFromReader(ds ipld.DAGService, spl chunker.Splitter) (ipld.Node, error) { - dbp := h.DagBuilderParams{ - Dagserv: ds, - Maxlinks: h.DefaultLinksPerBlock, - } - - return trickle.Layout(dbp.New(spl)) -} diff --git a/unixfs/importer/importer_test.go b/unixfs/importer/importer_test.go deleted file mode 100644 index 25a5356db..000000000 --- a/unixfs/importer/importer_test.go +++ /dev/null @@ -1,118 +0,0 @@ -package importer - -import ( - "bytes" - "context" - "io" - "io/ioutil" - "testing" - - mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test" - uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io" - - u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util" - chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -func getBalancedDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DAGService) { - ds := mdtest.Mock() - r := io.LimitReader(u.NewTimeSeededRand(), size) - nd, err := BuildDagFromReader(ds, chunker.NewSizeSplitter(r, blksize)) - if err != nil { - t.Fatal(err) - } - return nd, ds -} - -func getTrickleDag(t testing.TB, size int64, blksize int64) (ipld.Node, ipld.DAGService) { - ds := mdtest.Mock() - r := io.LimitReader(u.NewTimeSeededRand(), size) - nd, err := BuildTrickleDagFromReader(ds, chunker.NewSizeSplitter(r, blksize)) - if err != nil { - t.Fatal(err) - } - return nd, ds -} - -func TestBalancedDag(t *testing.T) { - ds := mdtest.Mock() - buf := make([]byte, 10000) - u.NewTimeSeededRand().Read(buf) - r := bytes.NewReader(buf) - - nd, err := BuildDagFromReader(ds, chunker.DefaultSplitter(r)) - if err != nil { - t.Fatal(err) - } - - dr, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(dr) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(out, buf) { - t.Fatal("bad read") - } -} - -func BenchmarkBalancedReadSmallBlock(b *testing.B) { - b.StopTimer() - nbytes := int64(10000000) - nd, ds := getBalancedDag(b, nbytes, 4096) - - b.SetBytes(nbytes) - b.StartTimer() - runReadBench(b, nd, ds) -} - -func BenchmarkTrickleReadSmallBlock(b *testing.B) { - b.StopTimer() - nbytes := int64(10000000) - nd, ds := getTrickleDag(b, nbytes, 4096) - - b.SetBytes(nbytes) - b.StartTimer() - runReadBench(b, nd, ds) -} - -func BenchmarkBalancedReadFull(b *testing.B) { - b.StopTimer() - nbytes := int64(10000000) - nd, ds := getBalancedDag(b, nbytes, chunker.DefaultBlockSize) - - b.SetBytes(nbytes) - b.StartTimer() - runReadBench(b, nd, ds) -} - -func BenchmarkTrickleReadFull(b *testing.B) { - b.StopTimer() - nbytes := int64(10000000) - nd, ds := getTrickleDag(b, nbytes, chunker.DefaultBlockSize) - - b.SetBytes(nbytes) - b.StartTimer() - runReadBench(b, nd, ds) -} - -func runReadBench(b *testing.B, nd ipld.Node, ds ipld.DAGService) { - for i := 0; i < b.N; i++ { - ctx, cancel := context.WithCancel(context.Background()) - read, err := uio.NewDagReader(ctx, nd, ds) - if err != nil { - b.Fatal(err) - } - - _, err = read.WriteTo(ioutil.Discard) - if err != nil && err != io.EOF { - b.Fatal(err) - } - cancel() - } -} diff --git a/unixfs/importer/trickle/trickle_test.go b/unixfs/importer/trickle/trickle_test.go deleted file mode 100644 index c1458a67d..000000000 --- a/unixfs/importer/trickle/trickle_test.go +++ /dev/null @@ -1,640 +0,0 @@ -package trickle - -import ( - "bytes" - "context" - "fmt" - "io" - "io/ioutil" - mrand 
"math/rand" - "testing" - - merkledag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers" - uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io" - - u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util" - chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -type UseRawLeaves bool - -const ( - ProtoBufLeaves UseRawLeaves = false - RawLeaves UseRawLeaves = true -) - -func runBothSubtests(t *testing.T, tfunc func(*testing.T, UseRawLeaves)) { - t.Run("leaves=ProtoBuf", func(t *testing.T) { tfunc(t, ProtoBufLeaves) }) - t.Run("leaves=Raw", func(t *testing.T) { tfunc(t, RawLeaves) }) -} - -func buildTestDag(ds ipld.DAGService, spl chunker.Splitter, rawLeaves UseRawLeaves) (*merkledag.ProtoNode, error) { - dbp := h.DagBuilderParams{ - Dagserv: ds, - Maxlinks: h.DefaultLinksPerBlock, - RawLeaves: bool(rawLeaves), - } - - nd, err := Layout(dbp.New(spl)) - if err != nil { - return nil, err - } - - pbnd, ok := nd.(*merkledag.ProtoNode) - if !ok { - return nil, merkledag.ErrNotProtobuf - } - - return pbnd, VerifyTrickleDagStructure(pbnd, VerifyParams{ - Getter: ds, - Direct: dbp.Maxlinks, - LayerRepeat: layerRepeat, - RawLeaves: bool(rawLeaves), - }) -} - -//Test where calls to read are smaller than the chunk size -func TestSizeBasedSplit(t *testing.T) { - runBothSubtests(t, testSizeBasedSplit) -} - -func testSizeBasedSplit(t *testing.T, rawLeaves UseRawLeaves) { - if testing.Short() { - t.SkipNow() - } - bs := chunker.SizeSplitterGen(512) - testFileConsistency(t, bs, 32*512, rawLeaves) - - bs = chunker.SizeSplitterGen(4096) - testFileConsistency(t, bs, 32*4096, rawLeaves) - - // Uneven offset - testFileConsistency(t, bs, 31*4095, rawLeaves) -} - -func dup(b []byte) []byte { - o := make([]byte, len(b)) - copy(o, b) - return o -} - -func testFileConsistency(t *testing.T, bs chunker.SplitterGen, nbytes int, rawLeaves UseRawLeaves) { - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(should) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, bs(read), rawLeaves) - if err != nil { - t.Fatal(err) - } - - r, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(r) - if err != nil { - t.Fatal(err) - } - - err = arrComp(out, should) - if err != nil { - t.Fatal(err) - } -} - -func TestBuilderConsistency(t *testing.T) { - runBothSubtests(t, testBuilderConsistency) -} - -func testBuilderConsistency(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := 100000 - buf := new(bytes.Buffer) - io.CopyN(buf, u.NewTimeSeededRand(), int64(nbytes)) - should := dup(buf.Bytes()) - dagserv := mdtest.Mock() - nd, err := buildTestDag(dagserv, chunker.DefaultSplitter(buf), rawLeaves) - if err != nil { - t.Fatal(err) - } - r, err := uio.NewDagReader(context.Background(), nd, dagserv) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(r) - if err != nil { - t.Fatal(err) - } - - err = arrComp(out, should) - if err != nil { - t.Fatal(err) - } -} - -func arrComp(a, b []byte) error { - if len(a) != len(b) { - return fmt.Errorf("arrays differ in length. 
%d != %d", len(a), len(b)) - } - for i, v := range a { - if v != b[i] { - return fmt.Errorf("arrays differ at index: %d", i) - } - } - return nil -} - -func TestIndirectBlocks(t *testing.T) { - runBothSubtests(t, testIndirectBlocks) -} - -func testIndirectBlocks(t *testing.T, rawLeaves UseRawLeaves) { - splitter := chunker.SizeSplitterGen(512) - nbytes := 1024 * 1024 - buf := make([]byte, nbytes) - u.NewTimeSeededRand().Read(buf) - - read := bytes.NewReader(buf) - - ds := mdtest.Mock() - dag, err := buildTestDag(ds, splitter(read), rawLeaves) - if err != nil { - t.Fatal(err) - } - - reader, err := uio.NewDagReader(context.Background(), dag, ds) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(reader) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(out, buf) { - t.Fatal("Not equal!") - } -} - -func TestSeekingBasic(t *testing.T) { - runBothSubtests(t, testSeekingBasic) -} - -func testSeekingBasic(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := int64(10 * 1024) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(should) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 512), rawLeaves) - if err != nil { - t.Fatal(err) - } - - rs, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - start := int64(4000) - n, err := rs.Seek(start, io.SeekStart) - if err != nil { - t.Fatal(err) - } - if n != start { - t.Fatal("Failed to seek to correct offset") - } - - out, err := ioutil.ReadAll(rs) - if err != nil { - t.Fatal(err) - } - - err = arrComp(out, should[start:]) - if err != nil { - t.Fatal(err) - } -} - -func TestSeekToBegin(t *testing.T) { - runBothSubtests(t, testSeekToBegin) -} - -func testSeekToBegin(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := int64(10 * 1024) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(should) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves) - if err != nil { - t.Fatal(err) - } - - rs, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - n, err := io.CopyN(ioutil.Discard, rs, 1024*4) - if err != nil { - t.Fatal(err) - } - if n != 4096 { - t.Fatal("Copy didnt copy enough bytes") - } - - seeked, err := rs.Seek(0, io.SeekStart) - if err != nil { - t.Fatal(err) - } - if seeked != 0 { - t.Fatal("Failed to seek to beginning") - } - - out, err := ioutil.ReadAll(rs) - if err != nil { - t.Fatal(err) - } - - err = arrComp(out, should) - if err != nil { - t.Fatal(err) - } -} - -func TestSeekToAlmostBegin(t *testing.T) { - runBothSubtests(t, testSeekToAlmostBegin) -} - -func testSeekToAlmostBegin(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := int64(10 * 1024) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(should) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves) - if err != nil { - t.Fatal(err) - } - - rs, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - n, err := io.CopyN(ioutil.Discard, rs, 1024*4) - if err != nil { - t.Fatal(err) - } - if n != 4096 { - t.Fatal("Copy didnt copy enough bytes") - } - - seeked, err := rs.Seek(1, io.SeekStart) - if err != nil { - t.Fatal(err) - } - if seeked != 1 { - t.Fatal("Failed to seek to almost beginning") - } - - out, err := ioutil.ReadAll(rs) - if err != nil { - t.Fatal(err) - } - - err = arrComp(out, 
should[1:]) - if err != nil { - t.Fatal(err) - } -} - -func TestSeekEnd(t *testing.T) { - runBothSubtests(t, testSeekEnd) -} - -func testSeekEnd(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := int64(50 * 1024) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(should) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves) - if err != nil { - t.Fatal(err) - } - - rs, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - seeked, err := rs.Seek(0, io.SeekEnd) - if err != nil { - t.Fatal(err) - } - if seeked != nbytes { - t.Fatal("Failed to seek to end") - } -} - -func TestSeekEndSingleBlockFile(t *testing.T) { - runBothSubtests(t, testSeekEndSingleBlockFile) -} - -func testSeekEndSingleBlockFile(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := int64(100) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(should) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 5000), rawLeaves) - if err != nil { - t.Fatal(err) - } - - rs, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - seeked, err := rs.Seek(0, io.SeekEnd) - if err != nil { - t.Fatal(err) - } - if seeked != nbytes { - t.Fatal("Failed to seek to end") - } -} - -func TestSeekingStress(t *testing.T) { - runBothSubtests(t, testSeekingStress) -} - -func testSeekingStress(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := int64(1024 * 1024) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(should) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 1000), rawLeaves) - if err != nil { - t.Fatal(err) - } - - rs, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - testbuf := make([]byte, nbytes) - for i := 0; i < 50; i++ { - offset := mrand.Intn(int(nbytes)) - l := int(nbytes) - offset - n, err := rs.Seek(int64(offset), io.SeekStart) - if err != nil { - t.Fatal(err) - } - if n != int64(offset) { - t.Fatal("Seek failed to move to correct position") - } - - nread, err := rs.Read(testbuf[:l]) - if err != nil { - t.Fatal(err) - } - if nread != l { - t.Fatal("Failed to read enough bytes") - } - - err = arrComp(testbuf[:l], should[offset:offset+l]) - if err != nil { - t.Fatal(err) - } - } - -} - -func TestSeekingConsistency(t *testing.T) { - runBothSubtests(t, testSeekingConsistency) -} - -func testSeekingConsistency(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := int64(128 * 1024) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(should) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves) - if err != nil { - t.Fatal(err) - } - - rs, err := uio.NewDagReader(context.Background(), nd, ds) - if err != nil { - t.Fatal(err) - } - - out := make([]byte, nbytes) - - for coff := nbytes - 4096; coff >= 0; coff -= 4096 { - t.Log(coff) - n, err := rs.Seek(coff, io.SeekStart) - if err != nil { - t.Fatal(err) - } - if n != coff { - t.Fatal("wasnt able to seek to the right position") - } - nread, err := rs.Read(out[coff : coff+4096]) - if err != nil { - t.Fatal(err) - } - if nread != 4096 { - t.Fatal("didnt read the correct number of bytes") - } - } - - err = arrComp(out, should) - if err != nil { - t.Fatal(err) - } -} - -func TestAppend(t *testing.T) { - runBothSubtests(t, testAppend) -} - 
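The deleted trickle tests above and below all exercise one round-trip pattern: chunk a random buffer, lay it out as a trickle DAG, then read it back through a DagReader and compare byte-for-byte. The following condensed sketch of that pattern reuses only helpers this test file already imports or defines (buildTestDag, arrComp, mdtest.Mock, chunker, uio); the function name exampleTrickleRoundTrip is illustrative and was not part of the original file.

func exampleTrickleRoundTrip(t *testing.T, rawLeaves UseRawLeaves) {
	// Random input data, chunked into 512-byte blocks.
	data := make([]byte, 64*1024)
	u.NewTimeSeededRand().Read(data)

	ds := mdtest.Mock()
	nd, err := buildTestDag(ds, chunker.NewSizeSplitter(bytes.NewReader(data), 512), rawLeaves)
	if err != nil {
		t.Fatal(err)
	}

	// Read the DAG back and verify it reproduces the original bytes.
	r, err := uio.NewDagReader(context.Background(), nd, ds)
	if err != nil {
		t.Fatal(err)
	}
	out, err := ioutil.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}
	if err := arrComp(out, data); err != nil {
		t.Fatal(err)
	}
}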
-func testAppend(t *testing.T, rawLeaves UseRawLeaves) { - nbytes := int64(128 * 1024) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - // Reader for half the bytes - read := bytes.NewReader(should[:nbytes/2]) - ds := mdtest.Mock() - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves) - if err != nil { - t.Fatal(err) - } - - dbp := &h.DagBuilderParams{ - Dagserv: ds, - Maxlinks: h.DefaultLinksPerBlock, - RawLeaves: bool(rawLeaves), - } - - r := bytes.NewReader(should[nbytes/2:]) - - ctx := context.Background() - nnode, err := Append(ctx, nd, dbp.New(chunker.NewSizeSplitter(r, 500))) - if err != nil { - t.Fatal(err) - } - - err = VerifyTrickleDagStructure(nnode, VerifyParams{ - Getter: ds, - Direct: dbp.Maxlinks, - LayerRepeat: layerRepeat, - RawLeaves: bool(rawLeaves), - }) - if err != nil { - t.Fatal(err) - } - - fread, err := uio.NewDagReader(ctx, nnode, ds) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(fread) - if err != nil { - t.Fatal(err) - } - - err = arrComp(out, should) - if err != nil { - t.Fatal(err) - } -} - -// This test appends one byte at a time to an empty file -func TestMultipleAppends(t *testing.T) { - runBothSubtests(t, testMultipleAppends) -} - -func testMultipleAppends(t *testing.T, rawLeaves UseRawLeaves) { - ds := mdtest.Mock() - - // TODO: fix small size appends and make this number bigger - nbytes := int64(1000) - should := make([]byte, nbytes) - u.NewTimeSeededRand().Read(should) - - read := bytes.NewReader(nil) - nd, err := buildTestDag(ds, chunker.NewSizeSplitter(read, 500), rawLeaves) - if err != nil { - t.Fatal(err) - } - - dbp := &h.DagBuilderParams{ - Dagserv: ds, - Maxlinks: 4, - RawLeaves: bool(rawLeaves), - } - - spl := chunker.SizeSplitterGen(500) - - ctx := context.Background() - for i := 0; i < len(should); i++ { - - nnode, err := Append(ctx, nd, dbp.New(spl(bytes.NewReader(should[i:i+1])))) - if err != nil { - t.Fatal(err) - } - - err = VerifyTrickleDagStructure(nnode, VerifyParams{ - Getter: ds, - Direct: dbp.Maxlinks, - LayerRepeat: layerRepeat, - RawLeaves: bool(rawLeaves), - }) - if err != nil { - t.Fatal(err) - } - - fread, err := uio.NewDagReader(ctx, nnode, ds) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(fread) - if err != nil { - t.Fatal(err) - } - - err = arrComp(out, should[:i+1]) - if err != nil { - t.Fatal(err) - } - } -} - -func TestAppendSingleBytesToEmpty(t *testing.T) { - ds := mdtest.Mock() - - data := []byte("AB") - - nd := new(merkledag.ProtoNode) - nd.SetData(ft.FilePBData(nil, 0)) - - dbp := &h.DagBuilderParams{ - Dagserv: ds, - Maxlinks: 4, - } - - spl := chunker.SizeSplitterGen(500) - - ctx := context.Background() - nnode, err := Append(ctx, nd, dbp.New(spl(bytes.NewReader(data[:1])))) - if err != nil { - t.Fatal(err) - } - - nnode, err = Append(ctx, nnode, dbp.New(spl(bytes.NewReader(data[1:])))) - if err != nil { - t.Fatal(err) - } - - fread, err := uio.NewDagReader(ctx, nnode, ds) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(fread) - if err != nil { - t.Fatal(err) - } - - fmt.Println(out, data) - err = arrComp(out, data) - if err != nil { - t.Fatal(err) - } -} diff --git a/unixfs/importer/trickle/trickledag.go b/unixfs/importer/trickle/trickledag.go deleted file mode 100644 index 45d5c83df..000000000 --- a/unixfs/importer/trickle/trickledag.go +++ /dev/null @@ -1,366 +0,0 @@ -// Package trickle allows to build trickle DAGs. 
-// In this type of DAG, non-leave nodes are first filled -// with data leaves, and then incorporate "layers" of subtrees -// as additional links. -// -// Each layer is a trickle sub-tree and is limited by an increasing -// maximum depth. Thus, the nodes first layer -// can only hold leaves (depth 1) but subsequent layers can grow deeper. -// By default, this module places 4 nodes per layer (that is, 4 subtrees -// of the same maximum depth before increasing it). -// -// Trickle DAGs are very good for sequentially reading data, as the -// first data leaves are directly reachable from the root and those -// coming next are always nearby. They are -// suited for things like streaming applications. -package trickle - -import ( - "context" - "errors" - "fmt" - - dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers" - - cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// layerRepeat specifies how many times to append a child tree of a -// given depth. Higher values increase the width of a given node, which -// improves seek speeds. -const layerRepeat = 4 - -// Layout builds a new DAG with the trickle format using the provided -// DagBuilderHelper. See the module's description for a more detailed -// explanation. -func Layout(db *h.DagBuilderHelper) (ipld.Node, error) { - root := db.NewUnixfsNode() - if err := fillTrickleRec(db, root, -1); err != nil { - return nil, err - } - - out, err := db.Add(root) - if err != nil { - return nil, err - } - - if err := db.Close(); err != nil { - return nil, err - } - - return out, nil -} - -// fillTrickleRec creates a trickle (sub-)tree with an optional maximum specified depth -// in the case maxDepth is greater than zero, or with unlimited depth otherwise -// (where the DAG builder will signal the end of data to end the function). -func fillTrickleRec(db *h.DagBuilderHelper, node *h.UnixfsNode, maxDepth int) error { - // Always do this, even in the base case - if err := db.FillNodeLayer(node); err != nil { - return err - } - - for depth := 1; ; depth++ { - // Apply depth limit only if the parameter is set (> 0). - if maxDepth > 0 && depth == maxDepth { - return nil - } - for layer := 0; layer < layerRepeat; layer++ { - if db.Done() { - return nil - } - - nextChild := db.NewUnixfsNode() - if err := fillTrickleRec(db, nextChild, depth); err != nil { - return err - } - - if err := node.AddChild(nextChild, db); err != nil { - return err - } - } - } -} - -// Append appends the data in `db` to the dag, using the Trickledag format -func Append(ctx context.Context, basen ipld.Node, db *h.DagBuilderHelper) (out ipld.Node, errOut error) { - base, ok := basen.(*dag.ProtoNode) - if !ok { - return nil, dag.ErrNotProtobuf - } - - defer func() { - if errOut == nil { - if err := db.Close(); err != nil { - errOut = err - } - } - }() - - // Convert to unixfs node for working with easily - ufsn, err := h.NewUnixfsNodeFromDag(base) - if err != nil { - return nil, err - } - - // Get depth of this 'tree' - n, layerProgress := trickleDepthInfo(ufsn, db.Maxlinks()) - if n == 0 { - // If direct blocks not filled... 
- if err := db.FillNodeLayer(ufsn); err != nil { - return nil, err - } - - if db.Done() { - return ufsn.GetDagNode() - } - - // If continuing, our depth has increased by one - n++ - } - - // Last child in this node may not be a full tree, lets file it up - if err := appendFillLastChild(ctx, ufsn, n-1, layerProgress, db); err != nil { - return nil, err - } - - // after appendFillLastChild, our depth is now increased by one - if !db.Done() { - n++ - } - - // Now, continue filling out tree like normal - for i := n; !db.Done(); i++ { - for j := 0; j < layerRepeat && !db.Done(); j++ { - next := db.NewUnixfsNode() - err := fillTrickleRec(db, next, i) - if err != nil { - return nil, err - } - - err = ufsn.AddChild(next, db) - if err != nil { - return nil, err - } - } - } - - return ufsn.GetDagNode() -} - -// appendFillLastChild will take in an incomplete trickledag node (uncomplete meaning, not full) and -// fill it out to the specified depth with blocks from the given DagBuilderHelper -func appendFillLastChild(ctx context.Context, ufsn *h.UnixfsNode, depth int, layerFill int, db *h.DagBuilderHelper) error { - if ufsn.NumChildren() <= db.Maxlinks() { - return nil - } - // Recursive step, grab last child - last := ufsn.NumChildren() - 1 - lastChild, err := ufsn.GetChild(ctx, last, db.GetDagServ()) - if err != nil { - return err - } - - // Fill out last child (may not be full tree) - nchild, err := appendRec(ctx, lastChild, db, depth-1) - if err != nil { - return err - } - - // Update changed child in parent node - ufsn.RemoveChild(last, db) - err = ufsn.AddChild(nchild, db) - if err != nil { - return err - } - - // Partially filled depth layer - if layerFill != 0 { - for ; layerFill < layerRepeat && !db.Done(); layerFill++ { - next := db.NewUnixfsNode() - err := fillTrickleRec(db, next, depth) - if err != nil { - return err - } - - err = ufsn.AddChild(next, db) - if err != nil { - return err - } - } - } - - return nil -} - -// recursive call for Append -func appendRec(ctx context.Context, ufsn *h.UnixfsNode, db *h.DagBuilderHelper, depth int) (*h.UnixfsNode, error) { - if depth == 0 || db.Done() { - return ufsn, nil - } - - // Get depth of this 'tree' - n, layerProgress := trickleDepthInfo(ufsn, db.Maxlinks()) - if n == 0 { - // If direct blocks not filled... 
- if err := db.FillNodeLayer(ufsn); err != nil { - return nil, err - } - n++ - } - - // If at correct depth, no need to continue - if n == depth { - return ufsn, nil - } - - if err := appendFillLastChild(ctx, ufsn, n, layerProgress, db); err != nil { - return nil, err - } - - // after appendFillLastChild, our depth is now increased by one - if !db.Done() { - n++ - } - - // Now, continue filling out tree like normal - for i := n; i < depth && !db.Done(); i++ { - for j := 0; j < layerRepeat && !db.Done(); j++ { - next := db.NewUnixfsNode() - if err := fillTrickleRec(db, next, i); err != nil { - return nil, err - } - - if err := ufsn.AddChild(next, db); err != nil { - return nil, err - } - } - } - - return ufsn, nil -} - -func trickleDepthInfo(node *h.UnixfsNode, maxlinks int) (int, int) { - n := node.NumChildren() - if n < maxlinks { - return 0, 0 - } - - return ((n - maxlinks) / layerRepeat) + 1, (n - maxlinks) % layerRepeat -} - -// VerifyParams is used by VerifyTrickleDagStructure -type VerifyParams struct { - Getter ipld.NodeGetter - Direct int - LayerRepeat int - Prefix *cid.Prefix - RawLeaves bool -} - -// VerifyTrickleDagStructure checks that the given dag matches exactly the trickle dag datastructure -// layout -func VerifyTrickleDagStructure(nd ipld.Node, p VerifyParams) error { - return verifyTDagRec(nd, -1, p) -} - -// Recursive call for verifying the structure of a trickledag -func verifyTDagRec(n ipld.Node, depth int, p VerifyParams) error { - codec := cid.DagProtobuf - if depth == 0 { - if len(n.Links()) > 0 { - return errors.New("expected direct block") - } - // zero depth dag is raw data block - switch nd := n.(type) { - case *dag.ProtoNode: - pbn, err := ft.FromBytes(nd.Data()) - if err != nil { - return err - } - - if pbn.GetType() != ft.TRaw { - return errors.New("expected raw block") - } - - if p.RawLeaves { - return errors.New("expected raw leaf, got a protobuf node") - } - case *dag.RawNode: - if !p.RawLeaves { - return errors.New("expected protobuf node as leaf") - } - codec = cid.Raw - default: - return errors.New("expected ProtoNode or RawNode") - } - } - - // verify prefix - if p.Prefix != nil { - prefix := n.Cid().Prefix() - expect := *p.Prefix // make a copy - expect.Codec = uint64(codec) - if codec == cid.Raw && expect.Version == 0 { - expect.Version = 1 - } - if expect.MhLength == -1 { - expect.MhLength = prefix.MhLength - } - if prefix != expect { - return fmt.Errorf("unexpected cid prefix: expected: %v; got %v", expect, prefix) - } - } - - if depth == 0 { - return nil - } - - nd, ok := n.(*dag.ProtoNode) - if !ok { - return errors.New("expected ProtoNode") - } - - // Verify this is a branch node - pbn, err := ft.FromBytes(nd.Data()) - if err != nil { - return err - } - - if pbn.GetType() != ft.TFile { - return fmt.Errorf("expected file as branch node, got: %s", pbn.GetType()) - } - - if len(pbn.Data) > 0 { - return errors.New("branch node should not have data") - } - - for i := 0; i < len(nd.Links()); i++ { - child, err := nd.Links()[i].GetNode(context.TODO(), p.Getter) - if err != nil { - return err - } - - if i < p.Direct { - // Direct blocks - err := verifyTDagRec(child, 0, p) - if err != nil { - return err - } - } else { - // Recursive trickle dags - rdepth := ((i - p.Direct) / p.LayerRepeat) + 1 - if rdepth >= depth && depth > 0 { - return errors.New("child dag was too deep") - } - err := verifyTDagRec(child, rdepth, p) - if err != nil { - return err - } - } - } - return nil -} diff --git a/unixfs/io/bufdagreader.go b/unixfs/io/bufdagreader.go deleted 
file mode 100644 index 48efe98ad..000000000 --- a/unixfs/io/bufdagreader.go +++ /dev/null @@ -1,39 +0,0 @@ -package io - -import ( - "bytes" - "context" -) - -// BufDagReader implements a DagReader that reads from a byte slice -// using a bytes.Reader. It is used for RawNodes. -type BufDagReader struct { - *bytes.Reader -} - -// NewBufDagReader returns a DAG reader for the given byte slice. -// BufDagReader is used to read RawNodes. -func NewBufDagReader(b []byte) *BufDagReader { - return &BufDagReader{bytes.NewReader(b)} -} - -var _ DagReader = (*BufDagReader)(nil) - -// Close is a nop. -func (*BufDagReader) Close() error { - return nil -} - -// CtxReadFull reads the slice onto b. -func (rd *BufDagReader) CtxReadFull(ctx context.Context, b []byte) (int, error) { - return rd.Read(b) -} - -// Size returns the size of the buffer. -func (rd *BufDagReader) Size() uint64 { - s := rd.Reader.Size() - if s < 0 { - panic("size smaller than 0 (impossible!!)") - } - return uint64(s) -} diff --git a/unixfs/io/dagreader.go b/unixfs/io/dagreader.go deleted file mode 100644 index 92da15990..000000000 --- a/unixfs/io/dagreader.go +++ /dev/null @@ -1,79 +0,0 @@ -package io - -import ( - "context" - "errors" - "io" - - mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - ftpb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb" - - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// Common errors -var ( - ErrIsDir = errors.New("this dag node is a directory") - ErrCantReadSymlinks = errors.New("cannot currently read symlinks") - ErrUnkownNodeType = errors.New("unknown node type") -) - -// A DagReader provides read-only read and seek acess to a unixfs file. -// Different implementations of readers are used for the different -// types of unixfs/protobuf-encoded nodes. -type DagReader interface { - ReadSeekCloser - Size() uint64 - CtxReadFull(context.Context, []byte) (int, error) -} - -// A ReadSeekCloser implements interfaces to read, copy, seek and close. 
-type ReadSeekCloser interface { - io.Reader - io.Seeker - io.Closer - io.WriterTo -} - -// NewDagReader creates a new reader object that reads the data represented by -// the given node, using the passed in DAGService for data retrieval -func NewDagReader(ctx context.Context, n ipld.Node, serv ipld.NodeGetter) (DagReader, error) { - switch n := n.(type) { - case *mdag.RawNode: - return NewBufDagReader(n.RawData()), nil - case *mdag.ProtoNode: - fsNode, err := ft.FSNodeFromBytes(n.Data()) - if err != nil { - return nil, err - } - - switch fsNode.Type() { - case ftpb.Data_Directory, ftpb.Data_HAMTShard: - // Dont allow reading directories - return nil, ErrIsDir - case ftpb.Data_File, ftpb.Data_Raw: - return NewPBFileReader(ctx, n, fsNode, serv), nil - case ftpb.Data_Metadata: - if len(n.Links()) == 0 { - return nil, errors.New("incorrectly formatted metadata object") - } - child, err := n.Links()[0].GetNode(ctx, serv) - if err != nil { - return nil, err - } - - childpb, ok := child.(*mdag.ProtoNode) - if !ok { - return nil, mdag.ErrNotProtobuf - } - return NewDagReader(ctx, childpb, serv) - case ftpb.Data_Symlink: - return nil, ErrCantReadSymlinks - default: - return nil, ft.ErrUnrecognizedType - } - default: - return nil, ErrUnkownNodeType - } -} diff --git a/unixfs/io/dagreader_test.go b/unixfs/io/dagreader_test.go deleted file mode 100644 index b44578196..000000000 --- a/unixfs/io/dagreader_test.go +++ /dev/null @@ -1,347 +0,0 @@ -package io - -import ( - "bytes" - "io" - "io/ioutil" - "math/rand" - "strings" - "testing" - - mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - - context "context" - - testu "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/test" -) - -func TestBasicRead(t *testing.T) { - dserv := testu.GetDAGServ() - inbuf, node := testu.GetRandomNode(t, dserv, 1024, testu.UseProtoBufLeaves) - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - reader, err := NewDagReader(ctx, node, dserv) - if err != nil { - t.Fatal(err) - } - - outbuf, err := ioutil.ReadAll(reader) - if err != nil { - t.Fatal(err) - } - - err = testu.ArrComp(inbuf, outbuf) - if err != nil { - t.Fatal(err) - } -} - -func TestSeekAndRead(t *testing.T) { - dserv := testu.GetDAGServ() - inbuf := make([]byte, 256) - for i := 0; i <= 255; i++ { - inbuf[i] = byte(i) - } - - node := testu.GetNode(t, dserv, inbuf, testu.UseProtoBufLeaves) - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - reader, err := NewDagReader(ctx, node, dserv) - if err != nil { - t.Fatal(err) - } - - for i := 255; i >= 0; i-- { - reader.Seek(int64(i), io.SeekStart) - - if getOffset(reader) != int64(i) { - t.Fatal("expected offset to be increased by one after read") - } - - out := readByte(t, reader) - - if int(out) != i { - t.Fatalf("read %d at index %d, expected %d", out, i, i) - } - - if getOffset(reader) != int64(i+1) { - t.Fatal("expected offset to be increased by one after read") - } - } -} - -func TestSeekAndReadLarge(t *testing.T) { - dserv := testu.GetDAGServ() - inbuf := make([]byte, 20000) - rand.Read(inbuf) - - node := testu.GetNode(t, dserv, inbuf, testu.UseProtoBufLeaves) - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - reader, err := NewDagReader(ctx, node, dserv) - if err != nil { - t.Fatal(err) - } - - _, err = reader.Seek(10000, io.SeekStart) - if err != nil { - t.Fatal(err) - } - - buf := make([]byte, 100) - _, err = 
io.ReadFull(reader, buf) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(buf, inbuf[10000:10100]) { - t.Fatal("seeked read failed") - } - - pbdr := reader.(*PBDagReader) - var count int - for i, p := range pbdr.promises { - if i > 20 && i < 30 { - if p == nil { - t.Fatal("expected index to be not nil: ", i) - } - count++ - } else { - if p != nil { - t.Fatal("expected index to be nil: ", i) - } - } - } - // -1 because we read some and it cleared one - if count != preloadSize-1 { - t.Fatalf("expected %d preloaded promises, got %d", preloadSize-1, count) - } -} - -func TestReadAndCancel(t *testing.T) { - dserv := testu.GetDAGServ() - inbuf := make([]byte, 20000) - rand.Read(inbuf) - - node := testu.GetNode(t, dserv, inbuf, testu.UseProtoBufLeaves) - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - reader, err := NewDagReader(ctx, node, dserv) - if err != nil { - t.Fatal(err) - } - - ctx, cancel := context.WithCancel(context.Background()) - buf := make([]byte, 100) - _, err = reader.CtxReadFull(ctx, buf) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(buf, inbuf[0:100]) { - t.Fatal("read failed") - } - cancel() - - b, err := ioutil.ReadAll(reader) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(inbuf[100:], b) { - t.Fatal("buffers not equal") - } -} - -func TestRelativeSeek(t *testing.T) { - dserv := testu.GetDAGServ() - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - inbuf := make([]byte, 1024) - - for i := 0; i < 256; i++ { - inbuf[i*4] = byte(i) - } - - inbuf[1023] = 1 // force the reader to be 1024 bytes - node := testu.GetNode(t, dserv, inbuf, testu.UseProtoBufLeaves) - - reader, err := NewDagReader(ctx, node, dserv) - if err != nil { - t.Fatal(err) - } - - for i := 0; i < 256; i++ { - if getOffset(reader) != int64(i*4) { - t.Fatalf("offset should be %d, was %d", i*4, getOffset(reader)) - } - out := readByte(t, reader) - if int(out) != i { - t.Fatalf("expected to read: %d at %d, read %d", i, getOffset(reader)-1, out) - } - if i != 255 { - _, err := reader.Seek(3, io.SeekCurrent) - if err != nil { - t.Fatal(err) - } - } - } - - _, err = reader.Seek(4, io.SeekEnd) - if err != nil { - t.Fatal(err) - } - - for i := 0; i < 256; i++ { - if getOffset(reader) != int64(1020-i*4) { - t.Fatalf("offset should be %d, was %d", 1020-i*4, getOffset(reader)) - } - out := readByte(t, reader) - if int(out) != 255-i { - t.Fatalf("expected to read: %d at %d, read %d", 255-i, getOffset(reader)-1, out) - } - reader.Seek(-5, io.SeekCurrent) // seek 4 bytes but we read one byte every time so 5 bytes - } - -} - -func TestTypeFailures(t *testing.T) { - dserv := testu.GetDAGServ() - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - node := unixfs.EmptyDirNode() - if _, err := NewDagReader(ctx, node, dserv); err != ErrIsDir { - t.Fatalf("excepted to get %v, got %v", ErrIsDir, err) - } - - data, err := unixfs.SymlinkData("/somelink") - if err != nil { - t.Fatal(err) - } - node = mdag.NodeWithData(data) - - if _, err := NewDagReader(ctx, node, dserv); err != ErrCantReadSymlinks { - t.Fatalf("excepted to get %v, got %v", ErrCantReadSymlinks, err) - } -} - -func TestBadPBData(t *testing.T) { - dserv := testu.GetDAGServ() - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - node := mdag.NodeWithData([]byte{42}) - _, err := NewDagReader(ctx, node, dserv) - if err == nil { - t.Fatal("excepted error, got nil") - } -} - -func TestMetadataNode(t *testing.T) { - ctx, closer := 
context.WithCancel(context.Background()) - defer closer() - - dserv := testu.GetDAGServ() - rdata, rnode := testu.GetRandomNode(t, dserv, 512, testu.UseProtoBufLeaves) - err := dserv.Add(ctx, rnode) - if err != nil { - t.Fatal(err) - } - - data, err := unixfs.BytesForMetadata(&unixfs.Metadata{ - MimeType: "text", - Size: 125, - }) - if err != nil { - t.Fatal(err) - } - node := mdag.NodeWithData(data) - - _, err = NewDagReader(ctx, node, dserv) - if err == nil { - t.Fatal("expected an error") - } - if !strings.Contains(err.Error(), "incorrectly formatted") { - t.Fatal("expected different error") - } - - node.AddNodeLink("", rnode) - - reader, err := NewDagReader(ctx, node, dserv) - if err != nil { - t.Fatal(err) - } - readdata, err := ioutil.ReadAll(reader) - if err != nil { - t.Fatal(err) - } - if err := testu.ArrComp(rdata, readdata); err != nil { - t.Fatal(err) - } -} - -func TestWriteTo(t *testing.T) { - dserv := testu.GetDAGServ() - inbuf, node := testu.GetRandomNode(t, dserv, 1024, testu.UseProtoBufLeaves) - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - reader, err := NewDagReader(ctx, node, dserv) - if err != nil { - t.Fatal(err) - } - - outbuf := new(bytes.Buffer) - reader.WriteTo(outbuf) - - err = testu.ArrComp(inbuf, outbuf.Bytes()) - if err != nil { - t.Fatal(err) - } - -} - -func TestReaderSzie(t *testing.T) { - dserv := testu.GetDAGServ() - size := int64(1024) - _, node := testu.GetRandomNode(t, dserv, size, testu.UseProtoBufLeaves) - ctx, closer := context.WithCancel(context.Background()) - defer closer() - - reader, err := NewDagReader(ctx, node, dserv) - if err != nil { - t.Fatal(err) - } - - if reader.Size() != uint64(size) { - t.Fatal("wrong reader size") - } -} - -func readByte(t testing.TB, reader DagReader) byte { - out := make([]byte, 1) - c, err := reader.Read(out) - - if c != 1 { - t.Fatal("reader should have read just one byte") - } - if err != nil { - t.Fatal(err) - } - - return out[0] -} - -func getOffset(reader DagReader) int64 { - offset, err := reader.Seek(0, io.SeekCurrent) - if err != nil { - panic("failed to retrieve offset: " + err.Error()) - } - return offset -} diff --git a/unixfs/io/directory.go b/unixfs/io/directory.go deleted file mode 100644 index 5aa115da9..000000000 --- a/unixfs/io/directory.go +++ /dev/null @@ -1,257 +0,0 @@ -package io - -import ( - "context" - "fmt" - "os" - - mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - format "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - hamt "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/hamt" - - cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// UseHAMTSharding is a global flag that signifies whether or not to use the -// HAMT sharding scheme for directory creation -var UseHAMTSharding = false - -// DefaultShardWidth is the default value used for hamt sharding width. -var DefaultShardWidth = 256 - -// Directory defines a UnixFS directory. It is used for creating, reading and -// editing directories. It allows to work with different directory schemes, -// like the basic or the HAMT implementation. -// -// It just allows to perform explicit edits on a single directory, working with -// directory trees is out of its scope, they are managed by the MFS layer -// (which is the main consumer of this interface). -type Directory interface { - - // SetPrefix sets the CID prefix of the root node. 
- SetPrefix(*cid.Prefix) - - // AddChild adds a (name, key) pair to the root node. - AddChild(context.Context, string, ipld.Node) error - - // ForEachLink applies the given function to Links in the directory. - ForEachLink(context.Context, func(*ipld.Link) error) error - - // Links returns the all the links in the directory node. - Links(context.Context) ([]*ipld.Link, error) - - // Find returns the root node of the file named 'name' within this directory. - // In the case of HAMT-directories, it will traverse the tree. - Find(context.Context, string) (ipld.Node, error) - - // RemoveChild removes the child with the given name. - RemoveChild(context.Context, string) error - - // GetNode returns the root of this directory. - GetNode() (ipld.Node, error) - - // GetPrefix returns the CID Prefix used. - GetPrefix() *cid.Prefix -} - -// TODO: Evaluate removing `dserv` from this layer and providing it in MFS. -// (The functions should in that case add a `DAGService` argument.) - -// BasicDirectory is the basic implementation of `Directory`. All the entries -// are stored in a single node. -type BasicDirectory struct { - node *mdag.ProtoNode - dserv ipld.DAGService -} - -// HAMTDirectory is the HAMT implementation of `Directory`. -// (See package `hamt` for more information.) -type HAMTDirectory struct { - shard *hamt.Shard - dserv ipld.DAGService -} - -// NewDirectory returns a Directory. It needs a `DAGService` to add the children. -func NewDirectory(dserv ipld.DAGService) Directory { - if UseHAMTSharding { - dir := new(HAMTDirectory) - s, err := hamt.NewShard(dserv, DefaultShardWidth) - if err != nil { - panic(err) // will only panic if DefaultShardWidth is a bad value - } - dir.shard = s - dir.dserv = dserv - return dir - } - - dir := new(BasicDirectory) - dir.node = format.EmptyDirNode() - dir.dserv = dserv - return dir -} - -// ErrNotADir implies that the given node was not a unixfs directory -var ErrNotADir = fmt.Errorf("merkledag node was not a directory or shard") - -// NewDirectoryFromNode loads a unixfs directory from the given IPLD node and -// DAGService. -func NewDirectoryFromNode(dserv ipld.DAGService, node ipld.Node) (Directory, error) { - protoBufNode, ok := node.(*mdag.ProtoNode) - if !ok { - return nil, ErrNotADir - } - - fsNode, err := format.FSNodeFromBytes(protoBufNode.Data()) - if err != nil { - return nil, err - } - - switch fsNode.Type() { - case format.TDirectory: - return &BasicDirectory{ - dserv: dserv, - node: protoBufNode.Copy().(*mdag.ProtoNode), - }, nil - case format.THAMTShard: - shard, err := hamt.NewHamtFromDag(dserv, node) - if err != nil { - return nil, err - } - return &HAMTDirectory{ - dserv: dserv, - shard: shard, - }, nil - } - - return nil, ErrNotADir -} - -// SetPrefix implements the `Directory` interface. -func (d *BasicDirectory) SetPrefix(prefix *cid.Prefix) { - d.node.SetPrefix(prefix) -} - -// AddChild implements the `Directory` interface. It adds (or replaces) -// a link to the given `node` under `name`. -func (d *BasicDirectory) AddChild(ctx context.Context, name string, node ipld.Node) error { - d.node.RemoveNodeLink(name) - // Remove old link (if it existed), don't check a potential `ErrNotFound`. - - return d.node.AddNodeLink(name, node) -} - -// ForEachLink implements the `Directory` interface. -func (d *BasicDirectory) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error { - for _, l := range d.node.Links() { - if err := f(l); err != nil { - return err - } - } - return nil -} - -// Links implements the `Directory` interface. 
-func (d *BasicDirectory) Links(ctx context.Context) ([]*ipld.Link, error) { - return d.node.Links(), nil -} - -// Find implements the `Directory` interface. -func (d *BasicDirectory) Find(ctx context.Context, name string) (ipld.Node, error) { - lnk, err := d.node.GetNodeLink(name) - if err == mdag.ErrLinkNotFound { - err = os.ErrNotExist - } - if err != nil { - return nil, err - } - - return d.dserv.Get(ctx, lnk.Cid) -} - -// RemoveChild implements the `Directory` interface. -func (d *BasicDirectory) RemoveChild(ctx context.Context, name string) error { - return d.node.RemoveNodeLink(name) -} - -// GetNode implements the `Directory` interface. -func (d *BasicDirectory) GetNode() (ipld.Node, error) { - return d.node, nil -} - -// GetPrefix implements the `Directory` interface. -func (d *BasicDirectory) GetPrefix() *cid.Prefix { - return &d.node.Prefix -} - -// SwitchToSharding returns a HAMT implementation of this directory. -func (d *BasicDirectory) SwitchToSharding(ctx context.Context) (Directory, error) { - hamtDir := new(HAMTDirectory) - hamtDir.dserv = d.dserv - - shard, err := hamt.NewShard(d.dserv, DefaultShardWidth) - if err != nil { - return nil, err - } - shard.SetPrefix(&d.node.Prefix) - hamtDir.shard = shard - - for _, lnk := range d.node.Links() { - node, err := d.dserv.Get(ctx, lnk.Cid) - if err != nil { - return nil, err - } - - err = hamtDir.shard.Set(ctx, lnk.Name, node) - if err != nil { - return nil, err - } - } - - return hamtDir, nil -} - -// SetPrefix implements the `Directory` interface. -func (d *HAMTDirectory) SetPrefix(prefix *cid.Prefix) { - d.shard.SetPrefix(prefix) -} - -// AddChild implements the `Directory` interface. -func (d *HAMTDirectory) AddChild(ctx context.Context, name string, nd ipld.Node) error { - return d.shard.Set(ctx, name, nd) -} - -// ForEachLink implements the `Directory` interface. -func (d *HAMTDirectory) ForEachLink(ctx context.Context, f func(*ipld.Link) error) error { - return d.shard.ForEachLink(ctx, f) -} - -// Links implements the `Directory` interface. -func (d *HAMTDirectory) Links(ctx context.Context) ([]*ipld.Link, error) { - return d.shard.EnumLinks(ctx) -} - -// Find implements the `Directory` interface. It will traverse the tree. -func (d *HAMTDirectory) Find(ctx context.Context, name string) (ipld.Node, error) { - lnk, err := d.shard.Find(ctx, name) - if err != nil { - return nil, err - } - - return lnk.GetNode(ctx, d.dserv) -} - -// RemoveChild implements the `Directory` interface. -func (d *HAMTDirectory) RemoveChild(ctx context.Context, name string) error { - return d.shard.Remove(ctx, name) -} - -// GetNode implements the `Directory` interface. -func (d *HAMTDirectory) GetNode() (ipld.Node, error) { - return d.shard.Node() -} - -// GetPrefix implements the `Directory` interface. 
-func (d *HAMTDirectory) GetPrefix() *cid.Prefix { - return d.shard.Prefix() -} diff --git a/unixfs/io/directory_test.go b/unixfs/io/directory_test.go deleted file mode 100644 index 16b43b693..000000000 --- a/unixfs/io/directory_test.go +++ /dev/null @@ -1,158 +0,0 @@ -package io - -import ( - "context" - "fmt" - "testing" - - mdtest "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" -) - -func TestEmptyNode(t *testing.T) { - n := ft.EmptyDirNode() - if len(n.Links()) != 0 { - t.Fatal("empty node should have 0 links") - } -} - -func TestDirectoryGrowth(t *testing.T) { - ds := mdtest.Mock() - dir := NewDirectory(ds) - ctx := context.Background() - - d := ft.EmptyDirNode() - ds.Add(ctx, d) - - nelems := 10000 - - for i := 0; i < nelems; i++ { - err := dir.AddChild(ctx, fmt.Sprintf("dir%d", i), d) - if err != nil { - t.Fatal(err) - } - } - - _, err := dir.GetNode() - if err != nil { - t.Fatal(err) - } - - links, err := dir.Links(ctx) - if err != nil { - t.Fatal(err) - } - - if len(links) != nelems { - t.Fatal("didnt get right number of elements") - } - - dirc := d.Cid() - - names := make(map[string]bool) - for _, l := range links { - names[l.Name] = true - if !l.Cid.Equals(dirc) { - t.Fatal("link wasnt correct") - } - } - - for i := 0; i < nelems; i++ { - dn := fmt.Sprintf("dir%d", i) - if !names[dn] { - t.Fatal("didnt find directory: ", dn) - } - - _, err := dir.Find(context.Background(), dn) - if err != nil { - t.Fatal(err) - } - } -} - -func TestDuplicateAddDir(t *testing.T) { - ds := mdtest.Mock() - dir := NewDirectory(ds) - ctx := context.Background() - nd := ft.EmptyDirNode() - - err := dir.AddChild(ctx, "test", nd) - if err != nil { - t.Fatal(err) - } - - err = dir.AddChild(ctx, "test", nd) - if err != nil { - t.Fatal(err) - } - - lnks, err := dir.Links(ctx) - if err != nil { - t.Fatal(err) - } - - if len(lnks) != 1 { - t.Fatal("expected only one link") - } -} - -func TestDirBuilder(t *testing.T) { - ds := mdtest.Mock() - dir := NewDirectory(ds) - ctx := context.Background() - - child := ft.EmptyDirNode() - err := ds.Add(ctx, child) - if err != nil { - t.Fatal(err) - } - - count := 5000 - - for i := 0; i < count; i++ { - err := dir.AddChild(ctx, fmt.Sprintf("entry %d", i), child) - if err != nil { - t.Fatal(err) - } - } - - dirnd, err := dir.GetNode() - if err != nil { - t.Fatal(err) - } - - links, err := dir.Links(ctx) - if err != nil { - t.Fatal(err) - } - - if len(links) != count { - t.Fatal("not enough links dawg", len(links), count) - } - - adir, err := NewDirectoryFromNode(ds, dirnd) - if err != nil { - t.Fatal(err) - } - - links, err = adir.Links(ctx) - if err != nil { - t.Fatal(err) - } - - names := make(map[string]bool) - for _, lnk := range links { - names[lnk.Name] = true - } - - for i := 0; i < count; i++ { - n := fmt.Sprintf("entry %d", i) - if !names[n] { - t.Fatal("COULDNT FIND: ", n) - } - } - - if len(links) != count { - t.Fatal("wrong number of links", len(links), count) - } -} diff --git a/unixfs/io/doc.go b/unixfs/io/doc.go deleted file mode 100644 index cf844bd23..000000000 --- a/unixfs/io/doc.go +++ /dev/null @@ -1,3 +0,0 @@ -// Package io implements convenience objects for working with the ipfs -// unixfs data format. 
-package io diff --git a/unixfs/io/pbdagreader.go b/unixfs/io/pbdagreader.go deleted file mode 100644 index c4509813f..000000000 --- a/unixfs/io/pbdagreader.go +++ /dev/null @@ -1,328 +0,0 @@ -package io - -import ( - "context" - "errors" - "fmt" - "io" - - mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - ftpb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb" - - cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// PBDagReader provides a way to easily read the data contained in a dag. -type PBDagReader struct { - serv ipld.NodeGetter - - // UnixFS file (it should be of type `Data_File` or `Data_Raw` only). - file *ft.FSNode - - // the current data buffer to be read from - // will either be a bytes.Reader or a child DagReader - buf ReadSeekCloser - - // NodePromises for each of 'nodes' child links - promises []*ipld.NodePromise - - // the cid of each child of the current node - links []*cid.Cid - - // the index of the child link currently being read from - linkPosition int - - // current offset for the read head within the 'file' - offset int64 - - // Our context - ctx context.Context - - // context cancel for children - cancel func() -} - -var _ DagReader = (*PBDagReader)(nil) - -// NewPBFileReader constructs a new PBFileReader. -func NewPBFileReader(ctx context.Context, n *mdag.ProtoNode, file *ft.FSNode, serv ipld.NodeGetter) *PBDagReader { - fctx, cancel := context.WithCancel(ctx) - curLinks := getLinkCids(n) - return &PBDagReader{ - serv: serv, - buf: NewBufDagReader(file.Data()), - promises: make([]*ipld.NodePromise, len(curLinks)), - links: curLinks, - ctx: fctx, - cancel: cancel, - file: file, - } -} - -const preloadSize = 10 - -func (dr *PBDagReader) preload(ctx context.Context, beg int) { - end := beg + preloadSize - if end >= len(dr.links) { - end = len(dr.links) - } - - copy(dr.promises[beg:], ipld.GetNodes(ctx, dr.serv, dr.links[beg:end])) -} - -// precalcNextBuf follows the next link in line and loads it from the -// DAGService, setting the next buffer to read from -func (dr *PBDagReader) precalcNextBuf(ctx context.Context) error { - if dr.buf != nil { - dr.buf.Close() // Just to make sure - dr.buf = nil - } - - if dr.linkPosition >= len(dr.promises) { - return io.EOF - } - - // If we drop to <= preloadSize/2 preloading nodes, preload the next 10. - for i := dr.linkPosition; i < dr.linkPosition+preloadSize/2 && i < len(dr.promises); i++ { - // TODO: check if canceled. - if dr.promises[i] == nil { - dr.preload(ctx, i) - break - } - } - - nxt, err := dr.promises[dr.linkPosition].Get(ctx) - dr.promises[dr.linkPosition] = nil - switch err { - case nil: - case context.DeadlineExceeded, context.Canceled: - err = ctx.Err() - if err != nil { - return ctx.Err() - } - // In this case, the context used to *preload* the node has been canceled. - // We need to retry the load with our context and we might as - // well preload some extra nodes while we're at it. - // - // Note: When using `Read`, this code will never execute as - // `Read` will use the global context. It only runs if the user - // explicitly reads with a custom context (e.g., by calling - // `CtxReadFull`). 
- dr.preload(ctx, dr.linkPosition) - nxt, err = dr.promises[dr.linkPosition].Get(ctx) - dr.promises[dr.linkPosition] = nil - if err != nil { - return err - } - default: - return err - } - - dr.linkPosition++ - - return dr.loadBufNode(nxt) -} - -func (dr *PBDagReader) loadBufNode(node ipld.Node) error { - switch node := node.(type) { - case *mdag.ProtoNode: - fsNode, err := ft.FSNodeFromBytes(node.Data()) - if err != nil { - return fmt.Errorf("incorrectly formatted protobuf: %s", err) - } - - switch fsNode.Type() { - case ftpb.Data_File: - dr.buf = NewPBFileReader(dr.ctx, node, fsNode, dr.serv) - return nil - case ftpb.Data_Raw: - dr.buf = NewBufDagReader(fsNode.Data()) - return nil - default: - return fmt.Errorf("found %s node in unexpected place", fsNode.Type().String()) - } - case *mdag.RawNode: - dr.buf = NewBufDagReader(node.RawData()) - return nil - default: - return ErrUnkownNodeType - } -} - -func getLinkCids(n ipld.Node) []*cid.Cid { - links := n.Links() - out := make([]*cid.Cid, 0, len(links)) - for _, l := range links { - out = append(out, l.Cid) - } - return out -} - -// Size return the total length of the data from the DAG structured file. -func (dr *PBDagReader) Size() uint64 { - return dr.file.FileSize() -} - -// Read reads data from the DAG structured file -func (dr *PBDagReader) Read(b []byte) (int, error) { - return dr.CtxReadFull(dr.ctx, b) -} - -// CtxReadFull reads data from the DAG structured file -func (dr *PBDagReader) CtxReadFull(ctx context.Context, b []byte) (int, error) { - if dr.buf == nil { - if err := dr.precalcNextBuf(ctx); err != nil { - return 0, err - } - } - - // If no cached buffer, load one - total := 0 - for { - // Attempt to fill bytes from cached buffer - n, err := io.ReadFull(dr.buf, b[total:]) - total += n - dr.offset += int64(n) - switch err { - // io.EOF will happen is dr.buf had noting more to read (n == 0) - case io.EOF, io.ErrUnexpectedEOF: - // do nothing - case nil: - return total, nil - default: - return total, err - } - - // if we are not done with the output buffer load next block - err = dr.precalcNextBuf(ctx) - if err != nil { - return total, err - } - } -} - -// WriteTo writes to the given writer. -func (dr *PBDagReader) WriteTo(w io.Writer) (int64, error) { - if dr.buf == nil { - if err := dr.precalcNextBuf(dr.ctx); err != nil { - return 0, err - } - } - - // If no cached buffer, load one - total := int64(0) - for { - // Attempt to write bytes from cached buffer - n, err := dr.buf.WriteTo(w) - total += n - dr.offset += n - if err != nil { - if err != io.EOF { - return total, err - } - } - - // Otherwise, load up the next block - err = dr.precalcNextBuf(dr.ctx) - if err != nil { - if err == io.EOF { - return total, nil - } - return total, err - } - } -} - -// Close closes the reader. -func (dr *PBDagReader) Close() error { - dr.cancel() - return nil -} - -// Seek implements io.Seeker, and will seek to a given offset in the file -// interface matches standard unix seek -// TODO: check if we can do relative seeks, to reduce the amount of dagreader -// recreations that need to happen. 
-func (dr *PBDagReader) Seek(offset int64, whence int) (int64, error) { - switch whence { - case io.SeekStart: - if offset < 0 { - return -1, errors.New("invalid offset") - } - if offset == dr.offset { - return offset, nil - } - - // left represents the number of bytes remaining to seek to (from beginning) - left := offset - if int64(len(dr.file.Data())) >= offset { - // Close current buf to close potential child dagreader - if dr.buf != nil { - dr.buf.Close() - } - dr.buf = NewBufDagReader(dr.file.Data()[offset:]) - - // start reading links from the beginning - dr.linkPosition = 0 - dr.offset = offset - return offset, nil - } - - // skip past root block data - left -= int64(len(dr.file.Data())) - - // iterate through links and find where we need to be - for i := 0; i < dr.file.NumChildren(); i++ { - if dr.file.BlockSize(i) > uint64(left) { - dr.linkPosition = i - break - } else { - left -= int64(dr.file.BlockSize(i)) - } - } - - // start sub-block request - err := dr.precalcNextBuf(dr.ctx) - if err != nil { - return 0, err - } - - // set proper offset within child readseeker - n, err := dr.buf.Seek(left, io.SeekStart) - if err != nil { - return -1, err - } - - // sanity - left -= n - if left != 0 { - return -1, errors.New("failed to seek properly") - } - dr.offset = offset - return offset, nil - case io.SeekCurrent: - // TODO: be smarter here - if offset == 0 { - return dr.offset, nil - } - - noffset := dr.offset + offset - return dr.Seek(noffset, io.SeekStart) - case io.SeekEnd: - noffset := int64(dr.file.FileSize()) - offset - n, err := dr.Seek(noffset, io.SeekStart) - - // Return negative number if we can't figure out the file size. Using io.EOF - // for this seems to be good(-enough) solution as it's only returned by - // precalcNextBuf when we step out of file range. - // This is needed for gateway to function properly - if err == io.EOF && dr.file.Type() == ftpb.Data_File { - return -1, nil - } - return n, err - default: - return 0, errors.New("invalid whence") - } -} diff --git a/unixfs/io/resolve.go b/unixfs/io/resolve.go deleted file mode 100644 index 173648e9a..000000000 --- a/unixfs/io/resolve.go +++ /dev/null @@ -1,58 +0,0 @@ -package io - -import ( - "context" - - dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - hamt "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/hamt" - - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// ResolveUnixfsOnce resolves a single hop of a path through a graph in a -// unixfs context. This includes handling traversing sharded directories. 
-func ResolveUnixfsOnce(ctx context.Context, ds ipld.NodeGetter, nd ipld.Node, names []string) (*ipld.Link, []string, error) { - switch nd := nd.(type) { - case *dag.ProtoNode: - upb, err := ft.FromBytes(nd.Data()) - if err != nil { - // Not a unixfs node, use standard object traversal code - lnk, err := nd.GetNodeLink(names[0]) - if err != nil { - return nil, nil, err - } - - return lnk, names[1:], nil - } - - switch upb.GetType() { - case ft.THAMTShard: - rods := dag.NewReadOnlyDagService(ds) - s, err := hamt.NewHamtFromDag(rods, nd) - if err != nil { - return nil, nil, err - } - - out, err := s.Find(ctx, names[0]) - if err != nil { - return nil, nil, err - } - - return out, names[1:], nil - default: - lnk, err := nd.GetNodeLink(names[0]) - if err != nil { - return nil, nil, err - } - - return lnk, names[1:], nil - } - default: - lnk, rest, err := nd.ResolveLink(names) - if err != nil { - return nil, nil, err - } - return lnk, rest, nil - } -} diff --git a/unixfs/mod/dagmodifier.go b/unixfs/mod/dagmodifier.go deleted file mode 100644 index 65e0fe4c6..000000000 --- a/unixfs/mod/dagmodifier.go +++ /dev/null @@ -1,592 +0,0 @@ -// Package mod provides DAG modification utilities to, for example, -// insert additional nodes in a unixfs DAG or truncate them. -package mod - -import ( - "bytes" - "context" - "errors" - "io" - - mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - help "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers" - trickle "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/trickle" - uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io" - - chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker" - cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid" - proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// Common errors -var ( - ErrSeekFail = errors.New("failed to seek properly") - ErrUnrecognizedWhence = errors.New("unrecognized whence") - ErrNotUnixfs = errors.New("dagmodifier only supports unixfs nodes (proto or raw)") -) - -// 2MB -var writebufferSize = 1 << 21 - -// DagModifier is the only struct licensed and able to correctly -// perform surgery on a DAG 'file' -// Dear god, please rename this to something more pleasant -type DagModifier struct { - dagserv ipld.DAGService - curNode ipld.Node - - splitter chunker.SplitterGen - ctx context.Context - readCancel func() - - writeStart uint64 - curWrOff uint64 - wrBuf *bytes.Buffer - - Prefix cid.Prefix - RawLeaves bool - - read uio.DagReader -} - -// NewDagModifier returns a new DagModifier, the Cid prefix for newly -// created nodes will be inhered from the passed in node. If the Cid -// version if not 0 raw leaves will also be enabled. The Prefix and -// RawLeaves options can be overridden by changing them after the call. 
-func NewDagModifier(ctx context.Context, from ipld.Node, serv ipld.DAGService, spl chunker.SplitterGen) (*DagModifier, error) { - switch from.(type) { - case *mdag.ProtoNode, *mdag.RawNode: - // ok - default: - return nil, ErrNotUnixfs - } - - prefix := from.Cid().Prefix() - prefix.Codec = cid.DagProtobuf - rawLeaves := false - if prefix.Version > 0 { - rawLeaves = true - } - - return &DagModifier{ - curNode: from.Copy(), - dagserv: serv, - splitter: spl, - ctx: ctx, - Prefix: prefix, - RawLeaves: rawLeaves, - }, nil -} - -// WriteAt will modify a dag file in place -func (dm *DagModifier) WriteAt(b []byte, offset int64) (int, error) { - // TODO: this is currently VERY inefficient - // each write that happens at an offset other than the current one causes a - // flush to disk, and dag rewrite - if offset == int64(dm.writeStart) && dm.wrBuf != nil { - // If we would overwrite the previous write - if len(b) >= dm.wrBuf.Len() { - dm.wrBuf.Reset() - } - } else if uint64(offset) != dm.curWrOff { - size, err := dm.Size() - if err != nil { - return 0, err - } - if offset > size { - err := dm.expandSparse(offset - size) - if err != nil { - return 0, err - } - } - - err = dm.Sync() - if err != nil { - return 0, err - } - dm.writeStart = uint64(offset) - } - - return dm.Write(b) -} - -// A reader that just returns zeros -type zeroReader struct{} - -func (zr zeroReader) Read(b []byte) (int, error) { - for i := range b { - b[i] = 0 - } - return len(b), nil -} - -// expandSparse grows the file with zero blocks of 4096 -// A small blocksize is chosen to aid in deduplication -func (dm *DagModifier) expandSparse(size int64) error { - r := io.LimitReader(zeroReader{}, size) - spl := chunker.NewSizeSplitter(r, 4096) - nnode, err := dm.appendData(dm.curNode, spl) - if err != nil { - return err - } - err = dm.dagserv.Add(dm.ctx, nnode) - return err -} - -// Write continues writing to the dag at the current offset -func (dm *DagModifier) Write(b []byte) (int, error) { - if dm.read != nil { - dm.read = nil - } - if dm.wrBuf == nil { - dm.wrBuf = new(bytes.Buffer) - } - - n, err := dm.wrBuf.Write(b) - if err != nil { - return n, err - } - dm.curWrOff += uint64(n) - if dm.wrBuf.Len() > writebufferSize { - err := dm.Sync() - if err != nil { - return n, err - } - } - return n, nil -} - -// Size returns the Filesize of the node -func (dm *DagModifier) Size() (int64, error) { - fileSize, err := fileSize(dm.curNode) - if err != nil { - return 0, err - } - if dm.wrBuf != nil && int64(dm.wrBuf.Len())+int64(dm.writeStart) > int64(fileSize) { - return int64(dm.wrBuf.Len()) + int64(dm.writeStart), nil - } - return int64(fileSize), nil -} - -func fileSize(n ipld.Node) (uint64, error) { - switch nd := n.(type) { - case *mdag.ProtoNode: - f, err := ft.FromBytes(nd.Data()) - if err != nil { - return 0, err - } - return f.GetFilesize(), nil - case *mdag.RawNode: - return uint64(len(nd.RawData())), nil - default: - return 0, ErrNotUnixfs - } -} - -// Sync writes changes to this dag to disk -func (dm *DagModifier) Sync() error { - // No buffer? 
Nothing to do - if dm.wrBuf == nil { - return nil - } - - // If we have an active reader, kill it - if dm.read != nil { - dm.read = nil - dm.readCancel() - } - - // Number of bytes we're going to write - buflen := dm.wrBuf.Len() - - // overwrite existing dag nodes - thisc, err := dm.modifyDag(dm.curNode, dm.writeStart) - if err != nil { - return err - } - - dm.curNode, err = dm.dagserv.Get(dm.ctx, thisc) - if err != nil { - return err - } - - // need to write past end of current dag - if dm.wrBuf.Len() > 0 { - dm.curNode, err = dm.appendData(dm.curNode, dm.splitter(dm.wrBuf)) - if err != nil { - return err - } - - err = dm.dagserv.Add(dm.ctx, dm.curNode) - if err != nil { - return err - } - } - - dm.writeStart += uint64(buflen) - - dm.wrBuf = nil - return nil -} - -// modifyDag writes the data in 'dm.wrBuf' over the data in 'node' starting at 'offset' -// returns the new key of the passed in node. -func (dm *DagModifier) modifyDag(n ipld.Node, offset uint64) (*cid.Cid, error) { - // If we've reached a leaf node. - if len(n.Links()) == 0 { - switch nd0 := n.(type) { - case *mdag.ProtoNode: - f, err := ft.FromBytes(nd0.Data()) - if err != nil { - return nil, err - } - - _, err = dm.wrBuf.Read(f.Data[offset:]) - if err != nil && err != io.EOF { - return nil, err - } - - // Update newly written node.. - b, err := proto.Marshal(f) - if err != nil { - return nil, err - } - - nd := new(mdag.ProtoNode) - nd.SetData(b) - nd.SetPrefix(&nd0.Prefix) - err = dm.dagserv.Add(dm.ctx, nd) - if err != nil { - return nil, err - } - - return nd.Cid(), nil - case *mdag.RawNode: - origData := nd0.RawData() - bytes := make([]byte, len(origData)) - - // copy orig data up to offset - copy(bytes, origData[:offset]) - - // copy in new data - n, err := dm.wrBuf.Read(bytes[offset:]) - if err != nil && err != io.EOF { - return nil, err - } - - // copy remaining data - offsetPlusN := int(offset) + n - if offsetPlusN < len(origData) { - copy(bytes[offsetPlusN:], origData[offsetPlusN:]) - } - - nd, err := mdag.NewRawNodeWPrefix(bytes, nd0.Cid().Prefix()) - if err != nil { - return nil, err - } - err = dm.dagserv.Add(dm.ctx, nd) - if err != nil { - return nil, err - } - - return nd.Cid(), nil - } - } - - node, ok := n.(*mdag.ProtoNode) - if !ok { - return nil, ErrNotUnixfs - } - - f, err := ft.FromBytes(node.Data()) - if err != nil { - return nil, err - } - - var cur uint64 - for i, bs := range f.GetBlocksizes() { - // We found the correct child to write into - if cur+bs > offset { - child, err := node.Links()[i].GetNode(dm.ctx, dm.dagserv) - if err != nil { - return nil, err - } - - k, err := dm.modifyDag(child, offset-cur) - if err != nil { - return nil, err - } - - node.Links()[i].Cid = k - - // Recache serialized node - _, err = node.EncodeProtobuf(true) - if err != nil { - return nil, err - } - - if dm.wrBuf.Len() == 0 { - // No more bytes to write! 
- break - } - offset = cur + bs - } - cur += bs - } - - err = dm.dagserv.Add(dm.ctx, node) - return node.Cid(), err -} - -// appendData appends the blocks from the given chan to the end of this dag -func (dm *DagModifier) appendData(nd ipld.Node, spl chunker.Splitter) (ipld.Node, error) { - switch nd := nd.(type) { - case *mdag.ProtoNode, *mdag.RawNode: - dbp := &help.DagBuilderParams{ - Dagserv: dm.dagserv, - Maxlinks: help.DefaultLinksPerBlock, - Prefix: &dm.Prefix, - RawLeaves: dm.RawLeaves, - } - return trickle.Append(dm.ctx, nd, dbp.New(spl)) - default: - return nil, ErrNotUnixfs - } -} - -// Read data from this dag starting at the current offset -func (dm *DagModifier) Read(b []byte) (int, error) { - err := dm.readPrep() - if err != nil { - return 0, err - } - - n, err := dm.read.Read(b) - dm.curWrOff += uint64(n) - return n, err -} - -func (dm *DagModifier) readPrep() error { - err := dm.Sync() - if err != nil { - return err - } - - if dm.read == nil { - ctx, cancel := context.WithCancel(dm.ctx) - dr, err := uio.NewDagReader(ctx, dm.curNode, dm.dagserv) - if err != nil { - cancel() - return err - } - - i, err := dr.Seek(int64(dm.curWrOff), io.SeekStart) - if err != nil { - cancel() - return err - } - - if i != int64(dm.curWrOff) { - cancel() - return ErrSeekFail - } - - dm.readCancel = cancel - dm.read = dr - } - - return nil -} - -// CtxReadFull reads data from this dag starting at the current offset -func (dm *DagModifier) CtxReadFull(ctx context.Context, b []byte) (int, error) { - err := dm.readPrep() - if err != nil { - return 0, err - } - - n, err := dm.read.CtxReadFull(ctx, b) - dm.curWrOff += uint64(n) - return n, err -} - -// GetNode gets the modified DAG Node -func (dm *DagModifier) GetNode() (ipld.Node, error) { - err := dm.Sync() - if err != nil { - return nil, err - } - return dm.curNode.Copy(), nil -} - -// HasChanges returned whether or not there are unflushed changes to this dag -func (dm *DagModifier) HasChanges() bool { - return dm.wrBuf != nil -} - -// Seek modifies the offset according to whence. See unixfs/io for valid whence -// values. -func (dm *DagModifier) Seek(offset int64, whence int) (int64, error) { - err := dm.Sync() - if err != nil { - return 0, err - } - - fisize, err := dm.Size() - if err != nil { - return 0, err - } - - var newoffset uint64 - switch whence { - case io.SeekCurrent: - newoffset = dm.curWrOff + uint64(offset) - case io.SeekStart: - newoffset = uint64(offset) - case io.SeekEnd: - newoffset = uint64(fisize) - uint64(offset) - default: - return 0, ErrUnrecognizedWhence - } - - if int64(newoffset) > fisize { - if err := dm.expandSparse(int64(newoffset) - fisize); err != nil { - return 0, err - } - } - dm.curWrOff = newoffset - dm.writeStart = newoffset - - if dm.read != nil { - _, err = dm.read.Seek(offset, whence) - if err != nil { - return 0, err - } - } - - return int64(dm.curWrOff), nil -} - -// Truncate truncates the current Node to 'size' and replaces it with the -// new one. 
-func (dm *DagModifier) Truncate(size int64) error { - err := dm.Sync() - if err != nil { - return err - } - - realSize, err := dm.Size() - if err != nil { - return err - } - if size == int64(realSize) { - return nil - } - - // Truncate can also be used to expand the file - if size > int64(realSize) { - return dm.expandSparse(int64(size) - realSize) - } - - nnode, err := dagTruncate(dm.ctx, dm.curNode, uint64(size), dm.dagserv) - if err != nil { - return err - } - - err = dm.dagserv.Add(dm.ctx, nnode) - if err != nil { - return err - } - - dm.curNode = nnode - return nil -} - -// dagTruncate truncates the given node to 'size' and returns the modified Node -func dagTruncate(ctx context.Context, n ipld.Node, size uint64, ds ipld.DAGService) (ipld.Node, error) { - if len(n.Links()) == 0 { - switch nd := n.(type) { - case *mdag.ProtoNode: - // TODO: this can likely be done without marshaling and remarshaling - pbn, err := ft.FromBytes(nd.Data()) - if err != nil { - return nil, err - } - nd.SetData(ft.WrapData(pbn.Data[:size])) - return nd, nil - case *mdag.RawNode: - return mdag.NewRawNodeWPrefix(nd.RawData()[:size], nd.Cid().Prefix()) - } - } - - nd, ok := n.(*mdag.ProtoNode) - if !ok { - return nil, ErrNotUnixfs - } - - var cur uint64 - end := 0 - var modified ipld.Node - ndata, err := ft.FSNodeFromBytes(nd.Data()) - if err != nil { - return nil, err - } - // Reset the block sizes of the node to adjust them - // with the new values of the truncated children. - ndata.RemoveAllBlockSizes() - for i, lnk := range nd.Links() { - child, err := lnk.GetNode(ctx, ds) - if err != nil { - return nil, err - } - - childsize, err := fileSize(child) - if err != nil { - return nil, err - } - - // found the child we want to cut - if size < cur+childsize { - nchild, err := dagTruncate(ctx, child, size-cur, ds) - if err != nil { - return nil, err - } - - ndata.AddBlockSize(size - cur) - - modified = nchild - end = i - break - } - cur += childsize - ndata.AddBlockSize(childsize) - } - - err = ds.Add(ctx, modified) - if err != nil { - return nil, err - } - - nd.SetLinks(nd.Links()[:end]) - err = nd.AddNodeLink("", modified) - if err != nil { - return nil, err - } - - d, err := ndata.GetBytes() - if err != nil { - return nil, err - } - // Save the new block sizes to the original node. - nd.SetData(d) - - // invalidate cache and recompute serialized data - _, err = nd.EncodeProtobuf(true) - if err != nil { - return nil, err - } - - return nd, nil -} diff --git a/unixfs/mod/dagmodifier_test.go b/unixfs/mod/dagmodifier_test.go deleted file mode 100644 index 8292b1eff..000000000 --- a/unixfs/mod/dagmodifier_test.go +++ /dev/null @@ -1,803 +0,0 @@ -package mod - -import ( - "context" - "fmt" - "io" - "io/ioutil" - "testing" - - h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers" - trickle "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/trickle" - - uio "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/io" - testu "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/test" - - u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util" -) - -func testModWrite(t *testing.T, beg, size uint64, orig []byte, dm *DagModifier, opts testu.NodeOpts) []byte { - newdata := make([]byte, size) - r := u.NewTimeSeededRand() - r.Read(newdata) - - if size+beg > uint64(len(orig)) { - orig = append(orig, make([]byte, (size+beg)-uint64(len(orig)))...) 
- } - copy(orig[beg:], newdata) - - nmod, err := dm.WriteAt(newdata, int64(beg)) - if err != nil { - t.Fatal(err) - } - - if nmod != int(size) { - t.Fatalf("Mod length not correct! %d != %d", nmod, size) - } - - verifyNode(t, orig, dm, opts) - - return orig -} - -func verifyNode(t *testing.T, orig []byte, dm *DagModifier, opts testu.NodeOpts) { - nd, err := dm.GetNode() - if err != nil { - t.Fatal(err) - } - - err = trickle.VerifyTrickleDagStructure(nd, trickle.VerifyParams{ - Getter: dm.dagserv, - Direct: h.DefaultLinksPerBlock, - LayerRepeat: 4, - Prefix: &opts.Prefix, - RawLeaves: opts.RawLeavesUsed, - }) - if err != nil { - t.Fatal(err) - } - - rd, err := uio.NewDagReader(context.Background(), nd, dm.dagserv) - if err != nil { - t.Fatal(err) - } - - after, err := ioutil.ReadAll(rd) - if err != nil { - t.Fatal(err) - } - - err = testu.ArrComp(after, orig) - if err != nil { - t.Fatal(err) - } -} - -func runAllSubtests(t *testing.T, tfunc func(*testing.T, testu.NodeOpts)) { - t.Run("opts=ProtoBufLeaves", func(t *testing.T) { tfunc(t, testu.UseProtoBufLeaves) }) - t.Run("opts=RawLeaves", func(t *testing.T) { tfunc(t, testu.UseRawLeaves) }) - t.Run("opts=CidV1", func(t *testing.T) { tfunc(t, testu.UseCidV1) }) - t.Run("opts=Blake2b256", func(t *testing.T) { tfunc(t, testu.UseBlake2b256) }) -} - -func TestDagModifierBasic(t *testing.T) { - runAllSubtests(t, testDagModifierBasic) -} -func testDagModifierBasic(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - b, n := testu.GetRandomNode(t, dserv, 50000, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - // Within zero block - beg := uint64(15) - length := uint64(60) - - t.Log("Testing mod within zero block") - b = testModWrite(t, beg, length, b, dagmod, opts) - - // Within bounds of existing file - beg = 1000 - length = 4000 - t.Log("Testing mod within bounds of existing multiblock file.") - b = testModWrite(t, beg, length, b, dagmod, opts) - - // Extend bounds - beg = 49500 - length = 4000 - - t.Log("Testing mod that extends file.") - b = testModWrite(t, beg, length, b, dagmod, opts) - - // "Append" - beg = uint64(len(b)) - length = 3000 - t.Log("Testing pure append") - _ = testModWrite(t, beg, length, b, dagmod, opts) - - // Verify reported length - node, err := dagmod.GetNode() - if err != nil { - t.Fatal(err) - } - - size, err := fileSize(node) - if err != nil { - t.Fatal(err) - } - - expected := uint64(50000 + 3500 + 3000) - if size != expected { - t.Fatalf("Final reported size is incorrect [%d != %d]", size, expected) - } -} - -func TestMultiWrite(t *testing.T) { - runAllSubtests(t, testMultiWrite) -} -func testMultiWrite(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - data := make([]byte, 4000) - u.NewTimeSeededRand().Read(data) - - for i := 0; i < len(data); i++ { - n, err := dagmod.WriteAt(data[i:i+1], int64(i)) - if err != nil { - t.Fatal(err) - } - if n != 1 { - t.Fatal("Somehow wrote the wrong number of bytes! 
(n != 1)") - } - - size, err := dagmod.Size() - if err != nil { - t.Fatal(err) - } - - if size != int64(i+1) { - t.Fatal("Size was reported incorrectly") - } - } - - verifyNode(t, data, dagmod, opts) -} - -func TestMultiWriteAndFlush(t *testing.T) { - runAllSubtests(t, testMultiWriteAndFlush) -} -func testMultiWriteAndFlush(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - data := make([]byte, 20) - u.NewTimeSeededRand().Read(data) - - for i := 0; i < len(data); i++ { - n, err := dagmod.WriteAt(data[i:i+1], int64(i)) - if err != nil { - t.Fatal(err) - } - if n != 1 { - t.Fatal("Somehow wrote the wrong number of bytes! (n != 1)") - } - err = dagmod.Sync() - if err != nil { - t.Fatal(err) - } - } - - verifyNode(t, data, dagmod, opts) -} - -func TestWriteNewFile(t *testing.T) { - runAllSubtests(t, testWriteNewFile) -} -func testWriteNewFile(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - towrite := make([]byte, 2000) - u.NewTimeSeededRand().Read(towrite) - - nw, err := dagmod.Write(towrite) - if err != nil { - t.Fatal(err) - } - if nw != len(towrite) { - t.Fatal("Wrote wrong amount") - } - - verifyNode(t, towrite, dagmod, opts) -} - -func TestMultiWriteCoal(t *testing.T) { - runAllSubtests(t, testMultiWriteCoal) -} -func testMultiWriteCoal(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - data := make([]byte, 1000) - u.NewTimeSeededRand().Read(data) - - for i := 0; i < len(data); i++ { - n, err := dagmod.WriteAt(data[:i+1], 0) - if err != nil { - fmt.Println("FAIL AT ", i) - t.Fatal(err) - } - if n != i+1 { - t.Fatal("Somehow wrote the wrong number of bytes! 
(n != 1)") - } - - } - - verifyNode(t, data, dagmod, opts) -} - -func TestLargeWriteChunks(t *testing.T) { - runAllSubtests(t, testLargeWriteChunks) -} -func testLargeWriteChunks(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - wrsize := 1000 - datasize := 10000000 - data := make([]byte, datasize) - - u.NewTimeSeededRand().Read(data) - - for i := 0; i < datasize/wrsize; i++ { - n, err := dagmod.WriteAt(data[i*wrsize:(i+1)*wrsize], int64(i*wrsize)) - if err != nil { - t.Fatal(err) - } - if n != wrsize { - t.Fatal("failed to write buffer") - } - } - - _, err = dagmod.Seek(0, io.SeekStart) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(dagmod) - if err != nil { - t.Fatal(err) - } - - if err = testu.ArrComp(out, data); err != nil { - t.Fatal(err) - } -} - -func TestDagTruncate(t *testing.T) { - runAllSubtests(t, testDagTruncate) -} -func testDagTruncate(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - b, n := testu.GetRandomNode(t, dserv, 50000, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - err = dagmod.Truncate(12345) - if err != nil { - t.Fatal(err) - } - size, err := dagmod.Size() - if err != nil { - t.Fatal(err) - } - - if size != 12345 { - t.Fatal("size was incorrect!") - } - - _, err = dagmod.Seek(0, io.SeekStart) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(dagmod) - if err != nil { - t.Fatal(err) - } - - if err = testu.ArrComp(out, b[:12345]); err != nil { - t.Fatal(err) - } - - err = dagmod.Truncate(10) - if err != nil { - t.Fatal(err) - } - - size, err = dagmod.Size() - if err != nil { - t.Fatal(err) - } - - if size != 10 { - t.Fatal("size was incorrect!") - } - - err = dagmod.Truncate(0) - if err != nil { - t.Fatal(err) - } - - size, err = dagmod.Size() - if err != nil { - t.Fatal(err) - } - - if size != 0 { - t.Fatal("size was incorrect!") - } -} - -// TestDagTruncateSameSize tests that a DAG truncated -// to the same size (i.e., doing nothing) doesn't modify -// the DAG (its hash). -func TestDagTruncateSameSize(t *testing.T) { - runAllSubtests(t, testDagTruncateSameSize) -} -func testDagTruncateSameSize(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - _, n := testu.GetRandomNode(t, dserv, 50000, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - // Copied from `TestDagTruncate`. 
- - size, err := dagmod.Size() - if err != nil { - t.Fatal(err) - } - - err = dagmod.Truncate(size) - if err != nil { - t.Fatal(err) - } - - modifiedNode, err := dagmod.GetNode() - if err != nil { - t.Fatal(err) - } - - if modifiedNode.Cid().Equals(n.Cid()) == false { - t.Fatal("the node has been modified!") - } -} - -func TestSparseWrite(t *testing.T) { - runAllSubtests(t, testSparseWrite) -} -func testSparseWrite(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - buf := make([]byte, 5000) - u.NewTimeSeededRand().Read(buf[2500:]) - - wrote, err := dagmod.WriteAt(buf[2500:], 2500) - if err != nil { - t.Fatal(err) - } - - if wrote != 2500 { - t.Fatal("incorrect write amount") - } - - _, err = dagmod.Seek(0, io.SeekStart) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(dagmod) - if err != nil { - t.Fatal(err) - } - - if err = testu.ArrComp(out, buf); err != nil { - t.Fatal(err) - } -} - -func TestSeekPastEndWrite(t *testing.T) { - runAllSubtests(t, testSeekPastEndWrite) -} -func testSeekPastEndWrite(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - buf := make([]byte, 5000) - u.NewTimeSeededRand().Read(buf[2500:]) - - nseek, err := dagmod.Seek(2500, io.SeekStart) - if err != nil { - t.Fatal(err) - } - - if nseek != 2500 { - t.Fatal("failed to seek") - } - - wrote, err := dagmod.Write(buf[2500:]) - if err != nil { - t.Fatal(err) - } - - if wrote != 2500 { - t.Fatal("incorrect write amount") - } - - _, err = dagmod.Seek(0, io.SeekStart) - if err != nil { - t.Fatal(err) - } - - out, err := ioutil.ReadAll(dagmod) - if err != nil { - t.Fatal(err) - } - - if err = testu.ArrComp(out, buf); err != nil { - t.Fatal(err) - } -} - -func TestRelativeSeek(t *testing.T) { - runAllSubtests(t, testRelativeSeek) -} -func testRelativeSeek(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - for i := 0; i < 64; i++ { - dagmod.Write([]byte{byte(i)}) - if _, err := dagmod.Seek(1, io.SeekCurrent); err != nil { - t.Fatal(err) - } - } - - out, err := ioutil.ReadAll(dagmod) - if err != nil { - t.Fatal(err) - } - - for i, v := range out { - if v != 0 && i/2 != int(v) { - t.Errorf("expected %d, at index %d, got %d", i/2, i, v) - } - } -} - -func TestInvalidSeek(t *testing.T) { - runAllSubtests(t, testInvalidSeek) -} -func testInvalidSeek(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(t, dserv, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - _, 
err = dagmod.Seek(10, -10) - - if err != ErrUnrecognizedWhence { - t.Fatal(err) - } -} - -func TestEndSeek(t *testing.T) { - runAllSubtests(t, testEndSeek) -} -func testEndSeek(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - - n := testu.GetEmptyNode(t, dserv, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - _, err = dagmod.Write(make([]byte, 100)) - if err != nil { - t.Fatal(err) - } - - offset, err := dagmod.Seek(0, io.SeekCurrent) - if err != nil { - t.Fatal(err) - } - if offset != 100 { - t.Fatal("expected the relative seek 0 to return current location") - } - - offset, err = dagmod.Seek(0, io.SeekStart) - if err != nil { - t.Fatal(err) - } - if offset != 0 { - t.Fatal("expected the absolute seek to set offset at 0") - } - - offset, err = dagmod.Seek(0, io.SeekEnd) - if err != nil { - t.Fatal(err) - } - if offset != 100 { - t.Fatal("expected the end seek to set offset at end") - } -} - -func TestReadAndSeek(t *testing.T) { - runAllSubtests(t, testReadAndSeek) -} -func testReadAndSeek(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - - n := testu.GetEmptyNode(t, dserv, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - writeBuf := []byte{0, 1, 2, 3, 4, 5, 6, 7} - dagmod.Write(writeBuf) - - if !dagmod.HasChanges() { - t.Fatal("there are changes, this should be true") - } - - readBuf := make([]byte, 4) - offset, err := dagmod.Seek(0, io.SeekStart) - if offset != 0 { - t.Fatal("expected offset to be 0") - } - if err != nil { - t.Fatal(err) - } - - // read 0,1,2,3 - c, err := dagmod.Read(readBuf) - if err != nil { - t.Fatal(err) - } - if c != 4 { - t.Fatalf("expected length of 4 got %d", c) - } - - for i := byte(0); i < 4; i++ { - if readBuf[i] != i { - t.Fatalf("wrong value %d [at index %d]", readBuf[i], i) - } - } - - // skip 4 - _, err = dagmod.Seek(1, io.SeekCurrent) - if err != nil { - t.Fatalf("error: %s, offset %d, reader offset %d", err, dagmod.curWrOff, getOffset(dagmod.read)) - } - - //read 5,6,7 - readBuf = make([]byte, 3) - c, err = dagmod.Read(readBuf) - if err != nil { - t.Fatal(err) - } - if c != 3 { - t.Fatalf("expected length of 3 got %d", c) - } - - for i := byte(0); i < 3; i++ { - if readBuf[i] != i+5 { - t.Fatalf("wrong value %d [at index %d]", readBuf[i], i) - } - - } - -} - -func TestCtxRead(t *testing.T) { - runAllSubtests(t, testCtxRead) -} -func testCtxRead(t *testing.T, opts testu.NodeOpts) { - dserv := testu.GetDAGServ() - - n := testu.GetEmptyNode(t, dserv, opts) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - t.Fatal(err) - } - if opts.ForceRawLeaves { - dagmod.RawLeaves = true - } - - _, err = dagmod.Write([]byte{0, 1, 2, 3, 4, 5, 6, 7}) - if err != nil { - t.Fatal(err) - } - dagmod.Seek(0, io.SeekStart) - - readBuf := make([]byte, 4) - _, err = dagmod.CtxReadFull(ctx, readBuf) - if err != nil { - t.Fatal(err) - } - err = testu.ArrComp(readBuf, []byte{0, 1, 2, 3}) - if err != nil { - t.Fatal(err) - } - // TODO(Kubuxu): context cancel case, I will do it after I figure out dagreader tests, - // 
because this is exacelly the same. -} - -func BenchmarkDagmodWrite(b *testing.B) { - b.StopTimer() - dserv := testu.GetDAGServ() - n := testu.GetEmptyNode(b, dserv, testu.UseProtoBufLeaves) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - wrsize := 4096 - - dagmod, err := NewDagModifier(ctx, n, dserv, testu.SizeSplitterGen(512)) - if err != nil { - b.Fatal(err) - } - - buf := make([]byte, b.N*wrsize) - u.NewTimeSeededRand().Read(buf) - b.StartTimer() - b.SetBytes(int64(wrsize)) - for i := 0; i < b.N; i++ { - n, err := dagmod.Write(buf[i*wrsize : (i+1)*wrsize]) - if err != nil { - b.Fatal(err) - } - if n != wrsize { - b.Fatal("Wrote bad size") - } - } -} - -func getOffset(reader uio.DagReader) int64 { - offset, err := reader.Seek(0, io.SeekCurrent) - if err != nil { - panic("failed to retrieve offset: " + err.Error()) - } - return offset -} diff --git a/unixfs/pb/Rules.mk b/unixfs/pb/Rules.mk deleted file mode 100644 index 505f70e75..000000000 --- a/unixfs/pb/Rules.mk +++ /dev/null @@ -1,8 +0,0 @@ -include mk/header.mk - -PB_$(d) = $(wildcard $(d)/*.proto) -TGTS_$(d) = $(PB_$(d):.proto=.pb.go) - -#DEPS_GO += $(TGTS_$(d)) - -include mk/footer.mk diff --git a/unixfs/pb/unixfs.pb.go b/unixfs/pb/unixfs.pb.go deleted file mode 100644 index e28053031..000000000 --- a/unixfs/pb/unixfs.pb.go +++ /dev/null @@ -1,147 +0,0 @@ -// Code generated by protoc-gen-gogo. -// source: unixfs.proto -// DO NOT EDIT! - -/* -Package unixfs_pb is a generated protocol buffer package. - -It is generated from these files: - unixfs.proto - -It has these top-level messages: - Data - Metadata -*/ -package unixfs_pb - -import proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto" -import fmt "fmt" -import math "math" - -// Reference imports to suppress errors if they are not otherwise used. 
-var _ = proto.Marshal -var _ = fmt.Errorf -var _ = math.Inf - -type Data_DataType int32 - -const ( - Data_Raw Data_DataType = 0 - Data_Directory Data_DataType = 1 - Data_File Data_DataType = 2 - Data_Metadata Data_DataType = 3 - Data_Symlink Data_DataType = 4 - Data_HAMTShard Data_DataType = 5 -) - -var Data_DataType_name = map[int32]string{ - 0: "Raw", - 1: "Directory", - 2: "File", - 3: "Metadata", - 4: "Symlink", - 5: "HAMTShard", -} -var Data_DataType_value = map[string]int32{ - "Raw": 0, - "Directory": 1, - "File": 2, - "Metadata": 3, - "Symlink": 4, - "HAMTShard": 5, -} - -func (x Data_DataType) Enum() *Data_DataType { - p := new(Data_DataType) - *p = x - return p -} -func (x Data_DataType) String() string { - return proto.EnumName(Data_DataType_name, int32(x)) -} -func (x *Data_DataType) UnmarshalJSON(data []byte) error { - value, err := proto.UnmarshalJSONEnum(Data_DataType_value, data, "Data_DataType") - if err != nil { - return err - } - *x = Data_DataType(value) - return nil -} - -type Data struct { - Type *Data_DataType `protobuf:"varint,1,req,name=Type,enum=unixfs.pb.Data_DataType" json:"Type,omitempty"` - Data []byte `protobuf:"bytes,2,opt,name=Data" json:"Data,omitempty"` - Filesize *uint64 `protobuf:"varint,3,opt,name=filesize" json:"filesize,omitempty"` - Blocksizes []uint64 `protobuf:"varint,4,rep,name=blocksizes" json:"blocksizes,omitempty"` - HashType *uint64 `protobuf:"varint,5,opt,name=hashType" json:"hashType,omitempty"` - Fanout *uint64 `protobuf:"varint,6,opt,name=fanout" json:"fanout,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *Data) Reset() { *m = Data{} } -func (m *Data) String() string { return proto.CompactTextString(m) } -func (*Data) ProtoMessage() {} - -func (m *Data) GetType() Data_DataType { - if m != nil && m.Type != nil { - return *m.Type - } - return Data_Raw -} - -func (m *Data) GetData() []byte { - if m != nil { - return m.Data - } - return nil -} - -func (m *Data) GetFilesize() uint64 { - if m != nil && m.Filesize != nil { - return *m.Filesize - } - return 0 -} - -func (m *Data) GetBlocksizes() []uint64 { - if m != nil { - return m.Blocksizes - } - return nil -} - -func (m *Data) GetHashType() uint64 { - if m != nil && m.HashType != nil { - return *m.HashType - } - return 0 -} - -func (m *Data) GetFanout() uint64 { - if m != nil && m.Fanout != nil { - return *m.Fanout - } - return 0 -} - -type Metadata struct { - MimeType *string `protobuf:"bytes,1,opt,name=MimeType" json:"MimeType,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *Metadata) Reset() { *m = Metadata{} } -func (m *Metadata) String() string { return proto.CompactTextString(m) } -func (*Metadata) ProtoMessage() {} - -func (m *Metadata) GetMimeType() string { - if m != nil && m.MimeType != nil { - return *m.MimeType - } - return "" -} - -func init() { - proto.RegisterType((*Data)(nil), "unixfs.pb.Data") - proto.RegisterType((*Metadata)(nil), "unixfs.pb.Metadata") - proto.RegisterEnum("unixfs.pb.Data_DataType", Data_DataType_name, Data_DataType_value) -} diff --git a/unixfs/pb/unixfs.proto b/unixfs/pb/unixfs.proto deleted file mode 100644 index 6feb7aad6..000000000 --- a/unixfs/pb/unixfs.proto +++ /dev/null @@ -1,24 +0,0 @@ -package unixfs.pb; - -message Data { - enum DataType { - Raw = 0; - Directory = 1; - File = 2; - Metadata = 3; - Symlink = 4; - HAMTShard = 5; - } - - required DataType Type = 1; - optional bytes Data = 2; - optional uint64 filesize = 3; - repeated uint64 blocksizes = 4; - - optional uint64 hashType = 5; - optional uint64 fanout = 6; 
-} - -message Metadata { - optional string MimeType = 1; -} diff --git a/unixfs/test/utils.go b/unixfs/test/utils.go deleted file mode 100644 index eca518ed8..000000000 --- a/unixfs/test/utils.go +++ /dev/null @@ -1,135 +0,0 @@ -package testu - -import ( - "bytes" - "context" - "fmt" - "io" - "io/ioutil" - "testing" - - mdag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - mdagmock "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag/test" - ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs" - h "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/helpers" - trickle "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/importer/trickle" - - u "gx/ipfs/QmPdKqUcHGFdeSpvjVoaTRPPstGif9GBZb5Q56RVw9o69A/go-ipfs-util" - mh "gx/ipfs/QmPnFwZ2JXKnXgMw8CdBPxn7FWh6LLdjUjxV1fKHuJnkr8/go-multihash" - chunker "gx/ipfs/QmVDjhUMtkRskBFAVNwyXuLSKbeAya7JKPnzAxMKDaK4x4/go-ipfs-chunker" - cid "gx/ipfs/QmYVNvtQkeZ6AKSwDrjQTs432QtL6umrrK41EBq3cu7iSP/go-cid" - ipld "gx/ipfs/QmZtNq8dArGfnpCZfx2pUNY7UcjGhVp5qqwQ4hH6mpTMRQ/go-ipld-format" -) - -// SizeSplitterGen creates a generator. -func SizeSplitterGen(size int64) chunker.SplitterGen { - return func(r io.Reader) chunker.Splitter { - return chunker.NewSizeSplitter(r, size) - } -} - -// GetDAGServ returns a mock DAGService. -func GetDAGServ() ipld.DAGService { - return mdagmock.Mock() -} - -// NodeOpts is used by GetNode, GetEmptyNode and GetRandomNode -type NodeOpts struct { - Prefix cid.Prefix - // ForceRawLeaves if true will force the use of raw leaves - ForceRawLeaves bool - // RawLeavesUsed is true if raw leaves or either implicitly or explicitly enabled - RawLeavesUsed bool -} - -// Some shorthands for NodeOpts. -var ( - UseProtoBufLeaves = NodeOpts{Prefix: mdag.V0CidPrefix()} - UseRawLeaves = NodeOpts{Prefix: mdag.V0CidPrefix(), ForceRawLeaves: true, RawLeavesUsed: true} - UseCidV1 = NodeOpts{Prefix: mdag.V1CidPrefix(), RawLeavesUsed: true} - UseBlake2b256 NodeOpts -) - -func init() { - UseBlake2b256 = UseCidV1 - UseBlake2b256.Prefix.MhType = mh.Names["blake2b-256"] - UseBlake2b256.Prefix.MhLength = -1 -} - -// GetNode returns a unixfs file node with the specified data. -func GetNode(t testing.TB, dserv ipld.DAGService, data []byte, opts NodeOpts) ipld.Node { - in := bytes.NewReader(data) - - dbp := h.DagBuilderParams{ - Dagserv: dserv, - Maxlinks: h.DefaultLinksPerBlock, - Prefix: &opts.Prefix, - RawLeaves: opts.RawLeavesUsed, - } - - node, err := trickle.Layout(dbp.New(SizeSplitterGen(500)(in))) - if err != nil { - t.Fatal(err) - } - - return node -} - -// GetEmptyNode returns an empty unixfs file node. -func GetEmptyNode(t testing.TB, dserv ipld.DAGService, opts NodeOpts) ipld.Node { - return GetNode(t, dserv, []byte{}, opts) -} - -// GetRandomNode returns a random unixfs file node. -func GetRandomNode(t testing.TB, dserv ipld.DAGService, size int64, opts NodeOpts) ([]byte, ipld.Node) { - in := io.LimitReader(u.NewTimeSeededRand(), size) - buf, err := ioutil.ReadAll(in) - if err != nil { - t.Fatal(err) - } - - node := GetNode(t, dserv, buf, opts) - return buf, node -} - -// ArrComp checks if two byte slices are the same. -func ArrComp(a, b []byte) error { - if len(a) != len(b) { - return fmt.Errorf("arrays differ in length. %d != %d", len(a), len(b)) - } - for i, v := range a { - if v != b[i] { - return fmt.Errorf("arrays differ at index: %d", i) - } - } - return nil -} - -// PrintDag pretty-prints the given dag to stdout. 
-func PrintDag(nd *mdag.ProtoNode, ds ipld.DAGService, indent int) { - pbd, err := ft.FromBytes(nd.Data()) - if err != nil { - panic(err) - } - - for i := 0; i < indent; i++ { - fmt.Print(" ") - } - fmt.Printf("{size = %d, type = %s, children = %d", pbd.GetFilesize(), pbd.GetType().String(), len(pbd.GetBlocksizes())) - if len(nd.Links()) > 0 { - fmt.Println() - } - for _, lnk := range nd.Links() { - child, err := lnk.GetNode(context.Background(), ds) - if err != nil { - panic(err) - } - PrintDag(child.(*mdag.ProtoNode), ds, indent+1) - } - if len(nd.Links()) > 0 { - for i := 0; i < indent; i++ { - fmt.Print(" ") - } - } - fmt.Println("}") -} diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go deleted file mode 100644 index e02fa77df..000000000 --- a/unixfs/unixfs.go +++ /dev/null @@ -1,305 +0,0 @@ -// Package unixfs implements a data format for files in the IPFS filesystem It -// is not the only format in ipfs, but it is the one that the filesystem -// assumes -package unixfs - -import ( - "errors" - - proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto" - - dag "gx/ipfs/QmRy4Qk9hbgFX9NGJRm8rBThrA8PZhNCitMgeRYyZ67s59/go-merkledag" - pb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb" -) - -// Shorthands for protobuffer types -const ( - TRaw = pb.Data_Raw - TFile = pb.Data_File - TDirectory = pb.Data_Directory - TMetadata = pb.Data_Metadata - TSymlink = pb.Data_Symlink - THAMTShard = pb.Data_HAMTShard -) - -// Common errors -var ( - ErrMalformedFileFormat = errors.New("malformed data in file format") - ErrUnrecognizedType = errors.New("unrecognized node type") -) - -// FromBytes unmarshals a byte slice as protobuf Data. -func FromBytes(data []byte) (*pb.Data, error) { - pbdata := new(pb.Data) - err := proto.Unmarshal(data, pbdata) - if err != nil { - return nil, err - } - return pbdata, nil -} - -// FilePBData creates a protobuf File with the given -// byte slice and returns the marshaled protobuf bytes representing it. -func FilePBData(data []byte, totalsize uint64) []byte { - pbfile := new(pb.Data) - typ := pb.Data_File - pbfile.Type = &typ - pbfile.Data = data - pbfile.Filesize = proto.Uint64(totalsize) - - data, err := proto.Marshal(pbfile) - if err != nil { - // This really shouldnt happen, i promise - // The only failure case for marshal is if required fields - // are not filled out, and they all are. If the proto object - // gets changed and nobody updates this function, the code - // should panic due to programmer error - panic(err) - } - return data -} - -//FolderPBData returns Bytes that represent a Directory. -func FolderPBData() []byte { - pbfile := new(pb.Data) - typ := pb.Data_Directory - pbfile.Type = &typ - - data, err := proto.Marshal(pbfile) - if err != nil { - //this really shouldnt happen, i promise - panic(err) - } - return data -} - -//WrapData marshals raw bytes into a `Data_Raw` type protobuf message. -func WrapData(b []byte) []byte { - pbdata := new(pb.Data) - typ := pb.Data_Raw - pbdata.Data = b - pbdata.Type = &typ - pbdata.Filesize = proto.Uint64(uint64(len(b))) - - out, err := proto.Marshal(pbdata) - if err != nil { - // This shouldnt happen. seriously. - panic(err) - } - - return out -} - -//SymlinkData returns a `Data_Symlink` protobuf message for the path you specify. 
-func SymlinkData(path string) ([]byte, error) { - pbdata := new(pb.Data) - typ := pb.Data_Symlink - pbdata.Data = []byte(path) - pbdata.Type = &typ - - out, err := proto.Marshal(pbdata) - if err != nil { - return nil, err - } - - return out, nil -} - -// UnwrapData unmarshals a protobuf messages and returns the contents. -func UnwrapData(data []byte) ([]byte, error) { - pbdata := new(pb.Data) - err := proto.Unmarshal(data, pbdata) - if err != nil { - return nil, err - } - return pbdata.GetData(), nil -} - -// DataSize returns the size of the contents in protobuf wrapped slice. -// For raw data it simply provides the length of it. For Data_Files, it -// will return the associated filesize. Note that Data_Directories will -// return an error. -func DataSize(data []byte) (uint64, error) { - pbdata := new(pb.Data) - err := proto.Unmarshal(data, pbdata) - if err != nil { - return 0, err - } - - switch pbdata.GetType() { - case pb.Data_Directory: - return 0, errors.New("can't get data size of directory") - case pb.Data_File: - return pbdata.GetFilesize(), nil - case pb.Data_Raw: - return uint64(len(pbdata.GetData())), nil - default: - return 0, errors.New("unrecognized node data type") - } -} - -// An FSNode represents a filesystem object using the UnixFS specification. -// -// The `NewFSNode` constructor should be used instead of just calling `new(FSNode)` -// to guarantee that the required (`Type` and `Filesize`) fields in the `format` -// structure are initialized before marshaling (in `GetBytes()`). -type FSNode struct { - - // UnixFS format defined as a protocol buffers message. - format pb.Data -} - -// FSNodeFromBytes unmarshal a protobuf message onto an FSNode. -func FSNodeFromBytes(b []byte) (*FSNode, error) { - n := new(FSNode) - err := proto.Unmarshal(b, &n.format) - if err != nil { - return nil, err - } - - return n, nil -} - -// NewFSNode creates a new FSNode structure with the given `dataType`. -// -// It initializes the (required) `Type` field (that doesn't have a `Set()` -// accessor so it must be specified at creation), otherwise the `Marshal()` -// method in `GetBytes()` would fail (`required field "Type" not set`). -// -// It also initializes the `Filesize` pointer field to ensure its value -// is never nil before marshaling, this is not a required field but it is -// done to be backwards compatible with previous `go-ipfs` versions hash. -// (If it wasn't initialized there could be cases where `Filesize` could -// have been left at nil, when the `FSNode` was created but no data or -// child nodes were set to adjust it, as is the case in `NewLeaf()`.) -func NewFSNode(dataType pb.Data_DataType) *FSNode { - n := new(FSNode) - n.format.Type = &dataType - - // Initialize by `Filesize` by updating it with a dummy (zero) value. - n.UpdateFilesize(0) - - return n -} - -// AddBlockSize adds the size of the next child block of this node -func (n *FSNode) AddBlockSize(s uint64) { - n.UpdateFilesize(int64(s)) - n.format.Blocksizes = append(n.format.Blocksizes, s) -} - -// RemoveBlockSize removes the given child block's size. -func (n *FSNode) RemoveBlockSize(i int) { - n.UpdateFilesize(-int64(n.format.Blocksizes[i])) - n.format.Blocksizes = append(n.format.Blocksizes[:i], n.format.Blocksizes[i+1:]...) -} - -// BlockSize returns the block size indexed by `i`. -// TODO: Evaluate if this function should be bounds checking. -func (n *FSNode) BlockSize(i int) uint64 { - return n.format.Blocksizes[i] -} - -// RemoveAllBlockSizes removes all the child block sizes of this node. 
-func (n *FSNode) RemoveAllBlockSizes() { - n.format.Blocksizes = []uint64{} - n.format.Filesize = proto.Uint64(uint64(len(n.Data()))) -} - -// GetBytes marshals this node as a protobuf message. -func (n *FSNode) GetBytes() ([]byte, error) { - return proto.Marshal(&n.format) -} - -// FileSize returns the total size of this tree. That is, the size of -// the data in this node plus the size of all its children. -func (n *FSNode) FileSize() uint64 { - return n.format.GetFilesize() -} - -// NumChildren returns the number of child blocks of this node -func (n *FSNode) NumChildren() int { - return len(n.format.Blocksizes) -} - -// Data retrieves the `Data` field from the internal `format`. -func (n *FSNode) Data() []byte { - return n.format.GetData() -} - -// SetData sets the `Data` field from the internal `format` -// updating its `Filesize`. -func (n *FSNode) SetData(newData []byte) { - n.UpdateFilesize(int64(len(newData) - len(n.Data()))) - n.format.Data = newData -} - -// UpdateFilesize updates the `Filesize` field from the internal `format` -// by a signed difference (`filesizeDiff`). -// TODO: Add assert to check for `Filesize` > 0? -func (n *FSNode) UpdateFilesize(filesizeDiff int64) { - n.format.Filesize = proto.Uint64(uint64( - int64(n.format.GetFilesize()) + filesizeDiff)) -} - -// Type retrieves the `Type` field from the internal `format`. -func (n *FSNode) Type() pb.Data_DataType { - return n.format.GetType() -} - -// Metadata is used to store additional FSNode information. -type Metadata struct { - MimeType string - Size uint64 -} - -// MetadataFromBytes Unmarshals a protobuf Data message into Metadata. -// The provided slice should have been encoded with BytesForMetadata(). -func MetadataFromBytes(b []byte) (*Metadata, error) { - pbd := new(pb.Data) - err := proto.Unmarshal(b, pbd) - if err != nil { - return nil, err - } - if pbd.GetType() != pb.Data_Metadata { - return nil, errors.New("incorrect node type") - } - - pbm := new(pb.Metadata) - err = proto.Unmarshal(pbd.Data, pbm) - if err != nil { - return nil, err - } - md := new(Metadata) - md.MimeType = pbm.GetMimeType() - return md, nil -} - -// Bytes marshals Metadata as a protobuf message of Metadata type. -func (m *Metadata) Bytes() ([]byte, error) { - pbm := new(pb.Metadata) - pbm.MimeType = &m.MimeType - return proto.Marshal(pbm) -} - -// BytesForMetadata wraps the given Metadata as a profobuf message of Data type, -// setting the DataType to Metadata. The wrapped bytes are itself the -// result of calling m.Bytes(). -func BytesForMetadata(m *Metadata) ([]byte, error) { - pbd := new(pb.Data) - pbd.Filesize = proto.Uint64(m.Size) - typ := pb.Data_Metadata - pbd.Type = &typ - mdd, err := m.Bytes() - if err != nil { - return nil, err - } - - pbd.Data = mdd - return proto.Marshal(pbd) -} - -// EmptyDirNode creates an empty folder Protonode. 
-func EmptyDirNode() *dag.ProtoNode { - return dag.NodeWithData(FolderPBData()) -} diff --git a/unixfs/unixfs_test.go b/unixfs/unixfs_test.go deleted file mode 100644 index eb0f07368..000000000 --- a/unixfs/unixfs_test.go +++ /dev/null @@ -1,160 +0,0 @@ -package unixfs - -import ( - "bytes" - "testing" - - proto "gx/ipfs/QmZ4Qi3GaRbjcx28Sme5eMH7RQjGkt8wHxt2a65oLaeFEV/gogo-protobuf/proto" - - pb "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs/pb" -) - -func TestFSNode(t *testing.T) { - fsn := NewFSNode(TFile) - for i := 0; i < 16; i++ { - fsn.AddBlockSize(100) - } - fsn.RemoveBlockSize(15) - - fsn.SetData(make([]byte, 128)) - - b, err := fsn.GetBytes() - if err != nil { - t.Fatal(err) - } - - pbn := new(pb.Data) - err = proto.Unmarshal(b, pbn) - if err != nil { - t.Fatal(err) - } - - ds, err := DataSize(b) - if err != nil { - t.Fatal(err) - } - nKids := fsn.NumChildren() - if nKids != 15 { - t.Fatal("Wrong number of child nodes") - } - - if ds != (100*15)+128 { - t.Fatal("Datasize calculations incorrect!") - } - - nfsn, err := FSNodeFromBytes(b) - if err != nil { - t.Fatal(err) - } - - if nfsn.FileSize() != (100*15)+128 { - t.Fatal("fsNode FileSize calculations incorrect") - } -} - -func TestPBdataTools(t *testing.T) { - raw := []byte{0x00, 0x01, 0x02, 0x17, 0xA1} - rawPB := WrapData(raw) - - pbDataSize, err := DataSize(rawPB) - if err != nil { - t.Fatal(err) - } - - same := len(raw) == int(pbDataSize) - if !same { - t.Fatal("WrapData changes the size of data.") - } - - rawPBBytes, err := UnwrapData(rawPB) - if err != nil { - t.Fatal(err) - } - - same = bytes.Equal(raw, rawPBBytes) - if !same { - t.Fatal("Unwrap failed to produce the correct wrapped data.") - } - - rawPBdata, err := FromBytes(rawPB) - if err != nil { - t.Fatal(err) - } - - isRaw := rawPBdata.GetType() == TRaw - if !isRaw { - t.Fatal("WrapData does not create pb.Data_Raw!") - } - - catFile := []byte("Mr_Meowgie.gif") - catPBfile := FilePBData(catFile, 17) - catSize, err := DataSize(catPBfile) - if catSize != 17 { - t.Fatal("FilePBData is the wrong size.") - } - if err != nil { - t.Fatal(err) - } - - dirPB := FolderPBData() - dir, err := FromBytes(dirPB) - isDir := dir.GetType() == TDirectory - if !isDir { - t.Fatal("FolderPBData does not create a directory!") - } - if err != nil { - t.Fatal(err) - } - _, dirErr := DataSize(dirPB) - if dirErr == nil { - t.Fatal("DataSize didn't throw an error when taking the size of a directory.") - } - - catSym, err := SymlinkData("/ipfs/adad123123/meowgie.gif") - if err != nil { - t.Fatal(err) - } - - catSymPB, err := FromBytes(catSym) - isSym := catSymPB.GetType() == TSymlink - if !isSym { - t.Fatal("Failed to make a Symlink.") - } - if err != nil { - t.Fatal(err) - } - - _, sizeErr := DataSize(catSym) - if sizeErr == nil { - t.Fatal("DataSize didn't throw an error when taking the size of a Symlink.") - } - -} - -func TestMetadata(t *testing.T) { - meta := &Metadata{ - MimeType: "audio/aiff", - Size: 12345, - } - - _, err := meta.Bytes() - if err != nil { - t.Fatal(err) - } - - metaPB, err := BytesForMetadata(meta) - if err != nil { - t.Fatal(err) - } - - meta, err = MetadataFromBytes(metaPB) - if err != nil { - t.Fatal(err) - } - - mimeAiff := meta.MimeType == "audio/aiff" - if !mimeAiff { - t.Fatal("Metadata does not Marshal and Unmarshal properly!") - } - -}
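
For reference, a minimal sketch of how the removed `unixfs` helpers fit together, based only on the signatures visible in the deleted `unixfs.go` above. This is illustrative, not part of the diff; the gx-vendored import path is the one used throughout this changeset, and the canonical path in the extracted `go-unixfs` repository may differ.

package main

import (
	"fmt"

	ft "gx/ipfs/QmSaz8Qg77gGqvDvLKeSAY7ivDEnramSWF6T7TcRwFpHtP/go-unixfs"
)

func main() {
	// Wrap raw bytes as a unixfs File protobuf and read the size back out.
	data := []byte("hello unixfs")
	filePB := ft.FilePBData(data, uint64(len(data)))

	size, err := ft.DataSize(filePB)
	if err != nil {
		panic(err)
	}
	fmt.Println(size) // 12

	// FSNode round-trip: create a file node, attach data and one child
	// block size, marshal it, and parse it back.
	fsn := ft.NewFSNode(ft.TFile)
	fsn.SetData(data)
	fsn.AddBlockSize(100)

	b, err := fsn.GetBytes()
	if err != nil {
		panic(err)
	}

	parsed, err := ft.FSNodeFromBytes(b)
	if err != nil {
		panic(err)
	}
	fmt.Println(parsed.FileSize(), parsed.NumChildren()) // 112 1
}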