From 678e74bdae51faa7d07bf5b1ba2abd783367a27c Mon Sep 17 00:00:00 2001 From: Jeromy <jeromyj@gmail.com> Date: Thu, 10 Sep 2015 12:34:04 -0700 Subject: [PATCH] sharding directory entries down to not create a massive root node License: MIT Signed-off-by: Jeromy <jeromyj@gmail.com> --- tar/format.go | 63 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/tar/format.go b/tar/format.go index cedb7f885..0aac6a870 100644 --- a/tar/format.go +++ b/tar/format.go @@ -6,15 +6,20 @@ import ( "errors" "io" "io/ioutil" + "strings" importer "github.com/ipfs/go-ipfs/importer" chunk "github.com/ipfs/go-ipfs/importer/chunk" dag "github.com/ipfs/go-ipfs/merkledag" + dagutil "github.com/ipfs/go-ipfs/merkledag/utils" uio "github.com/ipfs/go-ipfs/unixfs/io" + u "github.com/ipfs/go-ipfs/util" context "github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context" ) +var log = u.Logger("tarfmt") + var blockSize = 512 var zeroBlock = make([]byte, blockSize) @@ -41,6 +46,8 @@ func ImportTar(r io.Reader, ds dag.DAGService) (*dag.Node, error) { root := new(dag.Node) root.Data = []byte("ipfs/tar") + e := dagutil.NewDagEditor(ds, root) + for { h, err := tr.Next() if err != nil { @@ -77,12 +84,14 @@ func ImportTar(r io.Reader, ds dag.DAGService) (*dag.Node, error) { return nil, err } - err = root.AddNodeLinkClean(h.Name, header) + path := escapePath(h.Name) + err = e.InsertNodeAtPath(context.Background(), path, header, func() *dag.Node { return new(dag.Node) }) if err != nil { return nil, err } } + root = e.GetNode() _, err = ds.Add(root) if err != nil { return nil, err @@ -91,18 +100,30 @@ func ImportTar(r io.Reader, ds dag.DAGService) (*dag.Node, error) { return root, nil } +// adds a '-' to the beginning of each path element so we can use 'data' as a +// special link in the structure without having to worry about name collisions +func escapePath(path string) string { + elems := strings.Split(strings.Trim(path, "/"), "/") + for i, e :=
range elems { + elems[i] = "-" + e + } + return strings.Join(elems, "/") +} + type tarReader struct { links []*dag.Link ds dag.DAGService - hdrBuf *bytes.Reader - fileRead *countReader - pad int + childRead *tarReader + hdrBuf *bytes.Reader + fileRead *countReader + pad int ctx context.Context } func (tr *tarReader) Read(b []byte) (int, error) { + // if we have a header to be read, it takes priority if tr.hdrBuf != nil { n, err := tr.hdrBuf.Read(b) if err == io.EOF { @@ -111,6 +132,18 @@ func (tr *tarReader) Read(b []byte) (int, error) { } return n, err } + + // no header remaining, check for recursive + if tr.childRead != nil { + n, err := tr.childRead.Read(b) + if err == io.EOF { + tr.childRead = nil + return n, nil + } + return n, err + } + + // check for filedata to be read if tr.fileRead != nil { n, err := tr.fileRead.Read(b) if err == io.EOF { @@ -122,6 +155,8 @@ func (tr *tarReader) Read(b []byte) (int, error) { } return n, err } + + // filedata reads must be padded out to 512 byte offsets if tr.pad > 0 { n := copy(b, zeroBlock[:tr.pad]) tr.pad -= n @@ -141,18 +176,26 @@ func (tr *tarReader) Read(b []byte) (int, error) { } tr.hdrBuf = bytes.NewReader(headerNd.Data) - if len(headerNd.Links) > 0 { - data, err := headerNd.Links[0].GetNode(tr.ctx, tr.ds) - if err != nil { - return 0, err - } - dr, err := uio.NewDagReader(tr.ctx, data, tr.ds) + dataNd, err := headerNd.GetLinkedNode(tr.ctx, tr.ds, "data") + if err != nil && err != dag.ErrNotFound { + return 0, err + } + + if err == nil { + dr, err := uio.NewDagReader(tr.ctx, dataNd, tr.ds) if err != nil { + log.Error("dagreader error: ", err) return 0, err } tr.fileRead = &countReader{r: dr} + } else if len(headerNd.Links) > 0 { + tr.childRead = &tarReader{ + links: headerNd.Links, + ds: tr.ds, + ctx: tr.ctx, + } } return tr.Read(b)