From 5464020c5884030fbab3e423bb6dc9ed226098d2 Mon Sep 17 00:00:00 2001
From: Jeromy
Date: Wed, 9 Sep 2015 20:20:23 -0700
Subject: [PATCH 1/3] first pass at a tar importer

License: MIT
Signed-off-by: Jeromy
---
 core/commands/root.go |   1 +
 core/commands/tar.go  | 113 ++++++++++++++++++++++++++
 tar/format.go         | 185 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 299 insertions(+)
 create mode 100644 core/commands/tar.go
 create mode 100644 tar/format.go

diff --git a/core/commands/root.go b/core/commands/root.go
index 38cfa8472..987178058 100644
--- a/core/commands/root.go
+++ b/core/commands/root.go
@@ -108,6 +108,7 @@ var rootSubcommands = map[string]*cmds.Command{
 	"resolve": ResolveCmd,
 	"stats":   StatsCmd,
 	"swarm":   SwarmCmd,
+	"tar":     TarCmd,
 	"tour":    tourCmd,
 	"file":    unixfs.UnixFSCmd,
 	"update":  UpdateCmd,
diff --git a/core/commands/tar.go b/core/commands/tar.go
new file mode 100644
index 000000000..0d6fc1318
--- /dev/null
+++ b/core/commands/tar.go
@@ -0,0 +1,113 @@
+package commands
+
+import (
+	"io"
+	"strings"
+
+	cmds "github.com/ipfs/go-ipfs/commands"
+	core "github.com/ipfs/go-ipfs/core"
+	path "github.com/ipfs/go-ipfs/path"
+	tar "github.com/ipfs/go-ipfs/tar"
+)
+
+var TarCmd = &cmds.Command{
+	Helptext: cmds.HelpText{
+		Tagline: "utility functions for tar files in ipfs",
+	},
+
+	Subcommands: map[string]*cmds.Command{
+		"add": tarAddCmd,
+		"cat": tarCatCmd,
+	},
+}
+
+var tarAddCmd = &cmds.Command{
+	Helptext: cmds.HelpText{
+		Tagline: "import a tar file into ipfs",
+		ShortDescription: `
+'ipfs tar add' will parse a tar file and create a merkledag structure to represent it.
+`,
+	},
+
+	Arguments: []cmds.Argument{
+		cmds.FileArg("file", true, false, "tar file to add").EnableStdin(),
+	},
+	Run: func(req cmds.Request, res cmds.Response) {
+		nd, err := req.InvocContext().GetNode()
+		if err != nil {
+			res.SetError(err, cmds.ErrNormal)
+			return
+		}
+
+		fi, err := req.Files().NextFile()
+		if err != nil {
+			res.SetError(err, cmds.ErrNormal)
+			return
+		}
+
+		node, err := tar.ImportTar(fi, nd.DAG)
+		if err != nil {
+			res.SetError(err, cmds.ErrNormal)
+			return
+		}
+
+		k, err := node.Key()
+		if err != nil {
+			res.SetError(err, cmds.ErrNormal)
+			return
+		}
+
+		fi.FileName()
+		res.SetOutput(&AddedObject{
+			Name: fi.FileName(),
+			Hash: k.B58String(),
+		})
+	},
+	Type: AddedObject{},
+	Marshalers: cmds.MarshalerMap{
+		cmds.Text: func(res cmds.Response) (io.Reader, error) {
+			o := res.Output().(*AddedObject)
+			return strings.NewReader(o.Hash), nil
+		},
+	},
+}
+
+var tarCatCmd = &cmds.Command{
+	Helptext: cmds.HelpText{
+		Tagline: "export a tar file from ipfs",
+		ShortDescription: `
+'ipfs tar cat' will export a tar file from a previously imported one in ipfs
+`,
+	},
+
+	Arguments: []cmds.Argument{
+		cmds.StringArg("path", true, false, "ipfs path of archive to export").EnableStdin(),
+	},
+	Run: func(req cmds.Request, res cmds.Response) {
+		nd, err := req.InvocContext().GetNode()
+		if err != nil {
+			res.SetError(err, cmds.ErrNormal)
+			return
+		}
+
+		p, err := path.ParsePath(req.Arguments()[0])
+		if err != nil {
+			res.SetError(err, cmds.ErrNormal)
+			return
+		}
+
+		root, err := core.Resolve(req.Context(), nd, p)
+		if err != nil {
+			res.SetError(err, cmds.ErrNormal)
+			return
+		}
+
+		r, err := tar.ExportTar(req.Context(), root, nd.DAG)
+		if err != nil {
+			res.SetError(err, cmds.ErrNormal)
+			return
+		}
+
+		res.SetOutput(r)
+	},
+}
diff --git a/tar/format.go b/tar/format.go
new file mode 100644
index 000000000..cedb7f885
--- /dev/null
+++ b/tar/format.go
@@ -0,0 +1,185 @@
+package tarfmt
+
+import (
+	"archive/tar"
+	"bytes"
+	"errors"
+	"io"
+	"io/ioutil"
+
+	importer "github.com/ipfs/go-ipfs/importer"
+	chunk "github.com/ipfs/go-ipfs/importer/chunk"
+	dag "github.com/ipfs/go-ipfs/merkledag"
+	uio "github.com/ipfs/go-ipfs/unixfs/io"
+
+	context "github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"
+)
+
+var blockSize = 512
+var zeroBlock = make([]byte, blockSize)
+
+func marshalHeader(h *tar.Header) ([]byte, error) {
+	buf := new(bytes.Buffer)
+	w := tar.NewWriter(buf)
+	err := w.WriteHeader(h)
+	if err != nil {
+		return nil, err
+	}
+	return buf.Bytes(), nil
+}
+
+func ImportTar(r io.Reader, ds dag.DAGService) (*dag.Node, error) {
+	rall, err := ioutil.ReadAll(r)
+	if err != nil {
+		return nil, err
+	}
+
+	r = bytes.NewReader(rall)
+
+	tr := tar.NewReader(r)
+
+	root := new(dag.Node)
+	root.Data = []byte("ipfs/tar")
+
+	for {
+		h, err := tr.Next()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return nil, err
+		}
+
+		header := new(dag.Node)
+
+		headerBytes, err := marshalHeader(h)
+		if err != nil {
+			return nil, err
+		}
+
+		header.Data = headerBytes
+
+		if h.Size > 0 {
+			spl := chunk.NewRabin(tr, uint64(chunk.DefaultBlockSize))
+			nd, err := importer.BuildDagFromReader(ds, spl, nil)
+			if err != nil {
+				return nil, err
+			}
+
+			err = header.AddNodeLinkClean("data", nd)
+			if err != nil {
+				return nil, err
+			}
+		}
+
+		_, err = ds.Add(header)
+		if err != nil {
+			return nil, err
+		}
+
+		err = root.AddNodeLinkClean(h.Name, header)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	_, err = ds.Add(root)
+	if err != nil {
+		return nil, err
+	}
+
+	return root, nil
+}
+
+type tarReader struct {
+	links []*dag.Link
+	ds    dag.DAGService
+
+	hdrBuf   *bytes.Reader
+	fileRead *countReader
+	pad      int
+
+	ctx context.Context
+}
+
+func (tr *tarReader) Read(b []byte) (int, error) {
+	if tr.hdrBuf != nil {
+		n, err := tr.hdrBuf.Read(b)
+		if err == io.EOF {
+			tr.hdrBuf = nil
+			return n, nil
+		}
+		return n, err
+	}
+	if tr.fileRead != nil {
+		n, err := tr.fileRead.Read(b)
+		if err == io.EOF {
+			nr := tr.fileRead.n
+			tr.pad = (blockSize - (nr % blockSize)) % blockSize
+			tr.fileRead.Close()
+			tr.fileRead = nil
+			return n, nil
+		}
+		return n, err
+	}
+	if tr.pad > 0 {
+		n := copy(b, zeroBlock[:tr.pad])
+		tr.pad -= n
+		return n, nil
+	}
+
+	if len(tr.links) == 0 {
+		return 0, io.EOF
+	}
+
+	next := tr.links[0]
+	tr.links = tr.links[1:]
+
+	headerNd, err := next.GetNode(tr.ctx, tr.ds)
+	if err != nil {
+		return 0, err
+	}
+
+	tr.hdrBuf = bytes.NewReader(headerNd.Data)
+	if len(headerNd.Links) > 0 {
+		data, err := headerNd.Links[0].GetNode(tr.ctx, tr.ds)
+		if err != nil {
+			return 0, err
+		}
+
+		dr, err := uio.NewDagReader(tr.ctx, data, tr.ds)
+		if err != nil {
+			return 0, err
+		}
+
+		tr.fileRead = &countReader{r: dr}
+	}
+
+	return tr.Read(b)
+}
+
+func ExportTar(ctx context.Context, root *dag.Node, ds dag.DAGService) (io.Reader, error) {
+	if string(root.Data) != "ipfs/tar" {
+		return nil, errors.New("not an ipfs tarchive")
+	}
+	return &tarReader{
+		links: root.Links,
+		ds:    ds,
+		ctx:   ctx,
+	}, nil
+}
+
+type countReader struct {
+	r io.ReadCloser
+	n int
+}
+
+func (r *countReader) Read(b []byte) (int, error) {
+	n, err := r.r.Read(b)
+	r.n += n
+	return n, err
+}
+
+func (r *countReader) Close() error {
+	return r.r.Close()
+}

From 678e74bdae51faa7d07bf5b1ba2abd783367a27c Mon Sep 17 00:00:00 2001
From: Jeromy
Date: Thu, 10 Sep 2015 12:34:04 -0700
Subject: [PATCH 2/3] sharding directory entries down to not create a massive root node

License: MIT
Signed-off-by: Jeromy
---
 tar/format.go | 63 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/tar/format.go b/tar/format.go
index cedb7f885..0aac6a870 100644
--- a/tar/format.go
+++ b/tar/format.go
@@ -6,15 +6,20 @@ import (
 	"errors"
 	"io"
 	"io/ioutil"
+	"strings"
 
 	importer "github.com/ipfs/go-ipfs/importer"
 	chunk "github.com/ipfs/go-ipfs/importer/chunk"
 	dag "github.com/ipfs/go-ipfs/merkledag"
+	dagutil "github.com/ipfs/go-ipfs/merkledag/utils"
 	uio "github.com/ipfs/go-ipfs/unixfs/io"
+	u "github.com/ipfs/go-ipfs/util"
 
 	context "github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"
 )
 
+var log = u.Logger("tarfmt")
+
 var blockSize = 512
 var zeroBlock = make([]byte, blockSize)
 
@@ -41,6 +46,8 @@ func ImportTar(r io.Reader, ds dag.DAGService) (*dag.Node, error) {
 	root := new(dag.Node)
 	root.Data = []byte("ipfs/tar")
 
+	e := dagutil.NewDagEditor(ds, root)
+
 	for {
 		h, err := tr.Next()
 		if err != nil {
@@ -77,12 +84,14 @@ func ImportTar(r io.Reader, ds dag.DAGService) (*dag.Node, error) {
 			return nil, err
 		}
 
-		err = root.AddNodeLinkClean(h.Name, header)
+		path := escapePath(h.Name)
+		err = e.InsertNodeAtPath(context.Background(), path, header, func() *dag.Node { return new(dag.Node) })
 		if err != nil {
 			return nil, err
 		}
 	}
 
+	root = e.GetNode()
 	_, err = ds.Add(root)
 	if err != nil {
 		return nil, err
@@ -91,18 +100,30 @@ func ImportTar(r io.Reader, ds dag.DAGService) (*dag.Node, error) {
 	return root, nil
 }
 
+// adds a '-' to the beginning of each path element so we can use 'data' as a
+// special link in the structure without having to worry about name collisions
+func escapePath(path string) string {
+	elems := strings.Split(strings.Trim(path, "/"), "/")
+	for i, e := range elems {
+		elems[i] = "-" + e
+	}
+	return strings.Join(elems, "/")
+}
+
 type tarReader struct {
 	links []*dag.Link
 	ds    dag.DAGService
 
-	hdrBuf   *bytes.Reader
-	fileRead *countReader
-	pad      int
+	childRead *tarReader
+	hdrBuf    *bytes.Reader
+	fileRead  *countReader
+	pad       int
 
 	ctx context.Context
 }
 
 func (tr *tarReader) Read(b []byte) (int, error) {
+	// if we have a header to be read, it takes priority
 	if tr.hdrBuf != nil {
 		n, err := tr.hdrBuf.Read(b)
 		if err == io.EOF {
@@ -111,6 +132,18 @@ func (tr *tarReader) Read(b []byte) (int, error) {
 		}
 		return n, err
 	}
+
+	// no header remaining, check for a recursive child read
+	if tr.childRead != nil {
+		n, err := tr.childRead.Read(b)
+		if err == io.EOF {
+			tr.childRead = nil
+			return n, nil
+		}
+		return n, err
+	}
+
+	// check for filedata to be read
 	if tr.fileRead != nil {
 		n, err := tr.fileRead.Read(b)
 		if err == io.EOF {
@@ -122,6 +155,8 @@ func (tr *tarReader) Read(b []byte) (int, error) {
 		}
 		return n, err
 	}
+
+	// filedata reads must be padded out to 512 byte offsets
 	if tr.pad > 0 {
 		n := copy(b, zeroBlock[:tr.pad])
 		tr.pad -= n
@@ -141,18 +176,26 @@ func (tr *tarReader) Read(b []byte) (int, error) {
 	}
 
 	tr.hdrBuf = bytes.NewReader(headerNd.Data)
-	if len(headerNd.Links) > 0 {
-		data, err := headerNd.Links[0].GetNode(tr.ctx, tr.ds)
-		if err != nil {
-			return 0, err
-		}
 
-		dr, err := uio.NewDagReader(tr.ctx, data, tr.ds)
+	dataNd, err := headerNd.GetLinkedNode(tr.ctx, tr.ds, "data")
+	if err != nil && err != dag.ErrNotFound {
+		return 0, err
+	}
+
+	if err == nil {
+		dr, err := uio.NewDagReader(tr.ctx, dataNd, tr.ds)
 		if err != nil {
+			log.Error("dagreader error: ", err)
 			return 0, err
 		}
 
 		tr.fileRead = &countReader{r: dr}
+	} else if len(headerNd.Links) > 0 {
+		tr.childRead = &tarReader{
+			links: headerNd.Links,
+			ds:    tr.ds,
+			ctx:   tr.ctx,
+		}
 	}
 
 	return tr.Read(b)
 }
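Note on the sharding change above: ImportTar now reserves the link name "data" for an entry's file contents and prefixes every real path element with '-' before inserting it into the DAG, so directory entries are spread across intermediate nodes instead of all hanging off one massive root. Below is a small standalone sketch of that escaping; escapePath is copied verbatim from tar/format.go, while the main function and the sample paths are illustrative only.

package main

import (
	"fmt"
	"strings"
)

// escapePath prefixes each path element with '-' so that an archive entry
// literally named "data" can never collide with the reserved "data" link.
func escapePath(path string) string {
	elems := strings.Split(strings.Trim(path, "/"), "/")
	for i, e := range elems {
		elems[i] = "-" + e
	}
	return strings.Join(elems, "/")
}

func main() {
	fmt.Println(escapePath("foo/bar/baz")) // -foo/-bar/-baz
	fmt.Println(escapePath("/data"))       // -data
}
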
From 533a729949f09556aca3f82fd19d6b7257bb4b9b Mon Sep 17 00:00:00 2001
From: Jeromy
Date: Thu, 10 Sep 2015 17:11:35 -0700
Subject: [PATCH 3/3] add sharness test for tar commands

License: MIT
Signed-off-by: Jeromy
---
 test/sharness/t0210-tar.sh | 49 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100755 test/sharness/t0210-tar.sh

diff --git a/test/sharness/t0210-tar.sh b/test/sharness/t0210-tar.sh
new file mode 100755
index 000000000..48e3c9dd8
--- /dev/null
+++ b/test/sharness/t0210-tar.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+#
+# Copyright (c) 2015 Jeromy Johnson
+# MIT Licensed; see the LICENSE file in this repository.
+#
+
+test_description="Test tar commands"
+
+. lib/test-lib.sh
+
+test_init_ipfs
+
+test_expect_success "create some random files" '
+	mkdir foo &&
+	random 10000 > foo/a &&
+	random 12345 > foo/b &&
+	mkdir foo/bar &&
+	random 5432 > foo/bar/baz &&
+	ln -s ../a foo/bar/link &&
+	echo "exit" > foo/script &&
+	chmod +x foo/script
+'
+
+test_expect_success "tar those random files up" '
+	tar cf files.tar foo/
+'
+
+test_expect_success "'ipfs tar add' succeeds" '
+	TAR_HASH=$(ipfs tar add files.tar)
+'
+
+test_expect_success "'ipfs tar cat' succeeds" '
+	mkdir output &&
+	ipfs tar cat $TAR_HASH > output/out.tar
+'
+
+test_expect_success "can extract tar" '
+	tar xf output/out.tar -C output/
+'
+
+test_expect_success "files look right" '
+	diff foo/a output/foo/a &&
+	diff foo/b output/foo/b &&
+	diff foo/bar/baz output/foo/bar/baz &&
+	[ -L output/foo/bar/link ] &&
+	[ -x output/foo/script ]
+'
+
+test_done
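Usage sketch (not part of the patch series): the command handlers in core/commands/tar.go are thin wrappers around tar.ImportTar and tar.ExportTar, so the round trip the sharness test drives through the CLI can also be done from Go. The function below is illustrative only; it assumes an already-constructed *core.IpfsNode, and the roundTrip helper and file names are made up for the example.

package main

import (
	"fmt"
	"io"
	"os"

	context "github.com/ipfs/go-ipfs/Godeps/_workspace/src/golang.org/x/net/context"
	core "github.com/ipfs/go-ipfs/core"
	tar "github.com/ipfs/go-ipfs/tar"
)

// roundTrip imports files.tar into the node's DAG and streams it back out.
// nd is assumed to be a fully constructed IPFS node; building one requires a
// repo and is outside the scope of these patches.
func roundTrip(nd *core.IpfsNode) error {
	f, err := os.Open("files.tar")
	if err != nil {
		return err
	}
	defer f.Close()

	// Import: parse the archive and build a merkledag rooted at a node
	// whose data field is the "ipfs/tar" marker.
	root, err := tar.ImportTar(f, nd.DAG)
	if err != nil {
		return err
	}

	k, err := root.Key()
	if err != nil {
		return err
	}
	fmt.Println("archive root:", k.B58String())

	// Export: stream headers, file data, and 512-byte padding back out.
	r, err := tar.ExportTar(context.Background(), root, nd.DAG)
	if err != nil {
		return err
	}

	out, err := os.Create("out.tar")
	if err != nil {
		return err
	}
	defer out.Close()

	_, err = io.Copy(out, r)
	return err
}

func main() {
	// Node construction is omitted here; see core/commands/tar.go for how
	// the daemon supplies one per request.
}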