From bcce9ef0880b6320952425700f3f828405ba2d69 Mon Sep 17 00:00:00 2001
From: Juan Batiz-Benet
Date: Wed, 7 Jan 2015 11:04:35 -0800
Subject: [PATCH] merkledag traversal

---
 merkledag/traverse/traverse.go      | 262 ++++++++++++++++++
 merkledag/traverse/traverse_test.go | 397 ++++++++++++++++++++++++++++
 2 files changed, 659 insertions(+)
 create mode 100644 merkledag/traverse/traverse.go
 create mode 100644 merkledag/traverse/traverse_test.go

diff --git a/merkledag/traverse/traverse.go b/merkledag/traverse/traverse.go
new file mode 100644
index 000000000..688c409f2
--- /dev/null
+++ b/merkledag/traverse/traverse.go
@@ -0,0 +1,262 @@
+// Package traverse provides merkledag traversal functions
+package traverse
+
+import (
+	"errors"
+
+	mdag "github.com/jbenet/go-ipfs/merkledag"
+)
+
+// Order is an identifier for traversal algorithm orders
+type Order int
+
+// Supported traversal orders. DFSPre is the zero value of Order, and it is
+// also what Traverse falls back to for any value not listed here.
+const (
+	DFSPre  Order = iota // depth-first pre-order
+	DFSPost              // depth-first post-order
+	BFS                  // breadth-first
+)
+
+// Options specifies a series of traversal options
+type Options struct {
+	DAG     mdag.DAGService // the dagservice to fetch nodes
+	Order   Order           // what order to traverse in
+	Func    Func            // the function to perform at each step
+	ErrFunc ErrFunc         // see ErrFunc. Optional
+
+	SkipDuplicates bool // whether to skip duplicate nodes
+}
+
+// State is a current traversal state
+type State struct {
+	Node  *mdag.Node
+	Depth int
+}
+
+// traversal holds the options and bookkeeping for a single Traverse call.
+type traversal struct {
+	opts Options
+	seen map[string]struct{} // keys already visited (SkipDuplicates only)
+}
+
+// shouldSkip reports whether n was already visited. Nodes are only tracked
+// when SkipDuplicates is set: the first call for a key records it and
+// returns false, later calls for the same key return true. If the node's
+// key cannot be computed, it returns true together with the error.
+func (t *traversal) shouldSkip(n *mdag.Node) (bool, error) {
+	if !t.opts.SkipDuplicates {
+		return false, nil
+	}
+
+	k, err := n.Key()
+	if err != nil {
+		return true, err
+	}
+
+	key := string(k)
+	if _, found := t.seen[key]; found {
+		return true, nil
+	}
+	t.seen[key] = struct{}{}
+	return false, nil
+}
+
+// callFunc invokes the user-supplied Func with the given state.
+func (t *traversal) callFunc(next State) error {
+	return t.opts.Func(next)
+}
+
+// getNode returns the node for link. If it returns an error,
+// stop processing. If it returns a nil node, just skip it.
+//
+// The error handling is a little complicated: when ErrFunc is set, any
+// error (from fetching the node or from computing its key) is handed to
+// it, the node is skipped, and ErrFunc's result becomes the returned error.
+func (t *traversal) getNode(link *mdag.Link) (*mdag.Node, error) {
+	getNode := func(l *mdag.Link) (*mdag.Node, error) {
+		next, err := l.GetNode(t.opts.DAG)
+		if err != nil {
+			return nil, err
+		}
+
+		skip, err := t.shouldSkip(next)
+		if skip {
+			next = nil
+		}
+		return next, err
+	}
+
+	next, err := getNode(link)
+	if err != nil && t.opts.ErrFunc != nil { // attempt recovery.
+		err = t.opts.ErrFunc(err)
+		next = nil // skip regardless
+	}
+	return next, err
+}
+
+// Func is the type of the function called for each dag.Node visited by Traverse.
+// The traversal argument contains the current traversal state.
+// If an error is returned, processing stops.
+type Func func(current State) error
+
+// ErrFunc is the type of the optional error handler in Options.
+// If there is a problem walking to the Node, and ErrFunc is provided, Traverse
+// will call ErrFunc with the error encountered. ErrFunc can decide how to handle
+// that error, and return an error back to Traverse with how to proceed:
+// * nil - skip the Node and its children, but continue processing
+// * all other errors halt processing immediately.
+//
+// If ErrFunc is nil, Traverse will stop, as if:
+//
+//   opts.ErrFunc = func(err error) error { return err }
+//
+type ErrFunc func(err error) error
+
+// Errors returned by Traverse when it is called with unusable arguments.
+var (
+	ErrNilRoot = errors.New("traverse: root node is nil")
+	ErrNilFunc = errors.New("traverse: Options.Func is nil")
+)
+
+// Traverse walks the DAG rooted at root in the order given by o.Order,
+// calling o.Func for every node it visits (the root is at Depth 0).
+// Children are fetched through o.DAG. It returns the first error that
+// stops the walk: one returned by o.Func, or a node-loading error that
+// o.ErrFunc did not clear. A nil root or a nil o.Func is rejected up
+// front instead of being left to panic in the middle of the walk.
+func Traverse(root *mdag.Node, o Options) error {
+	if root == nil {
+		return ErrNilRoot
+	}
+	if o.Func == nil {
+		return ErrNilFunc
+	}
+
+	t := traversal{
+		opts: o,
+		seen: map[string]struct{}{},
+	}
+
+	state := State{
+		Node:  root,
+		Depth: 0,
+	}
+
+	switch o.Order {
+	case DFSPost:
+		return dfsPostTraverse(state, &t)
+	case BFS:
+		return bfsTraverse(state, &t)
+	default: // DFSPre, and any unrecognized order
+		return dfsPreTraverse(state, &t)
+	}
+}
+
+// dfsFunc is the signature shared by the recursive depth-first walkers.
+type dfsFunc func(state State, t *traversal) error
+
+// dfsPreTraverse visits state's node first, then its children in link order.
+func dfsPreTraverse(state State, t *traversal) error {
+	if err := t.callFunc(state); err != nil {
+		return err
+	}
+	return dfsDescend(dfsPreTraverse, state, t)
+}
+
+// dfsPostTraverse visits state's children in link order, then its node.
+func dfsPostTraverse(state State, t *traversal) error {
+	if err := dfsDescend(dfsPostTraverse, state, t); err != nil {
+		return err
+	}
+	return t.callFunc(state)
+}
+
+// dfsDescend applies df to every child of curr, one level deeper, stopping
+// at the first error. Children that getNode reports as nil are skipped.
+func dfsDescend(df dfsFunc, curr State, t *traversal) error {
+	for _, l := range curr.Node.Links {
+		node, err := t.getNode(l)
+		if err != nil {
+			return err
+		}
+		if node == nil { // skip
+			continue
+		}
+
+		next := State{
+			Node:  node,
+			Depth: curr.Depth + 1,
+		}
+		if err := df(next, t); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// bfsTraverse visits nodes level by level using a FIFO queue. Children are
+// checked for duplicates as they are enqueued (inside getNode), so the
+// root, which never goes through getNode, is checked here explicitly.
+func bfsTraverse(root State, t *traversal) error {
+	if skip, err := t.shouldSkip(root.Node); skip || err != nil {
+		return err
+	}
+
+	var q queue
+	q.enq(root)
+	for q.len() > 0 {
+		curr := q.deq()
+		if curr.Node == nil {
+			return errors.New("failed to dequeue though queue not empty")
+		}
+
+		// call user's func
+		if err := t.callFunc(curr); err != nil {
+			return err
+		}
+
+		for _, l := range curr.Node.Links {
+			node, err := t.getNode(l)
+			if err != nil {
+				return err
+			}
+			if node == nil { // skip
+				continue
+			}
+
+			q.enq(State{
+				Node:  node,
+				Depth: curr.Depth + 1,
+			})
+		}
+	}
+	return nil
+}
+
+// queue is a simple FIFO of traversal states, backed by a slice.
+type queue struct {
+	s []State
+}
+
+// enq appends n to the back of the queue.
+func (q *queue) enq(n State) {
+	q.s = append(q.s, n)
+}
+
+// deq removes and returns the state at the front of the queue, or the
+// zero State if the queue is empty.
+func (q *queue) deq() State {
+	if len(q.s) < 1 {
+		return State{}
+	}
+	n := q.s[0]
+	q.s[0] = State{} // clear the slot so the backing array drops its node reference
+	q.s = q.s[1:]
+	return n
+}
+
+// len returns the number of queued states.
+func (q *queue) len() int {
+	return len(q.s)
+}
diff --git a/merkledag/traverse/traverse_test.go b/merkledag/traverse/traverse_test.go
new file mode 100644
index 000000000..912ce34d7
--- /dev/null
+++ b/merkledag/traverse/traverse_test.go
@@ -0,0 +1,397 @@
+package traverse
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+
+	mdag "github.com/jbenet/go-ipfs/merkledag"
+)
+
+func TestDFSPreNoSkip(t *testing.T) {
+	opts := Options{Order: DFSPre}
+
+	testWalkOutputs(t, newFan(t), opts, []byte(`
+0 /a
+1 /a/aa
+1 /a/ab
+1 /a/ac
+1 /a/ad
+`))
+
+	testWalkOutputs(t, newLinkedList(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+`))
+
+	testWalkOutputs(t, newBinaryTree(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+2 /a/aa/aab
+1 /a/ab
+2 /a/ab/aba
+2 /a/ab/abb
+`))
+
+	testWalkOutputs(t, newBinaryDAG(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+1 /a/aa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+`))
+}
+
+func TestDFSPreSkip(t *testing.T) {
+	opts := Options{Order: DFSPre, SkipDuplicates: true}
+
+	testWalkOutputs(t, newFan(t), opts, []byte(`
+0 /a
+1 /a/aa
+1 /a/ab
+1 /a/ac
+1 /a/ad
+`))
+
+	testWalkOutputs(t, newLinkedList(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+`))
+
+	testWalkOutputs(t, newBinaryTree(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+2 /a/aa/aab
+1 /a/ab
+2 /a/ab/aba
+2 /a/ab/abb
+`))
+
+	testWalkOutputs(t, newBinaryDAG(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+`))
+}
+
+func TestDFSPostNoSkip(t *testing.T) {
+	opts := Options{Order: DFSPost}
+
+	testWalkOutputs(t, newFan(t), opts, []byte(`
+1 /a/aa
+1 /a/ab
+1 /a/ac
+1 /a/ad
+0 /a
+`))
+
+	testWalkOutputs(t, newLinkedList(t), opts, []byte(`
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+2 /a/aa/aaa
+1 /a/aa
+0 /a
+`))
+
+	testWalkOutputs(t, newBinaryTree(t), opts, []byte(`
+2 /a/aa/aaa
+2 /a/aa/aab
+1 /a/aa
+2 /a/ab/aba
+2 /a/ab/abb
+1 /a/ab
+0 /a
+`))
+
+	testWalkOutputs(t, newBinaryDAG(t), opts, []byte(`
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+2 /a/aa/aaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+2 /a/aa/aaa
+1 /a/aa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+2 /a/aa/aaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+2 /a/aa/aaa
+1 /a/aa
+0 /a
+`))
+}
+
+func TestDFSPostSkip(t *testing.T) {
+	opts := Options{Order: DFSPost, SkipDuplicates: true}
+
+	testWalkOutputs(t, newFan(t), opts, []byte(`
+1 /a/aa
+1 /a/ab
+1 /a/ac
+1 /a/ad
+0 /a
+`))
+
+	testWalkOutputs(t, newLinkedList(t), opts, []byte(`
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+2 /a/aa/aaa
+1 /a/aa
+0 /a
+`))
+
+	testWalkOutputs(t, newBinaryTree(t), opts, []byte(`
+2 /a/aa/aaa
+2 /a/aa/aab
+1 /a/aa
+2 /a/ab/aba
+2 /a/ab/abb
+1 /a/ab
+0 /a
+`))
+
+	testWalkOutputs(t, newBinaryDAG(t), opts, []byte(`
+4 /a/aa/aaa/aaaa/aaaaa
+3 /a/aa/aaa/aaaa
+2 /a/aa/aaa
+1 /a/aa
+0 /a
+`))
+}
+
+func TestBFSNoSkip(t *testing.T) {
+	opts := Options{Order: BFS}
+
+	testWalkOutputs(t, newFan(t), opts, []byte(`
+0 /a
+1 /a/aa
+1 /a/ab
+1 /a/ac
+1 /a/ad
+`))
+
+	testWalkOutputs(t, newLinkedList(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+`))
+
+	testWalkOutputs(t, newBinaryTree(t), opts, []byte(`
+0 /a
+1 /a/aa
+1 /a/ab
+2 /a/aa/aaa
+2 /a/aa/aab
+2 /a/ab/aba
+2 /a/ab/abb
+`))
+
+	testWalkOutputs(t, newBinaryDAG(t), opts, []byte(`
+0 /a
+1 /a/aa
+1 /a/aa
+2 /a/aa/aaa
+2 /a/aa/aaa
+2 /a/aa/aaa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+3 /a/aa/aaa/aaaa
+3 /a/aa/aaa/aaaa
+3 /a/aa/aaa/aaaa
+3 /a/aa/aaa/aaaa
+3 /a/aa/aaa/aaaa
+3 /a/aa/aaa/aaaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+4 /a/aa/aaa/aaaa/aaaaa
+`))
+}
+
+func TestBFSSkip(t *testing.T) {
+	opts := Options{Order: BFS, SkipDuplicates: true}
+
+	testWalkOutputs(t, newFan(t), opts, []byte(`
+0 /a
+1 /a/aa
+1 /a/ab
+1 /a/ac
+1 /a/ad
+`))
+
+	testWalkOutputs(t, newLinkedList(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+`))
+
+	testWalkOutputs(t, newBinaryTree(t), opts, []byte(`
+0 /a
+1 /a/aa
+1 /a/ab
+2 /a/aa/aaa
+2 /a/aa/aab
+2 /a/ab/aba
+2 /a/ab/abb
+`))
+
+	testWalkOutputs(t, newBinaryDAG(t), opts, []byte(`
+0 /a
+1 /a/aa
+2 /a/aa/aaa
+3 /a/aa/aaa/aaaa
+4 /a/aa/aaa/aaaa/aaaaa
+`))
+}
+
+func testWalkOutputs(t *testing.T, root *mdag.Node, opts Options, expect []byte) {
+	expect = bytes.TrimLeft(expect, "\n")
+
+	var buf bytes.Buffer
+	walk := func(current State) error {
+		s := fmt.Sprintf("%d %s\n", current.Depth, current.Node.Data)
+		t.Logf("walk: %s", s)
+		buf.Write([]byte(s))
+		return nil
+	}
+
+	opts.Func = walk
+	if err := Traverse(root, opts); err != nil {
+		t.Error(err)
+		return
+	}
+
+	actual := buf.Bytes()
+	if !bytes.Equal(actual, expect) {
+		t.Error("error: outputs differ")
+		t.Logf("expect:\n%s", expect)
+		t.Logf("actual:\n%s", actual)
+	} else {
+		t.Logf("expect matches actual:\n%s", expect)
+	}
+}
+
+func newFan(t *testing.T) *mdag.Node {
+	a := &mdag.Node{Data: []byte("/a")}
+	addChild(t, a, "aa")
+	addChild(t, a, "ab")
+	addChild(t, a, "ac")
+	addChild(t, a, "ad")
+	return a
+}
+
+func newLinkedList(t *testing.T) *mdag.Node {
+	a := &mdag.Node{Data: []byte("/a")}
+	aa := addChild(t, a, "aa")
+	aaa := addChild(t, aa, "aaa")
+	aaaa := addChild(t, aaa, "aaaa")
+	addChild(t, aaaa, "aaaaa")
+	return a
+}
+
+func newBinaryTree(t *testing.T) *mdag.Node {
+	a := &mdag.Node{Data: []byte("/a")}
+	aa := addChild(t, a, "aa")
+	ab := addChild(t, a, "ab")
+	addChild(t, aa, "aaa")
+	addChild(t, aa, "aab")
+	addChild(t, ab, "aba")
+	addChild(t, ab, "abb")
+	return a
+}
+
+func newBinaryDAG(t *testing.T) *mdag.Node {
+	a := &mdag.Node{Data: []byte("/a")}
+	aa := addChild(t, a, "aa")
+	aaa := addChild(t, aa, "aaa")
+	aaaa := addChild(t, aaa, "aaaa")
+	aaaaa := addChild(t, aaaa, "aaaaa")
+	addLink(t, a, aa)
+	addLink(t, aa, aaa)
+	addLink(t, aaa, aaaa)
+	addLink(t, aaaa, aaaaa)
+	return a
+}
+
+func addLink(t *testing.T, a, b *mdag.Node) {
+	to := string(a.Data) + "2" + string(b.Data)
+	if err := a.AddNodeLink(to, b); err != nil {
+		t.Error(err)
+	}
+}
+
+func addChild(t *testing.T, a *mdag.Node, name string) *mdag.Node {
+	c := &mdag.Node{Data: []byte(string(a.Data) + "/" + name)}
+	addLink(t, a, c)
+	return c
+}