1
0
mirror of https://github.com/ipfs/kubo.git synced 2025-06-25 15:08:45 +08:00

Merge pull request #5337 from ipfs/feat/depth-limited-refs

Feat: depth limited refs -r
This commit is contained in:
Steven Allen
2018-08-27 19:29:30 +00:00
committed by GitHub
2 changed files with 282 additions and 54 deletions

View File

@ -10,11 +10,11 @@ import (
cmds "github.com/ipfs/go-ipfs/commands"
"github.com/ipfs/go-ipfs/core"
e "github.com/ipfs/go-ipfs/core/commands/e"
path "gx/ipfs/QmdMPBephdLYNESkruDX2hcDTgFYhoCt4LimWhgnomSdV2/go-path"
"gx/ipfs/QmSP88ryZkHSRn1fnngAaV2Vcn63WUJzAavnRM9CVdU1Ky/go-ipfs-cmdkit"
cmdkit "gx/ipfs/QmSP88ryZkHSRn1fnngAaV2Vcn63WUJzAavnRM9CVdU1Ky/go-ipfs-cmdkit"
ipld "gx/ipfs/QmX5CsuHyVZeTLxgRSYkgLSDQKb9UjE8xnhQzCEJWWWFsC/go-ipld-format"
cid "gx/ipfs/QmZFbDTY9jfSBms2MchvYM9oYRbAF19K7Pby47yDBfpPrb/go-cid"
path "gx/ipfs/QmdMPBephdLYNESkruDX2hcDTgFYhoCt4LimWhgnomSdV2/go-path"
)
// KeyList is a general type for outputting lists of keys
@ -64,6 +64,7 @@ NOTE: List all references recursively by using the flag '-r'.
cmdkit.BoolOption("edges", "e", "Emit edge format: `<from> -> <to>`."),
cmdkit.BoolOption("unique", "u", "Omit duplicate refs from output."),
cmdkit.BoolOption("recursive", "r", "Recursively list links of child nodes."),
cmdkit.IntOption("max-depth", "Only for recursive refs, limits fetch and listing to the given depth").WithDefault(-1),
},
Run: func(req cmds.Request, res cmds.Response) {
ctx := req.Context()
@ -85,6 +86,16 @@ NOTE: List all references recursively by using the flag '-r'.
return
}
maxDepth, _, err := req.Option("max-depth").Int()
if err != nil {
res.SetError(err, cmdkit.ErrNormal)
return
}
if !recursive {
maxDepth = 1 // write only direct refs
}
format, _, err := req.Option("format").String()
if err != nil {
res.SetError(err, cmdkit.ErrNormal)
@ -119,12 +130,12 @@ NOTE: List all references recursively by using the flag '-r'.
defer close(out)
rw := RefWriter{
out: out,
DAG: n.DAG,
Ctx: ctx,
Unique: unique,
PrintFmt: format,
Recursive: recursive,
out: out,
DAG: n.DAG,
Ctx: ctx,
Unique: unique,
PrintFmt: format,
MaxDepth: maxDepth,
}
for _, o := range objs {
@ -231,86 +242,127 @@ type RefWriter struct {
DAG ipld.DAGService
Ctx context.Context
Unique bool
Recursive bool
PrintFmt string
Unique bool
MaxDepth int
PrintFmt string
seen *cid.Set
seen map[string]int
}
// WriteRefs writes refs of the given object to the underlying writer.
func (rw *RefWriter) WriteRefs(n ipld.Node) (int, error) {
if rw.Recursive {
return rw.writeRefsRecursive(n)
}
return rw.writeRefsSingle(n)
return rw.writeRefsRecursive(n, 0)
}
func (rw *RefWriter) writeRefsRecursive(n ipld.Node) (int, error) {
func (rw *RefWriter) writeRefsRecursive(n ipld.Node, depth int) (int, error) {
nc := n.Cid()
var count int
for i, ng := range ipld.GetDAG(rw.Ctx, rw.DAG, n) {
lc := n.Links()[i].Cid
if rw.skip(lc) {
goDeeper, shouldWrite := rw.visit(lc, depth+1) // The children are at depth+1
// Avoid "Get()" on the node and continue with next Link.
// We can do this if:
// - We printed it before (thus it was already seen and
// fetched with Get())
// - AND we must not go deeper.
// This is an optimization for pruned branches which have been
// visited before.
if !shouldWrite && !goDeeper {
continue
}
if err := rw.WriteEdge(nc, lc, n.Links()[i].Name); err != nil {
return count, err
}
// We must Get() the node because:
// - it is new (never written)
// - OR we need to go deeper.
// This ensures printed refs are always fetched.
nd, err := ng.Get(rw.Ctx)
if err != nil {
return count, err
}
c, err := rw.writeRefsRecursive(nd)
count += c
if err != nil {
return count, err
// Write this node if not done before (or !Unique)
if shouldWrite {
if err := rw.WriteEdge(nc, lc, n.Links()[i].Name); err != nil {
return count, err
}
count++
}
// Keep going deeper. This happens:
// - On unexplored branches
// - On branches not explored deep enough
// Note when !Unique, branches are always considered
// unexplored and only depth limits apply.
if goDeeper {
c, err := rw.writeRefsRecursive(nd, depth+1)
count += c
if err != nil {
return count, err
}
}
}
return count, nil
}
func (rw *RefWriter) writeRefsSingle(n ipld.Node) (int, error) {
c := n.Cid()
// visit returns two values:
// - the first boolean is true if we should keep traversing the DAG
// - the second boolean is true if we should print the CID
//
// visit will do branch pruning depending on rw.MaxDepth, previously visited
// cids and whether rw.Unique is set. i.e. rw.Unique = false and
// rw.MaxDepth = -1 disables any pruning. But setting rw.Unique to true will
// prune already visited branches at the cost of keeping a set of visited
// CIDs in memory.
func (rw *RefWriter) visit(c *cid.Cid, depth int) (bool, bool) {
atMaxDepth := rw.MaxDepth >= 0 && depth == rw.MaxDepth
overMaxDepth := rw.MaxDepth >= 0 && depth > rw.MaxDepth
if rw.skip(c) {
return 0, nil
// Shortcut when we are over max depth. In practice, this
// only applies when calling refs with --max-depth=0, as root's
// children are already over max depth. Otherwise nothing should
// hit this.
if overMaxDepth {
return false, false
}
count := 0
for _, l := range n.Links() {
lc := l.Cid
if rw.skip(lc) {
continue
}
if err := rw.WriteEdge(c, lc, l.Name); err != nil {
return count, err
}
count++
}
return count, nil
}
// skip returns whether to skip a cid
func (rw *RefWriter) skip(c *cid.Cid) bool {
// We can shortcut right away if we don't need unique output:
// - we keep traversing when not atMaxDepth
// - always print
if !rw.Unique {
return false
return !atMaxDepth, true
}
// Unique == true from this point.
// Thus, we keep track of seen Cids, and their depth.
if rw.seen == nil {
rw.seen = cid.NewSet()
rw.seen = make(map[string]int)
}
key := string(c.Bytes())
oldDepth, ok := rw.seen[key]
// Unique == true && depth < MaxDepth (or unlimited) from this point
// Branch pruning cases:
// - We saw the Cid before and either:
// - Depth is unlimited (MaxDepth = -1)
// - We saw it higher (smaller depth) in the DAG (means we must have
// explored deep enough before)
// Because we saw the CID, we don't print it again.
if ok && (rw.MaxDepth < 0 || oldDepth <= depth) {
return false, false
}
has := rw.seen.Has(c)
if !has {
rw.seen.Add(c)
}
return has
// Final case, we must keep exploring the DAG from this CID
// (unless we hit the depth limit).
// We note down its depth because it was either not seen
// or is lower than last time.
// We print if it was not seen.
rw.seen[key] = depth
return !atMaxDepth, !ok
}
// Write one edge

176
test/sharness/t0095-refs.sh Executable file
View File

@ -0,0 +1,176 @@
#!/usr/bin/env bash
#
# Copyright (c) 2018 Protocol Labs, Inc
# MIT Licensed; see the LICENSE file in this repository.
#
test_description="Test 'ipfs refs' command"
. lib/test-lib.sh
test_init_ipfs
test_launch_ipfs_daemon --offline
# This file performs tests with the following directory
# structure.
#
# L0- _______ A_________
# / | \ \
# L1- B C D 1.txt
# / \ | |
# L2- D 1.txt B 2.txt
# | / \
# L3- 2.txt D 1.txt
# |
# L4- 2.txt
#
# 'ipfs add -r A' output:
#
# added QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v A/1.txt
# added QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v A/B/1.txt
# added QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 A/B/D/2.txt
# added QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v A/C/B/1.txt
# added QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 A/C/B/D/2.txt
# added QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 A/D/2.txt
# added QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS A/B/D
# added QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa A/B
# added QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS A/C/B/D
# added QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa A/C/B
# added QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH A/C
# added QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS A/D
# added QmU6xujRsYzcrkocuR3fhfnkZBB8eyUFFq4WKRGw2aS15h A
#
# 'ipfs refs -r QmU6xujRsYzcrkocuR3fhfnkZBB8eyUFFq4WKRGw2aS15h' sample output
# that shows visit order in a stable go-ipfs version:
#
# QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v - 1.txt
# QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa - B (A/B)
# QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v - 1.txt (A/B/1.txt)
# QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS - D (A/B/D)
# QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 - 2.txt (A/B/D/2.txt)
# QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH - C (A/C)
# QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa - B (A/C/B)
# QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v - 1.txt (A/C/B/1.txt)
# QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS - D (A/C/B/D)
# QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 - 2.txt (A/C/B/D/2.txt)
# QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS - D (A/D)
# QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61 - 2.txt (A/D/2.txt)
refsroot=QmU6xujRsYzcrkocuR3fhfnkZBB8eyUFFq4WKRGw2aS15h
test_expect_success "create and add folders for refs" '
mkdir -p A/B/D A/C/B/D A/D
echo "1" > A/1.txt
echo "1" > A/B/1.txt
echo "1" > A/C/B/1.txt
echo "2" > A/B/D/2.txt
echo "2" > A/C/B/D/2.txt
echo "2" > A/D/2.txt
root=$(ipfs add -r -Q A)
[[ "$root" == "$refsroot" ]]
'
test_expect_success "ipfs refs -r" '
cat <<EOF > expected.txt
QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v
QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa
QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v
QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS
QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61
QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH
QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa
QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v
QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS
QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61
QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS
QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61
EOF
ipfs refs -r $refsroot > refsr.txt
test_cmp expected.txt refsr.txt
'
# Unique is like above but removing duplicates
test_expect_success "ipfs refs -r --unique" '
cat <<EOF > expected.txt
QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v
QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa
QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS
QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61
QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH
EOF
ipfs refs -r --unique $refsroot > refsr.txt
test_cmp expected.txt refsr.txt
'
# First level is 1.txt, B, C, D
test_expect_success "ipfs refs" '
cat <<EOF > expected.txt
QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v
QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa
QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH
QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS
EOF
ipfs refs $refsroot > refs.txt
test_cmp expected.txt refs.txt
'
# max-depth=0 should return an empty list
test_expect_success "ipfs refs -r --max-depth=0" '
cat <<EOF > expected.txt
EOF
ipfs refs -r --max-depth=0 $refsroot > refs.txt
test_cmp expected.txt refs.txt
'
# max-depth=1 should be equivalent to running without -r
test_expect_success "ipfs refs -r --max-depth=1" '
ipfs refs -r --max-depth=1 $refsroot > refsr.txt
ipfs refs $refsroot > refs.txt
test_cmp refsr.txt refs.txt
'
# We should see the depth limit engage at level 2: only the level-1 and
# level-2 entries of the full "refs -r" listing are expected, duplicates
# included because --unique is not set.
test_expect_success "ipfs refs -r --max-depth=2" '
cat <<EOF > expected.txt
QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v
QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa
QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v
QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS
QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH
QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa
QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS
QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61
EOF
ipfs refs -r --max-depth=2 $refsroot > refsr.txt
test_cmp expected.txt refsr.txt
'
# Here branch pruning and re-exploration come into play.
# At first it should see D at level 2 and not go deeper.
# But then, after doing C, it will see D at level 1 and go deeper
# so that it outputs the hash for 2.txt (-q61).
# We also see that C/B is pruned as it's been shown before.
#
# Excerpt from diagram above:
#
# L0- _______ A_________
# / | \ \
# L1- B C D 1.txt
# / \ | |
# L2- D 1.txt B 2.txt
# Expected output goes first in test_cmp, matching the other tests in this
# file, so a failure diff reads as expected -> actual.
test_expect_success "ipfs refs -r --unique --max-depth=2" '
cat <<EOF > expected.txt
QmdytmR4wULMd3SLo6ePF4s3WcRHWcpnJZ7bHhoj3QB13v
QmNkQvpiyAEtbeLviC7kqfifYoK1GXPcsSxTpP1yS3ykLa
QmSanP5DpxpqfDdS4yekHY1MqrVge47gtxQcp2e2yZ4UwS
QmXXazTjeNCKFnpW1D65vTKsTs8fbgkCWTv8Em4pdK2coH
QmSFxnK675wQ9Kc1uqWKyJUaNxvSc2BP5DbXCD3x93oq61
EOF
ipfs refs -r --unique --max-depth=2 $refsroot > refsr.txt
test_cmp expected.txt refsr.txt
'
test_done