Skip to content

Commit b616342

Browse files
committed
ENH: Extend rsync-like diff(path='subds/') to path constraints within the subds
When called with a path argument that matches a subdataset and ends in a trailing slash, `diff()` reports on the subdataset's content, not only on the subdataset record in its superdataset -- even with `recursive=False`. While this behavior is not universally loved, it makes sense not to limit it to the bare trailing-slash case. Just as `subds/` reports on the entire subdataset, `subds/subdir` will now report only on the content within `subdir` of `subds`.
1 parent 55947c1 commit b616342

File tree

2 files changed

+28
-6
lines changed

2 files changed

+28
-6
lines changed

datalad/core/local/diff.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ def _diff_ds(ds, fr, to, constant_refs, recursion_level, origpaths, untracked,
306306
if ds.pathobj in p.parents or (p == ds.pathobj and goinside)
307307
)
308308
try:
309-
lgr.debug("diff %s from '%s' to '%s'", ds, fr, to)
309+
lgr.debug("Diff %s from '%s' to '%s'", ds, fr, to)
310310
diff_state = repo.diffstatus(
311311
fr,
312312
to,
@@ -350,10 +350,17 @@ def _diff_ds(ds, fr, to, constant_refs, recursion_level, origpaths, untracked,
350350
parentds=ds.path,
351351
status='ok',
352352
)
353-
# if a dataset, and given in rsync-style 'ds/' or with sufficient
354-
# recursion level left -> dive in
353+
# for a dataset we need to decide whether to dive in, or not
355354
if props.get('type', None) == 'dataset' and (
356-
(paths and paths.get(path, False)) or recursion_level != 0):
355+
# subdataset path was given in rsync-style 'ds/'
356+
(paths and paths.get(path, False))
357+
# there is still sufficient recursion level left
358+
or recursion_level != 0
359+
# no recursion possible anymore, but one of the given
360+
# path arguments is in this subdataset
361+
or (recursion_level == 0
362+
and paths
363+
and any(path in p.parents for p in paths))):
357364
subds_state = props.get('state', None)
358365
if subds_state in ('clean', 'deleted'):
359366
# no need to look into the subdataset
@@ -378,7 +385,10 @@ def _diff_ds(ds, fr, to, constant_refs, recursion_level, origpaths, untracked,
378385
# subtract one level on the way down, unless the path
379386
# args instructed to go inside this subdataset
380387
recursion_level=recursion_level
381-
if paths and paths.get(path, False) else recursion_level - 1,
388+
# protect against dropping below zero (would mean unconditional
389+
# recursion)
390+
if not recursion_level or (paths and paths.get(path, False))
391+
else recursion_level - 1,
382392
origpaths=origpaths,
383393
untracked=untracked,
384394
annexinfo=annexinfo,

datalad/core/local/tests/test_diff.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from datalad.consts import PRE_INIT_COMMIT_SHA
2323
from datalad.cmd import GitRunner
2424
from datalad.utils import (
25+
Path,
2526
on_windows,
2627
)
2728
from datalad.tests.utils import (
@@ -36,6 +37,7 @@
3637
get_deeply_nested_structure,
3738
has_symlink_capability,
3839
known_failure_githubci_win,
40+
neq_,
3941
OBSCURE_FILENAME,
4042
ok_,
4143
SkipTest,
@@ -451,7 +453,7 @@ def test_diff_rsync_syntax(path):
451453
# three nested datasets
452454
ds = Dataset(path).create()
453455
subds = ds.create('sub')
454-
subsubds = subds.create('deep')
456+
subsubds = subds.create(Path('subdir', 'deep'))
455457
justtop = ds.diff(fr=PRE_INIT_COMMIT_SHA, path='sub')
456458
# we only get a single result, the subdataset in question
457459
assert_result_count(justtop, 1)
@@ -463,6 +465,16 @@ def test_diff_rsync_syntax(path):
463465
assert_result_count(inside, 1, type='dataset', path=subds.path)
464466
assert_result_count(inside, 1, type='dataset', path=subsubds.path)
465467
assert_result_count(inside, 0, type='file', parentds=subsubds.path)
468+
# if we point to the subdir in 'sub' the reporting wrt the subsubds
469+
# doesn't change. It is merely a path constraint within the queried
470+
# subds, but because the subsubds is still underneath it, nothing changes
471+
inside_subdir = ds.diff(fr=PRE_INIT_COMMIT_SHA, path=op.join('sub', 'subdir'))
472+
assert_result_count(inside_subdir, 2, type='dataset')
473+
assert_result_count(inside_subdir, 1, type='dataset', path=subds.path)
474+
assert_result_count(inside_subdir, 1, type='dataset', path=subsubds.path)
475+
assert_result_count(inside_subdir, 0, type='file', parentds=subsubds.path)
476+
# but the rest is different (e.g. all the stuff in .datalad is gone)
477+
neq_(inside, inside_subdir)
466478
# just for completeness, we get more when going full recursive
467479
rec = ds.diff(fr=PRE_INIT_COMMIT_SHA, recursive=True, path='sub' + os.sep)
468480
assert(len(inside) < len(rec))

0 commit comments

Comments
 (0)