Skip to content

Commit 947b624

Browse files
committed
repo: Add ignore_revs.
Add support for writing a list of newline-separated revisions to `.dvc/ignore_revs`. If `.dvc/ignore_revs` exists, the revisions are parsed and used in `brancher` to skip those revisions. This makes it possible to skip broken commits in `gc`, `pull` and `push`. Closes #5037 Closes #5066 Closes #7585
1 parent a9d01a0 commit 947b624

File tree

6 files changed

+111
-2
lines changed

6 files changed

+111
-2
lines changed

Diff for: dvc/repo/__init__.py

+14
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ def _ignore(self):
381381
def brancher(self, *args, **kwargs):
382382
from dvc.repo.brancher import brancher
383383

384+
kwargs["ignore_revs"] = self.ignore_revs
384385
return brancher(self, *args, **kwargs)
385386

386387
def used_objs(
@@ -448,6 +449,19 @@ def used_objs(
448449
def stages(self): # obsolete, only for backward-compatibility
449450
return self.index.stages
450451

452+
@property
453+
def ignore_revs_file(self):
454+
if self.dvc_dir:
455+
return self.fs.path.join(self.dvc_dir, "ignore_revs")
456+
return ""
457+
458+
@property
459+
def ignore_revs(self):
460+
if self.fs.exists(self.ignore_revs_file):
461+
with self.fs.open(self.ignore_revs_file) as f:
462+
return set(f.read().strip().splitlines())
463+
return set()
464+
451465
def find_outs_by_path(self, path, outs=None, recursive=False, strict=True):
452466
# using `outs_graph` to ensure graph checks are run
453467
outs = outs or self.index.outs_graph

Diff for: dvc/repo/brancher.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Optional
1+
from typing import Optional, Set
22

33
from dvc.scm import iter_revs
44

@@ -13,6 +13,7 @@ def brancher( # noqa: E302
1313
commit_date: Optional[str] = None,
1414
sha_only=False,
1515
num=1,
16+
ignore_revs: Optional[Set[str]] = None,
1617
):
1718
"""Generator that iterates over specified revisions.
1819
@@ -71,6 +72,7 @@ def brancher( # noqa: E302
7172
all_experiments=all_experiments,
7273
commit_date=commit_date,
7374
num=num,
75+
ignore_revs=ignore_revs,
7476
)
7577

7678
try:

Diff for: tests/conftest.py

+14
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,17 @@ def run(
250250
return stage
251251

252252
return run
253+
254+
255+
@pytest.fixture
256+
def broken_rev(tmp_dir, scm, dvc):
257+
tmp_dir.gen("params.yaml", "foo: 1")
258+
dvc.run(cmd="echo ${foo}", name="foo")
259+
260+
scm.add(["dvc.yaml", "dvc.lock"])
261+
scm.commit("init broken")
262+
_broken_rev = scm.get_rev()
263+
264+
scm.add(["params.yaml"])
265+
scm.commit("fixed")
266+
return _broken_rev

Diff for: tests/func/test_data_cloud.py

+31
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import dvc_data
99
from dvc.cli import main
10+
from dvc.exceptions import DvcException
1011
from dvc.external_repo import clean_repos
1112
from dvc.stage.exceptions import StageNotFound
1213
from dvc.testing.test_remote import ( # noqa, pylint: disable=unused-import
@@ -461,6 +462,36 @@ def test_push_pull_all(tmp_dir, scm, dvc, local_remote, key, expected):
461462
assert dvc.pull(**{key: True})["fetched"] == expected
462463

463464

465+
def test_push_pull_ignore_revs(tmp_dir, scm, dvc, local_remote, broken_rev):
466+
tmp_dir.dvc_gen({"foo": "foo"}, commit="first")
467+
scm.tag("v1")
468+
dvc.remove("foo.dvc")
469+
tmp_dir.dvc_gen({"bar": "bar"}, commit="second")
470+
scm.tag("v2")
471+
with tmp_dir.branch("branch", new=True):
472+
dvc.remove("bar.dvc")
473+
tmp_dir.dvc_gen({"baz": "baz"}, commit="branch")
474+
475+
with pytest.raises(DvcException):
476+
dvc.push(all_commits=True)
477+
478+
with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
479+
f.write(broken_rev)
480+
481+
assert dvc.push(all_commits=True) == 3
482+
483+
remove(dvc.ignore_revs_file)
484+
clean(["foo", "bar", "baz"], dvc)
485+
486+
with pytest.raises(DvcException):
487+
dvc.pull(all_commits=True)
488+
489+
with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
490+
f.write(broken_rev)
491+
492+
assert dvc.pull(all_commits=True)["fetched"] == 3
493+
494+
464495
def test_push_pull_fetch_pipeline_stages(tmp_dir, dvc, run_copy, local_remote):
465496
tmp_dir.dvc_gen("foo", "foo")
466497
run_copy("foo", "bar", no_commit=True, name="copy-foo-bar")

Diff for: tests/func/test_gc.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from git import Repo
88

99
from dvc.cli import main
10-
from dvc.exceptions import CollectCacheError
10+
from dvc.exceptions import CollectCacheError, DvcException
1111
from dvc.fs import LocalFileSystem
1212
from dvc.repo import Repo as DvcRepo
1313
from dvc.utils.fs import remove
@@ -412,3 +412,24 @@ def test_gc_rev_num(tmp_dir, scm, dvc):
412412
assert not cache.exists()
413413
else:
414414
assert cache.read_text() == str(i)
415+
416+
417+
def test_gc_ignore_revs(tmp_dir, dvc, broken_rev):
418+
"""Covers #5037 and #7585"""
419+
uncommitted = tmp_dir.dvc_gen("testfile", "uncommitted")
420+
uncommitted_hash = uncommitted[0].outs[0].hash_info.value
421+
tmp_dir.dvc_gen("testfile", "committed", commit="add testfile")
422+
423+
cache = tmp_dir / ".dvc" / "cache"
424+
uncommitted_cache = cache / uncommitted_hash[:2] / uncommitted_hash[2:]
425+
426+
with pytest.raises(DvcException, match="Could not find 'foo'"):
427+
dvc.gc(all_commits=True)
428+
429+
assert uncommitted_cache.exists()
430+
431+
with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
432+
f.write(broken_rev)
433+
dvc.gc(all_commits=True)
434+
435+
assert not uncommitted_cache.exists()

Diff for: tests/unit/repo/test_repo.py

+27
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,33 @@ def test_used_objs(tmp_dir, dvc, path):
5959
assert used == expected
6060

6161

62+
def test_used_objs_ignore_revs(tmp_dir, scm, dvc):
63+
def _get_used_values(dvc):
64+
used_obj_ids = list(dvc.used_objs(all_commits=True).values())[0]
65+
return {o.value for o in used_obj_ids}
66+
67+
obj_ids = {}
68+
revs = {}
69+
for i in range(3):
70+
o = tmp_dir.dvc_gen("foo", str(i), commit=str(i))
71+
obj_ids[i] = o[0].outs[0].hash_info.value
72+
revs[i] = scm.get_rev()
73+
74+
assert set(obj_ids.values()) == _get_used_values(dvc)
75+
76+
with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
77+
f.write(revs[0])
78+
79+
expected = {obj_ids[1], obj_ids[2]}
80+
assert expected == _get_used_values(dvc)
81+
82+
with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
83+
f.write(f"{revs[0]}\n{revs[1]}")
84+
85+
expected = {obj_ids[2]}
86+
assert expected == _get_used_values(dvc)
87+
88+
6289
def test_locked(mocker):
6390
repo = mocker.MagicMock()
6491
repo._lock_depth = 0

0 commit comments

Comments
 (0)