Skip to content

repo: Add ignore_revs. #8098

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ def _ignore(self):
def brancher(self, *args, **kwargs):
from dvc.repo.brancher import brancher

kwargs["ignore_revs"] = self.ignore_revs
return brancher(self, *args, **kwargs)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about

return brancher(self, *args, ignore_revs=self.ignore_revs, **kwargs)


def used_objs(
Expand Down Expand Up @@ -489,6 +490,19 @@ def partial_imports(
def stages(self): # obsolete, only for backward-compatibility
return self.index.stages

@property
def ignore_revs_file(self):
if self.dvc_dir:
return self.fs.path.join(self.dvc_dir, "ignore_revs")

@property
def ignore_revs(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should clarify what is ignore_revs file for an external repository. Do we need to read from the default branch or just the one we checked out? If we need to read from default branch, it might not be possible with shallow clones AFAICT.

Also what should happen if user asks for that particular revision (eg: plots diff <ignored_rev>)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should clarify what is ignore_revs file for an external repository.

Not sure I follow. Could you elaborate the use case?

Do we need to read from the default branch or just the one we checked out? If we need to read from default branch, it might not be possible with shallow clones AFAICT.

I believe we should read from the current revision. Treat ignore-revs as if it was part of config.

Also what should happen if user asks for that particular revision (eg: plots diff <ignored_rev>)?

We will skip the revision.
The use case is to use ignore_revs for broken revisions.
If a revision is broken, plots diff won't work (see #6150) so if we don't skip user will encounter the error?

if self.ignore_revs_file is not None:
if self.fs.exists(self.ignore_revs_file):
with self.fs.open(self.ignore_revs_file) as f:
return set(f.read().strip().splitlines())
return set()

def find_outs_by_path(self, path, outs=None, recursive=False, strict=True):
# using `outs_graph` to ensure graph checks are run
outs = outs or self.index.outs_graph
Expand Down
4 changes: 3 additions & 1 deletion dvc/repo/brancher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import Optional, Set

from dvc.scm import iter_revs

Expand All @@ -13,6 +13,7 @@ def brancher( # noqa: E302
commit_date: Optional[str] = None,
sha_only=False,
num=1,
ignore_revs: Optional[Set[str]] = None,
):
"""Generator that iterates over specified revisions.

Expand Down Expand Up @@ -71,6 +72,7 @@ def brancher( # noqa: E302
all_experiments=all_experiments,
commit_date=commit_date,
num=num,
ignore_revs=ignore_revs,
)

try:
Expand Down
8 changes: 6 additions & 2 deletions dvc/scm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
from contextlib import contextmanager
from functools import partial
from typing import TYPE_CHECKING, Iterator, List, Mapping, Optional
from typing import TYPE_CHECKING, Iterator, List, Mapping, Optional, Set

from funcy import group_by
from scmrepo.base import Base # noqa: F401, pylint: disable=unused-import
Expand Down Expand Up @@ -204,6 +204,7 @@ def iter_revs(
all_commits: bool = False,
all_experiments: bool = False,
commit_date: Optional[str] = None,
ignore_revs: Optional[Set[str]] = None,
) -> Mapping[str, List[str]]:
from scmrepo.exceptions import SCMError as _SCMError

Expand All @@ -222,6 +223,7 @@ def iter_revs(
return {}

revs = revs or []
ignore_revs = ignore_revs or set()
results: List[str] = _get_n_commits(scm, revs, num)

if all_commits:
Expand Down Expand Up @@ -254,4 +256,6 @@ def _time_filter(rev):
results.extend(exp_commits(scm))

rev_resolver = partial(resolve_rev, scm)
return group_by(rev_resolver, results)
grouped = group_by(rev_resolver, results)

return {k: v for k, v in grouped.items() if k not in ignore_revs}
14 changes: 14 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,20 @@ def run(
return run


@pytest.fixture
def broken_rev(tmp_dir, scm, dvc):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need a broken rev and the whole functional test. Instead, just check that if the config file made the desired revision disappear from iter_revs with a unit test is enough.

Copy link
Contributor Author

@daavoo daavoo Aug 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

disappear from iter_revs with a unit test is enough

I think it's not enough in this case.
If the underlying functionality of / gc / pull/ push` is changed (i.e. brancher usage is removed) there won't be any test indicating that the ignore_revs have stopped working for those commands.

tmp_dir.gen("params.yaml", "foo: 1")
dvc.run(cmd="echo ${foo}", name="foo")

scm.add(["dvc.yaml", "dvc.lock"])
scm.commit("init broken")
_broken_rev = scm.get_rev()

scm.add(["params.yaml"])
scm.commit("fixed")
return _broken_rev


@pytest.fixture(autouse=True)
def mock_hydra_conf(mocker):
if sys.version_info < (3, 11):
Expand Down
31 changes: 31 additions & 0 deletions tests/func/test_data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import dvc_data
from dvc.cli import main
from dvc.exceptions import DvcException
from dvc.external_repo import clean_repos
from dvc.stage.exceptions import StageNotFound
from dvc.testing.remote_tests import ( # noqa, pylint: disable=unused-import
Expand Down Expand Up @@ -476,6 +477,36 @@ def test_push_pull_all(tmp_dir, scm, dvc, local_remote, key, expected):
assert dvc.pull(**{key: True})["fetched"] == expected


def test_push_pull_ignore_revs(tmp_dir, scm, dvc, local_remote, broken_rev):
tmp_dir.dvc_gen({"foo": "foo"}, commit="first")
scm.tag("v1")
dvc.remove("foo.dvc")
tmp_dir.dvc_gen({"bar": "bar"}, commit="second")
scm.tag("v2")
with tmp_dir.branch("branch", new=True):
dvc.remove("bar.dvc")
tmp_dir.dvc_gen({"baz": "baz"}, commit="branch")

with pytest.raises(DvcException):
dvc.push(all_commits=True)

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(broken_rev)

assert dvc.push(all_commits=True) == 3

remove(dvc.ignore_revs_file)
clean(["foo", "bar", "baz"], dvc)

with pytest.raises(DvcException):
dvc.pull(all_commits=True)

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(broken_rev)

assert dvc.pull(all_commits=True)["fetched"] == 3


def test_push_pull_fetch_pipeline_stages(tmp_dir, dvc, run_copy, local_remote):
tmp_dir.dvc_gen("foo", "foo")
run_copy("foo", "bar", no_commit=True, name="copy-foo-bar")
Expand Down
23 changes: 22 additions & 1 deletion tests/func/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from git import Repo

from dvc.cli import main
from dvc.exceptions import CollectCacheError
from dvc.exceptions import CollectCacheError, DvcException
from dvc.fs import LocalFileSystem
from dvc.repo import Repo as DvcRepo
from dvc.utils.fs import remove
Expand Down Expand Up @@ -412,3 +412,24 @@ def test_gc_rev_num(tmp_dir, scm, dvc):
assert not cache.exists()
else:
assert cache.read_text() == str(i)


def test_gc_ignore_revs(tmp_dir, dvc, broken_rev):
"""Covers #5037 and #7585"""
uncommitted = tmp_dir.dvc_gen("testfile", "uncommitted")
uncommitted_hash = uncommitted[0].outs[0].hash_info.value
tmp_dir.dvc_gen("testfile", "committed", commit="add testfile")

cache = tmp_dir / ".dvc" / "cache"
uncommitted_cache = cache / uncommitted_hash[:2] / uncommitted_hash[2:]

with pytest.raises(DvcException, match="Could not find 'foo'"):
dvc.gc(all_commits=True)

assert uncommitted_cache.exists()

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(broken_rev)
dvc.gc(all_commits=True)

assert not uncommitted_cache.exists()
27 changes: 27 additions & 0 deletions tests/unit/repo/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,33 @@ def test_used_objs(tmp_dir, dvc, path):
assert used == expected


def test_used_objs_ignore_revs(tmp_dir, scm, dvc):
def _get_used_values(dvc):
used_obj_ids = list(dvc.used_objs(all_commits=True).values())[0]
return {o.value for o in used_obj_ids}

obj_ids = {}
revs = {}
for i in range(3):
o = tmp_dir.dvc_gen("foo", str(i), commit=str(i))
obj_ids[i] = o[0].outs[0].hash_info.value
revs[i] = scm.get_rev()

assert set(obj_ids.values()) == _get_used_values(dvc)

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(revs[0])

expected = {obj_ids[1], obj_ids[2]}
assert expected == _get_used_values(dvc)

with dvc.fs.open(dvc.ignore_revs_file, "w") as f:
f.write(f"{revs[0]}\n{revs[1]}")

expected = {obj_ids[2]}
assert expected == _get_used_values(dvc)


def test_locked(mocker):
repo = mocker.MagicMock()
repo._lock_depth = 0
Expand Down
37 changes: 31 additions & 6 deletions tests/unit/scm/test_scm.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
from datetime import datetime

import pytest
from scmrepo.exceptions import SCMError

from dvc.repo.experiments import ExpRefInfo
from dvc.scm import GitMergeError, iter_revs


def test_iter_revs(
tmp_dir,
scm,
mocker,
):
@pytest.fixture
def setup_revs(tmp_dir, scm):
"""
new other
│ │
Expand All @@ -37,6 +35,12 @@ def test_iter_revs(
ref = ExpRefInfo(rev_root, "exp2")
scm.set_ref(str(ref), rev_old)

return old, rev_root, rev_old, rev_new, rev_other


def test_iter_revs_rev_num(scm, setup_revs):
_, rev_root, rev_old, rev_new, rev_other = setup_revs

gen = iter_revs(scm, [rev_root, "new"], 1)
assert gen == {rev_root: [rev_root], rev_new: ["new"]}
gen = iter_revs(scm, ["new"], 2)
Expand All @@ -49,6 +53,11 @@ def test_iter_revs(
}
gen = iter_revs(scm, ["tag"])
assert gen == {rev_old: ["tag"]}


def test_iter_revs_all(scm, setup_revs):
old, rev_root, rev_old, rev_new, rev_other = setup_revs

gen = iter_revs(scm, all_branches=True)
assert gen == {rev_old: [old], rev_new: ["new"], rev_other: ["other"]}
gen = iter_revs(scm, all_tags=True)
Expand All @@ -67,8 +76,11 @@ def test_iter_revs(
rev_root: [rev_root],
}

def _func(rev):

def test_iter_revs_commit_date(scm, setup_revs, mocker):
_, rev_root, rev_old, rev_new, rev_other = setup_revs

def _func(rev):
if rev == rev_root:
return mocker.Mock(commit_date=datetime(2022, 6, 28).timestamp())
if rev == rev_old:
Expand All @@ -87,6 +99,19 @@ def _func(rev):
}


def test_iter_revs_ignore(scm, setup_revs):
_, rev_root, rev_old, rev_new, rev_other = setup_revs

gen = iter_revs(scm, all_commits=True, ignore_revs={rev_old, rev_new})
assert gen == {rev_other: [rev_other], rev_root: [rev_root]}
gen = iter_revs(scm, all_branches=True, ignore_revs={rev_old, rev_new})
assert gen == {rev_other: ["other"]}
gen = iter_revs(scm, all_tags=True, ignore_revs={rev_old, rev_new})
assert gen == {}
gen = iter_revs(scm, all_experiments=True, ignore_revs={rev_old, rev_new})
assert gen == {rev_root: [rev_root]}


def test_merge_error(tmp_dir, scm):
exc = GitMergeError("Merge failed")
assert "shallow" not in str(exc)
Expand Down