Skip to content

Commit 7bdf840

Browse files
Raise early for invalid revision in load_dataset (#7929)
* Add test for checking invalid revision handling
* Fix load_dataset ignoring invalid revision when cache exists
1 parent 06b6e02 commit 7bdf840

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

src/datasets/load.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -979,10 +979,6 @@ def dataset_module_factory(
979979
elif e.response.status_code == 403:
980980
message += f" Visit the dataset page at https://huggingface.co/datasets/{path} to ask for access."
981981
raise DatasetNotFoundError(message) from e
982-
except RevisionNotFoundError as e:
983-
raise DatasetNotFoundError(
984-
f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub."
985-
) from e
986982
except RepositoryNotFoundError as e:
987983
raise DatasetNotFoundError(f"Dataset '{path}' doesn't exist on the Hub or cannot be accessed.") from e
988984
try:
@@ -1016,10 +1012,8 @@ def dataset_module_factory(
10161012
elif e.response.status_code == 403:
10171013
message += f" Visit the dataset page at https://huggingface.co/datasets/{path} to ask for access."
10181014
raise DatasetNotFoundError(message) from e
1019-
except RevisionNotFoundError as e:
1020-
raise DatasetNotFoundError(
1021-
f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub."
1022-
) from e
1015+
except RevisionNotFoundError as e:
1016+
raise DatasetNotFoundError(f"Revision '{revision}' doesn't exist for dataset '{path}' on the Hub.") from e
10231017
except Exception as e1:
10241018
# All the attempts failed, before raising the error we should check if the module is already cached
10251019
try:

tests/test_load.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,18 @@ def test_load_dataset_from_hub(self):
763763
str(context.exception),
764764
)
765765

766+
@pytest.mark.integration
767+
def test_load_dataset_invalid_revision_with_cache(self):
768+
repo_id = SAMPLE_DATASET_IDENTIFIER2
769+
builder = load_dataset_builder(repo_id, cache_dir=self.cache_dir)
770+
builder.download_and_prepare()
771+
with self.assertRaises(DatasetNotFoundError) as context:
772+
datasets.load_dataset(repo_id, revision="invalid_revision", cache_dir=self.cache_dir)
773+
self.assertIn(
774+
"Revision 'invalid_revision' doesn't exist for dataset",
775+
str(context.exception),
776+
)
777+
766778
def test_load_dataset_namespace(self):
767779
with self.assertRaises(DatasetNotFoundError) as context:
768780
datasets.load_dataset("hf-internal-testing/_dummy")

0 commit comments

Comments
 (0)