Skip to content

Commit e1e969d

Browse files
committed
Revert "Add Torchdata as a requirement and remove conditional imports of Torchdata (#1961) (#1962)"
This reverts commit 771b5a2.
1 parent a075bcc commit e1e969d

34 files changed

+151
-88
lines changed

packaging/torchtext/meta.yaml

-1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@ requirements:
2323
- python
2424
- requests
2525
- tqdm
26-
- torchdata
2726
{{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
2827

2928
build:

setup.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,7 @@ def run(self):
100100
description="Text utilities and datasets for PyTorch",
101101
long_description=read("README.rst"),
102102
license="BSD",
103-
install_requires=["tqdm", "requests", pytorch_package_dep, "numpy", "torchdata"],
103+
install_requires=["tqdm", "requests", pytorch_package_dep, "numpy"],
104104
python_requires=">=3.7",
105105
classifiers=[
106106
"Programming Language :: Python :: 3.7",

torchtext/_download_hooks.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,12 @@
44

55
# This is to allow monkey-patching in fbcode
66
from torch.hub import load_state_dict_from_url # noqa
7-
from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401
7+
from torchtext._internal.module_utils import is_module_available
88
from tqdm import tqdm
99

10+
if is_module_available("torchdata"):
11+
from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401
12+
1013

1114
def _stream_response(r, chunk_size=16 * 1024):
1215
total_size = int(r.headers.get("Content-length", 0))

torchtext/datasets/ag_news.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,16 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6-
from torchtext._download_hooks import HttpReader
75
from torchtext._internal.module_utils import is_module_available
86
from torchtext.data.datasets_utils import (
97
_wrap_split_argument,
108
_create_dataset_directory,
119
)
1210

11+
if is_module_available("torchdata"):
12+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13+
from torchtext._download_hooks import HttpReader
14+
1315
URL = {
1416
"train": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv",
1517
"test": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv",

torchtext/datasets/amazonreviewfull.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,16 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6-
from torchtext._download_hooks import GDriveReader
75
from torchtext._internal.module_utils import is_module_available
86
from torchtext.data.datasets_utils import (
97
_wrap_split_argument,
108
_create_dataset_directory,
119
)
1210

11+
if is_module_available("torchdata"):
12+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13+
from torchtext._download_hooks import GDriveReader
14+
1315
URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA"
1416

1517
MD5 = "57d28bd5d930e772930baddf36641c7c"

torchtext/datasets/amazonreviewpolarity.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,16 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6-
from torchtext._download_hooks import GDriveReader
75
from torchtext._internal.module_utils import is_module_available
86
from torchtext.data.datasets_utils import (
97
_wrap_split_argument,
108
_create_dataset_directory,
119
)
1210

11+
if is_module_available("torchdata"):
12+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13+
from torchtext._download_hooks import GDriveReader
14+
1315
URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM"
1416

1517
MD5 = "fe39f8b653cada45afd5792e0f0e8f9b"

torchtext/datasets/cc100.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,15 @@
11
import os.path
22
from functools import partial
33

4-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5-
from torchtext._download_hooks import HttpReader
4+
from torchtext._internal.module_utils import is_module_available
65
from torchtext.data.datasets_utils import (
76
_create_dataset_directory,
87
)
98

9+
if is_module_available("torchdata"):
10+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
11+
from torchtext._download_hooks import HttpReader
12+
1013
URL = "http://data.statmt.org/cc-100/%s.txt.xz"
1114

1215
VALID_CODES = {

torchtext/datasets/cnndm.py

+8-6
Original file line number | Diff line number | Diff line change
@@ -3,18 +3,20 @@
33
from functools import partial
44
from typing import Union, Set, Tuple
55

6-
from torchdata.datapipes.iter import (
7-
FileOpener,
8-
IterableWrapper,
9-
OnlineReader,
10-
GDriveReader,
11-
)
126
from torchtext._internal.module_utils import is_module_available
137
from torchtext.data.datasets_utils import (
148
_wrap_split_argument,
159
_create_dataset_directory,
1610
)
1711

12+
if is_module_available("torchdata"):
13+
from torchdata.datapipes.iter import (
14+
FileOpener,
15+
IterableWrapper,
16+
OnlineReader,
17+
GDriveReader,
18+
)
19+
1820
DATASET_NAME = "CNNDM"
1921

2022
SPLIT_LIST = {

torchtext/datasets/cola.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,13 @@
33
from functools import partial
44
from typing import Union, Tuple
55

6-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
7-
from torchtext._download_hooks import HttpReader
86
from torchtext._internal.module_utils import is_module_available
97
from torchtext.data.datasets_utils import _create_dataset_directory, _wrap_split_argument
108

9+
if is_module_available("torchdata"):
10+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
11+
from torchtext._download_hooks import HttpReader
12+
1113
URL = "https://nyu-mll.github.io/CoLA/cola_public_1.1.zip"
1214

1315
MD5 = "9f6d88c3558ec424cd9d66ea03589aba"

torchtext/datasets/conll2000chunking.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,16 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6-
from torchtext._download_hooks import HttpReader
75
from torchtext._internal.module_utils import is_module_available
86
from torchtext.data.datasets_utils import (
97
_wrap_split_argument,
108
_create_dataset_directory,
119
)
1210

11+
if is_module_available("torchdata"):
12+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13+
from torchtext._download_hooks import HttpReader
14+
1315
URL = {
1416
"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz",
1517
"test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz",

torchtext/datasets/dbpedia.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,16 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6-
from torchtext._download_hooks import GDriveReader
75
from torchtext._internal.module_utils import is_module_available
86
from torchtext.data.datasets_utils import (
97
_wrap_split_argument,
108
_create_dataset_directory,
119
)
1210

11+
if is_module_available("torchdata"):
12+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13+
from torchtext._download_hooks import GDriveReader
14+
1315
URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k"
1416

1517
MD5 = "dca7b1ae12b1091090db52aa7ec5ca64"

torchtext/datasets/enwik9.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
import os
22
from functools import partial
33

4-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5-
from torchtext._download_hooks import HttpReader
64
from torchtext._internal.module_utils import is_module_available
75
from torchtext.data.datasets_utils import _create_dataset_directory
86

7+
if is_module_available("torchdata"):
8+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
9+
from torchtext._download_hooks import HttpReader
10+
911
URL = "http://mattmahoney.net/dc/enwik9.zip"
1012

1113
MD5 = "3e773f8a1577fda2e27f871ca17f31fd"

torchtext/datasets/imdb.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,14 @@
33
from pathlib import Path
44
from typing import Tuple, Union
55

6-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
7-
from torchtext._download_hooks import HttpReader
86
from torchtext._internal.module_utils import is_module_available
97
from torchtext.data.datasets_utils import _create_dataset_directory
108
from torchtext.data.datasets_utils import _wrap_split_argument
119

10+
if is_module_available("torchdata"):
11+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
12+
from torchtext._download_hooks import HttpReader
13+
1214
URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
1315

1416
MD5 = "7c2ac02c03563afcf9b574c7e56c153a"

torchtext/datasets/iwslt2016.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import os
22
from functools import partial
33

4-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5-
from torchtext._download_hooks import GDriveReader
64
from torchtext._internal.module_utils import is_module_available
75
from torchtext.data.datasets_utils import (
86
_clean_files,
@@ -11,6 +9,10 @@
119
_wrap_split_argument,
1210
)
1311

12+
if is_module_available("torchdata"):
13+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
14+
from torchtext._download_hooks import GDriveReader
15+
1416
URL = "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8"
1517

1618
_PATH = "2016-01.tgz"

torchtext/datasets/iwslt2017.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import os
22
from functools import partial
33

4-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5-
from torchtext._download_hooks import GDriveReader
64
from torchtext._internal.module_utils import is_module_available
75
from torchtext.data.datasets_utils import (
86
_clean_files,
@@ -11,6 +9,10 @@
119
_wrap_split_argument,
1210
)
1311

12+
if is_module_available("torchdata"):
13+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
14+
from torchtext._download_hooks import GDriveReader
15+
1416
URL = "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp"
1517
_PATH = "2017-01-trnmted.tgz"
1618
MD5 = "aca701032b1c4411afc4d9fa367796ba"

torchtext/datasets/mnli.py

+7-5
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,19 @@
33
import os
44
from functools import partial
55

6-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
7-
8-
# we import HttpReader from _download_hooks so we can swap out public URLs
9-
# with internal URLs when the dataset is used within Facebook
10-
from torchtext._download_hooks import HttpReader
116
from torchtext._internal.module_utils import is_module_available
127
from torchtext.data.datasets_utils import (
138
_create_dataset_directory,
149
_wrap_split_argument,
1510
)
1611

12+
if is_module_available("torchdata"):
13+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
14+
15+
# we import HttpReader from _download_hooks so we can swap out public URLs
16+
# with internal URLs when the dataset is used within Facebook
17+
from torchtext._download_hooks import HttpReader
18+
1719

1820
URL = "https://cims.nyu.edu/~sbowman/multinli/multinli_1.0.zip"
1921

torchtext/datasets/mrpc.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,15 @@
33
from functools import partial
44
from typing import Union, Tuple
55

6-
from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper
76
from torchtext._internal.module_utils import is_module_available
87
from torchtext.data.datasets_utils import (
98
_wrap_split_argument,
109
_create_dataset_directory,
1110
)
1211

12+
if is_module_available("torchdata"):
13+
from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper
14+
1315

1416
URL = {
1517
"train": "https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_train.txt",

torchtext/datasets/multi30k.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,16 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6-
from torchtext._download_hooks import GDriveReader # noqa
7-
from torchtext._download_hooks import HttpReader
85
from torchtext._internal.module_utils import is_module_available
96
from torchtext.data.datasets_utils import (
107
_wrap_split_argument,
118
_create_dataset_directory,
129
)
1310

11+
if is_module_available("torchdata"):
12+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13+
from torchtext._download_hooks import HttpReader
14+
1415
URL = {
1516
"train": "http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz",
1617
"valid": "http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz",

torchtext/datasets/penntreebank.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,16 @@
22
from functools import partial
33
from typing import Tuple, Union
44

5-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6-
from torchtext._download_hooks import GDriveReader # noqa
7-
from torchtext._download_hooks import HttpReader
85
from torchtext._internal.module_utils import is_module_available
96
from torchtext.data.datasets_utils import (
107
_wrap_split_argument,
118
_create_dataset_directory,
129
)
1310

11+
if is_module_available("torchdata"):
12+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13+
from torchtext._download_hooks import HttpReader
14+
1415
URL = {
1516
"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt",
1617
"test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt",

torchtext/datasets/qnli.py

+7-5
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,19 @@
33
import os
44
from functools import partial
55

6-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
7-
8-
# we import HttpReader from _download_hooks so we can swap out public URLs
9-
# with internal URLs when the dataset is used within Facebook
10-
from torchtext._download_hooks import HttpReader
116
from torchtext._internal.module_utils import is_module_available
127
from torchtext.data.datasets_utils import (
138
_create_dataset_directory,
149
_wrap_split_argument,
1510
)
1611

12+
if is_module_available("torchdata"):
13+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
14+
15+
# we import HttpReader from _download_hooks so we can swap out public URLs
16+
# with internal URLs when the dataset is used within Facebook
17+
from torchtext._download_hooks import HttpReader
18+
1719

1820
URL = "https://dl.fbaipublicfiles.com/glue/data/QNLIv2.zip"
1921

torchtext/datasets/qqp.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
11
import os
22
from functools import partial
33

4-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5-
from torchtext._download_hooks import HttpReader
64
from torchtext._internal.module_utils import is_module_available
75
from torchtext.data.datasets_utils import _create_dataset_directory
86

7+
if is_module_available("torchdata"):
8+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
9+
from torchtext._download_hooks import HttpReader
10+
911
URL = "http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv"
1012

1113
MD5 = "b6d5672bd9dc1e66ab2bb020ebeafb8d"

0 commit comments

Comments
 (0)