Skip to content

Commit 771b5a2

Browse files
authored
Add Torchdata as a requirement and remove conditional imports of Torchdata (#1961) (#1962)
* Add Torchdata as a requirement and remove conditional imports of Torchdata
* Add torchdata dep to meta.yaml
1 parent e2b27f9 commit 771b5a2

34 files changed

+88
-151
lines changed

packaging/torchtext/meta.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ requirements:
2323
- python
2424
- requests
2525
- tqdm
26+
- torchdata
2627
{{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
2728

2829
build:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def run(self):
100100
description="Text utilities and datasets for PyTorch",
101101
long_description=read("README.rst"),
102102
license="BSD",
103-
install_requires=["tqdm", "requests", pytorch_package_dep, "numpy"],
103+
install_requires=["tqdm", "requests", pytorch_package_dep, "numpy", "torchdata"],
104104
python_requires=">=3.7",
105105
classifiers=[
106106
"Programming Language :: Python :: 3.7",

torchtext/_download_hooks.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,9 @@
44

55
# This is to allow monkey-patching in fbcode
66
from torch.hub import load_state_dict_from_url # noqa
7-
from torchtext._internal.module_utils import is_module_available
7+
from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401
88
from tqdm import tqdm
99

10-
if is_module_available("torchdata"):
11-
from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401
12-
1310

1411
def _stream_response(r, chunk_size=16 * 1024):
1512
total_size = int(r.headers.get("Content-length", 0))

torchtext/datasets/ag_news.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6+
from torchtext._download_hooks import HttpReader
57
from torchtext._internal.module_utils import is_module_available
68
from torchtext.data.datasets_utils import (
79
_wrap_split_argument,
810
_create_dataset_directory,
911
)
1012

11-
if is_module_available("torchdata"):
12-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13-
from torchtext._download_hooks import HttpReader
14-
1513
URL = {
1614
"train": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv",
1715
"test": "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv",

torchtext/datasets/amazonreviewfull.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6+
from torchtext._download_hooks import GDriveReader
57
from torchtext._internal.module_utils import is_module_available
68
from torchtext.data.datasets_utils import (
79
_wrap_split_argument,
810
_create_dataset_directory,
911
)
1012

11-
if is_module_available("torchdata"):
12-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13-
from torchtext._download_hooks import GDriveReader
14-
1513
URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbZVhsUnRWRDhETzA"
1614

1715
MD5 = "57d28bd5d930e772930baddf36641c7c"

torchtext/datasets/amazonreviewpolarity.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6+
from torchtext._download_hooks import GDriveReader
57
from torchtext._internal.module_utils import is_module_available
68
from torchtext.data.datasets_utils import (
79
_wrap_split_argument,
810
_create_dataset_directory,
911
)
1012

11-
if is_module_available("torchdata"):
12-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13-
from torchtext._download_hooks import GDriveReader
14-
1513
URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbaW12WVVZS2drcnM"
1614

1715
MD5 = "fe39f8b653cada45afd5792e0f0e8f9b"

torchtext/datasets/cc100.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
import os.path
22
from functools import partial
33

4-
from torchtext._internal.module_utils import is_module_available
4+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5+
from torchtext._download_hooks import HttpReader
56
from torchtext.data.datasets_utils import (
67
_create_dataset_directory,
78
)
89

9-
if is_module_available("torchdata"):
10-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
11-
from torchtext._download_hooks import HttpReader
12-
1310
URL = "http://data.statmt.org/cc-100/%s.txt.xz"
1411

1512
VALID_CODES = {

torchtext/datasets/cnndm.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,18 @@
33
from functools import partial
44
from typing import Union, Set, Tuple
55

6+
from torchdata.datapipes.iter import (
7+
FileOpener,
8+
IterableWrapper,
9+
OnlineReader,
10+
GDriveReader,
11+
)
612
from torchtext._internal.module_utils import is_module_available
713
from torchtext.data.datasets_utils import (
814
_wrap_split_argument,
915
_create_dataset_directory,
1016
)
1117

12-
if is_module_available("torchdata"):
13-
from torchdata.datapipes.iter import (
14-
FileOpener,
15-
IterableWrapper,
16-
OnlineReader,
17-
GDriveReader,
18-
)
19-
2018
DATASET_NAME = "CNNDM"
2119

2220
SPLIT_LIST = {

torchtext/datasets/cola.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,11 @@
33
from functools import partial
44
from typing import Union, Tuple
55

6+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
7+
from torchtext._download_hooks import HttpReader
68
from torchtext._internal.module_utils import is_module_available
79
from torchtext.data.datasets_utils import _create_dataset_directory, _wrap_split_argument
810

9-
if is_module_available("torchdata"):
10-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
11-
from torchtext._download_hooks import HttpReader
12-
1311
URL = "https://nyu-mll.github.io/CoLA/cola_public_1.1.zip"
1412

1513
MD5 = "9f6d88c3558ec424cd9d66ea03589aba"

torchtext/datasets/conll2000chunking.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6+
from torchtext._download_hooks import HttpReader
57
from torchtext._internal.module_utils import is_module_available
68
from torchtext.data.datasets_utils import (
79
_wrap_split_argument,
810
_create_dataset_directory,
911
)
1012

11-
if is_module_available("torchdata"):
12-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13-
from torchtext._download_hooks import HttpReader
14-
1513
URL = {
1614
"train": "https://www.clips.uantwerpen.be/conll2000/chunking/train.txt.gz",
1715
"test": "https://www.clips.uantwerpen.be/conll2000/chunking/test.txt.gz",

torchtext/datasets/dbpedia.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6+
from torchtext._download_hooks import GDriveReader
57
from torchtext._internal.module_utils import is_module_available
68
from torchtext.data.datasets_utils import (
79
_wrap_split_argument,
810
_create_dataset_directory,
911
)
1012

11-
if is_module_available("torchdata"):
12-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13-
from torchtext._download_hooks import GDriveReader
14-
1513
URL = "https://drive.google.com/uc?export=download&id=0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k"
1614

1715
MD5 = "dca7b1ae12b1091090db52aa7ec5ca64"

torchtext/datasets/enwik9.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import os
22
from functools import partial
33

4+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5+
from torchtext._download_hooks import HttpReader
46
from torchtext._internal.module_utils import is_module_available
57
from torchtext.data.datasets_utils import _create_dataset_directory
68

7-
if is_module_available("torchdata"):
8-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
9-
from torchtext._download_hooks import HttpReader
10-
119
URL = "http://mattmahoney.net/dc/enwik9.zip"
1210

1311
MD5 = "3e773f8a1577fda2e27f871ca17f31fd"

torchtext/datasets/imdb.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,12 @@
33
from pathlib import Path
44
from typing import Tuple, Union
55

6+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
7+
from torchtext._download_hooks import HttpReader
68
from torchtext._internal.module_utils import is_module_available
79
from torchtext.data.datasets_utils import _create_dataset_directory
810
from torchtext.data.datasets_utils import _wrap_split_argument
911

10-
if is_module_available("torchdata"):
11-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
12-
from torchtext._download_hooks import HttpReader
13-
1412
URL = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"
1513

1614
MD5 = "7c2ac02c03563afcf9b574c7e56c153a"

torchtext/datasets/iwslt2016.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import os
22
from functools import partial
33

4+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5+
from torchtext._download_hooks import GDriveReader
46
from torchtext._internal.module_utils import is_module_available
57
from torchtext.data.datasets_utils import (
68
_clean_files,
@@ -9,10 +11,6 @@
911
_wrap_split_argument,
1012
)
1113

12-
if is_module_available("torchdata"):
13-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
14-
from torchtext._download_hooks import GDriveReader
15-
1614
URL = "https://drive.google.com/uc?id=1l5y6Giag9aRPwGtuZHswh3w5v3qEz8D8"
1715

1816
_PATH = "2016-01.tgz"

torchtext/datasets/iwslt2017.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import os
22
from functools import partial
33

4+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5+
from torchtext._download_hooks import GDriveReader
46
from torchtext._internal.module_utils import is_module_available
57
from torchtext.data.datasets_utils import (
68
_clean_files,
@@ -9,10 +11,6 @@
911
_wrap_split_argument,
1012
)
1113

12-
if is_module_available("torchdata"):
13-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
14-
from torchtext._download_hooks import GDriveReader
15-
1614
URL = "https://drive.google.com/u/0/uc?id=12ycYSzLIG253AFN35Y6qoyf9wtkOjakp"
1715
_PATH = "2017-01-trnmted.tgz"
1816
MD5 = "aca701032b1c4411afc4d9fa367796ba"

torchtext/datasets/mnli.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,17 @@
33
import os
44
from functools import partial
55

6+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
7+
8+
# we import HttpReader from _download_hooks so we can swap out public URLs
9+
# with internal URLs when the dataset is used within Facebook
10+
from torchtext._download_hooks import HttpReader
611
from torchtext._internal.module_utils import is_module_available
712
from torchtext.data.datasets_utils import (
813
_create_dataset_directory,
914
_wrap_split_argument,
1015
)
1116

12-
if is_module_available("torchdata"):
13-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
14-
15-
# we import HttpReader from _download_hooks so we can swap out public URLs
16-
# with internal URLs when the dataset is used within Facebook
17-
from torchtext._download_hooks import HttpReader
18-
1917

2018
URL = "https://cims.nyu.edu/~sbowman/multinli/multinli_1.0.zip"
2119

torchtext/datasets/mrpc.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,13 @@
33
from functools import partial
44
from typing import Union, Tuple
55

6+
from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper
67
from torchtext._internal.module_utils import is_module_available
78
from torchtext.data.datasets_utils import (
89
_wrap_split_argument,
910
_create_dataset_directory,
1011
)
1112

12-
if is_module_available("torchdata"):
13-
from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper
14-
1513

1614
URL = {
1715
"train": "https://dl.fbaipublicfiles.com/senteval/senteval_data/msr_paraphrase_train.txt",

torchtext/datasets/multi30k.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,15 @@
22
from functools import partial
33
from typing import Union, Tuple
44

5+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6+
from torchtext._download_hooks import GDriveReader # noqa
7+
from torchtext._download_hooks import HttpReader
58
from torchtext._internal.module_utils import is_module_available
69
from torchtext.data.datasets_utils import (
710
_wrap_split_argument,
811
_create_dataset_directory,
912
)
1013

11-
if is_module_available("torchdata"):
12-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13-
from torchtext._download_hooks import HttpReader
14-
1514
URL = {
1615
"train": "http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/training.tar.gz",
1716
"valid": "http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz",

torchtext/datasets/penntreebank.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,15 @@
22
from functools import partial
33
from typing import Tuple, Union
44

5+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
6+
from torchtext._download_hooks import GDriveReader # noqa
7+
from torchtext._download_hooks import HttpReader
58
from torchtext._internal.module_utils import is_module_available
69
from torchtext.data.datasets_utils import (
710
_wrap_split_argument,
811
_create_dataset_directory,
912
)
1013

11-
if is_module_available("torchdata"):
12-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
13-
from torchtext._download_hooks import HttpReader
14-
1514
URL = {
1615
"train": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.train.txt",
1716
"test": "https://raw.githubusercontent.com/wojzaremba/lstm/master/data/ptb.test.txt",

torchtext/datasets/qnli.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,17 @@
33
import os
44
from functools import partial
55

6+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
7+
8+
# we import HttpReader from _download_hooks so we can swap out public URLs
9+
# with internal URLs when the dataset is used within Facebook
10+
from torchtext._download_hooks import HttpReader
611
from torchtext._internal.module_utils import is_module_available
712
from torchtext.data.datasets_utils import (
813
_create_dataset_directory,
914
_wrap_split_argument,
1015
)
1116

12-
if is_module_available("torchdata"):
13-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
14-
15-
# we import HttpReader from _download_hooks so we can swap out public URLs
16-
# with internal URLs when the dataset is used within Facebook
17-
from torchtext._download_hooks import HttpReader
18-
1917

2018
URL = "https://dl.fbaipublicfiles.com/glue/data/QNLIv2.zip"
2119

torchtext/datasets/qqp.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import os
22
from functools import partial
33

4+
from torchdata.datapipes.iter import FileOpener, IterableWrapper
5+
from torchtext._download_hooks import HttpReader
46
from torchtext._internal.module_utils import is_module_available
57
from torchtext.data.datasets_utils import _create_dataset_directory
68

7-
if is_module_available("torchdata"):
8-
from torchdata.datapipes.iter import FileOpener, IterableWrapper
9-
from torchtext._download_hooks import HttpReader
10-
119
URL = "http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv"
1210

1311
MD5 = "b6d5672bd9dc1e66ab2bb020ebeafb8d"

0 commit comments

Comments
 (0)