From 52c0d85c603bd8beff8c300661fcec09d673cec9 Mon Sep 17 00:00:00 2001 From: Sahan Paliskara Date: Thu, 21 Mar 2024 14:28:56 -0700 Subject: [PATCH 1/4] Add version to validate from torchtext validate binaries call (#2240) --- .github/workflows/validate-binaries.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index 6bebfd3fcb..fe55276dd8 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -43,11 +43,17 @@ on: default: "" required: false type: string + pytorch_version: + description: 'PyTorch version to validate (ie. 2.0, 2.2.2, etc.) - optional' + default: "" + required: false + type: string jobs: validate-binaries: uses: pytorch/test-infra/.github/workflows/validate-domain-library.yml@main with: package_type: "conda,wheel" + version: ${{ inputs.version }} os: ${{ inputs.os }} channel: ${{ inputs.channel }} repository: "pytorch/text" From 51f373d64eebba4cb4359a5b2c3b6005e6598340 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 22 Mar 2024 14:34:24 +0000 Subject: [PATCH 2/4] Remove torchdata dependency from package and from CI (#2241) --- .circleci/unittest/linux/scripts/install.sh | 5 --- .circleci/unittest/windows/scripts/install.sh | 5 --- .github/workflows/build-conda-linux.yml | 2 +- .github/workflows/build-conda-m1.yml | 2 +- .github/workflows/build-conda-windows.yml | 2 +- .github/workflows/build-wheels-linux.yml | 2 +- .github/workflows/build-wheels-m1.yml | 2 +- .github/workflows/build-wheels-windows.yml | 2 +- .github/workflows/codeql.yml | 1 - .github/workflows/integration-test.yml | 4 +- .github/workflows/test-linux-cpu.yml | 5 +-- .github/workflows/test-linux-gpu.yml | 5 +-- .github/workflows/test-macos-cpu.yml | 5 +-- .github/workflows/test-windows-cpu.yml | 5 +-- .github/workflows/validate-binaries.yml | 2 +- README.rst | 2 +- packaging/install_torchdata.sh | 40 ------------------- packaging/pkg_helpers.bash | 12 ------ packaging/torchtext/meta.yaml | 1 - pytest.ini | 1 + requirements.txt | 1 - setup.py | 6 +-- test/smoke_tests/smoke_tests.py | 22 ---------- torchtext/_download_hooks.py | 1 - 24 files changed, 15 insertions(+), 120 deletions(-) delete mode 100755 packaging/install_torchdata.sh diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh index b5043d6065..fa56e74f7d 100755 --- a/.circleci/unittest/linux/scripts/install.sh +++ b/.circleci/unittest/linux/scripts/install.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash unset PYTORCH_VERSION -unset TORCHDATA_VERSION # For unittest, nightly PyTorch is used as the following section, # so no need to set PYTORCH_VERSION. # In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. @@ -30,10 +29,6 @@ printf "* Installing PyTorch\n" ) -printf "Installing torchdata nightly with portalocker\n" -pip install "portalocker>=2.0.0" -pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu - printf "* Installing torchtext\n" python setup.py develop diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh index 9ce0558fcd..7eb4810408 100644 --- a/.circleci/unittest/windows/scripts/install.sh +++ b/.circleci/unittest/windows/scripts/install.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash unset PYTORCH_VERSION -unset TORCHDATA_VERSION # For unittest, nightly PyTorch is used as the following section, # so no need to set PYTORCH_VERSION. # In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. @@ -19,10 +18,6 @@ conda activate ./env printf "* Installing PyTorch\n" conda install -y -c "pytorch-${UPLOAD_CHANNEL}" ${CONDA_CHANNEL_FLAGS} pytorch cpuonly -printf "* Installing torchdata nightly with portalocker\n" -pip install "portalocker>=2.0.0" -pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu - printf "* Installing pywin32_postinstall script\n" curl --output pywin32_postinstall.py https://raw.githubusercontent.com/mhammond/pywin32/main/pywin32_postinstall.py python pywin32_postinstall.py -install diff --git a/.github/workflows/build-conda-linux.yml b/.github/workflows/build-conda-linux.yml index 87dc12a010..6a2a9a775c 100644 --- a/.github/workflows/build-conda-linux.yml +++ b/.github/workflows/build-conda-linux.yml @@ -29,7 +29,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" conda-package-directory: packaging/torchtext smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/build-conda-m1.yml b/.github/workflows/build-conda-m1.yml index b59b27c7fa..c0e9b561cc 100644 --- a/.github/workflows/build-conda-m1.yml +++ b/.github/workflows/build-conda-m1.yml @@ -28,7 +28,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" conda-package-directory: packaging/torchtext smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/build-conda-windows.yml b/.github/workflows/build-conda-windows.yml index db1037574a..7f7af58a07 100644 --- a/.github/workflows/build-conda-windows.yml +++ b/.github/workflows/build-conda-windows.yml @@ -29,7 +29,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" conda-package-directory: packaging/torchtext smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/build-wheels-linux.yml b/.github/workflows/build-wheels-linux.yml index 2daa0be5de..2f49308fc7 100644 --- a/.github/workflows/build-wheels-linux.yml +++ b/.github/workflows/build-wheels-linux.yml @@ -34,7 +34,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" smoke-test-script: test/smoke_tests/smoke_tests.py package-name: torchtext diff --git a/.github/workflows/build-wheels-m1.yml b/.github/workflows/build-wheels-m1.yml index 49fbdd3a94..8e9ba24c95 100644 --- a/.github/workflows/build-wheels-m1.yml +++ b/.github/workflows/build-wheels-m1.yml @@ -32,7 +32,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" post-script: "" package-name: torchtext smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/build-wheels-windows.yml b/.github/workflows/build-wheels-windows.yml index 9dce4179d6..fe2327a3c2 100644 --- a/.github/workflows/build-wheels-windows.yml +++ b/.github/workflows/build-wheels-windows.yml @@ -33,7 +33,7 @@ jobs: matrix: include: - repository: pytorch/text - pre-script: packaging/install_torchdata.sh + pre-script: "" env-script: packaging/vc_env_helper.bat post-script: "" smoke-test-script: test/smoke_tests/smoke_tests.py diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index b6af768134..8e6163288c 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -31,7 +31,6 @@ jobs: - name: Install Torch run: | python -m pip install cmake - python -m pip install --quiet --pre torch torchdata -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html sudo ln -s /usr/bin/ninja /usr/bin/ninja-build - name: Build TorchText diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 98824c2d74..e1bfabecbf 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -39,15 +39,13 @@ jobs: python -m spacy download en_core_web_sm printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch, Torchvision, and TorchData + # Install PyTorch, Torchvision set -ex conda install \ --yes \ -c "pytorch-${CHANNEL}" \ -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ "${CUDATOOLKIT}" - printf "Installing torchdata nightly\n" - python3 -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu python3 setup.py develop # Install integration test dependencies python3 -m pip --quiet install parameterized diff --git a/.github/workflows/test-linux-cpu.yml b/.github/workflows/test-linux-cpu.yml index 518f5f4383..6b3dbf0f5b 100644 --- a/.github/workflows/test-linux-cpu.yml +++ b/.github/workflows/test-linux-cpu.yml @@ -50,16 +50,13 @@ jobs: printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch, Torchvision, and TorchData + # Install PyTorch, Torchvision set -ex conda install \ --yes \ -c "pytorch-${CHANNEL}" \ -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ "${CUDATOOLKIT}" - printf "Installing torchdata nightly\n" - python3 -m pip install "portalocker>=2.0.0" - python3 -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu python3 setup.py develop python3 -m pip install parameterized diff --git a/.github/workflows/test-linux-gpu.yml b/.github/workflows/test-linux-gpu.yml index 1bc124c47e..f51afd4fb9 100644 --- a/.github/workflows/test-linux-gpu.yml +++ b/.github/workflows/test-linux-gpu.yml @@ -54,7 +54,7 @@ jobs: printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch and TorchData + # Install PyTorch set -ex conda install \ --yes \ @@ -62,9 +62,6 @@ jobs: -c "pytorch-${CHANNEL}" \ -c nvidia "pytorch-${CHANNEL}"::pytorch[build="*${VERSION}*"] \ "${CUDATOOLKIT}" - printf "Installing torchdata nightly\n" - python3 -m pip install "portalocker>=2.0.0" - python3 -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu --quiet python3 setup.py develop python3 -m pip install parameterized --quiet diff --git a/.github/workflows/test-macos-cpu.yml b/.github/workflows/test-macos-cpu.yml index 774595bef9..4595627b50 100644 --- a/.github/workflows/test-macos-cpu.yml +++ b/.github/workflows/test-macos-cpu.yml @@ -55,7 +55,7 @@ jobs: printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch, Torchvision, and TorchData + # Install PyTorch, Torchvision set -ex conda install \ --yes \ @@ -64,9 +64,6 @@ jobs: "${MKL_CONSTRAINT}" \ pytorch \ "${CUDATOOLKIT}" - printf "Installing torchdata nightly\n" - python3 -m pip install "portalocker>=2.0.0" - python3 -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu python3 setup.py develop python3 -m pip install parameterized diff --git a/.github/workflows/test-windows-cpu.yml b/.github/workflows/test-windows-cpu.yml index 1915ddd91c..0b6c9aa666 100644 --- a/.github/workflows/test-windows-cpu.yml +++ b/.github/workflows/test-windows-cpu.yml @@ -51,15 +51,12 @@ jobs: printf "* Downloading SpaCy German models\n" python -m spacy download de_core_news_sm - # Install PyTorch, Torchvision, and TorchData + # Install PyTorch, Torchvision conda install \ --yes \ -c "pytorch-${CHANNEL}" \ pytorch \ cpuonly - printf "Installing torchdata nightly\n" - python -m pip install "portalocker>=2.0.0" - python -m pip install --pre torchdata --index-url https://download.pytorch.org/whl/nightly/cpu printf "* Installing pywin32_postinstall script\n" curl --output pywin32_postinstall.py https://raw.githubusercontent.com/mhammond/pywin32/main/pywin32_postinstall.py diff --git a/.github/workflows/validate-binaries.yml b/.github/workflows/validate-binaries.yml index fe55276dd8..6ba6debc92 100644 --- a/.github/workflows/validate-binaries.yml +++ b/.github/workflows/validate-binaries.yml @@ -44,7 +44,7 @@ on: required: false type: string pytorch_version: - description: 'PyTorch version to validate (ie. 2.0, 2.2.2, etc.) - optional' + description: "PyTorch version to validate (ie. 2.0, 2.2.2, etc.) - optional" default: "" required: false type: string diff --git a/README.rst b/README.rst index c99be4af4b..a31853f769 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ torchtext +++++++++ -CAUTION: As of September 2023 we have paused active development of TorchText because our focus has shifted away from building out this library offering. +CAUTION: As of September 2023 we have paused active development of TorchText because our focus has shifted away from building out this library offering. We will continue to release new versions but do not anticipate any new feature development as we figure out future investments in this space. This repository consists of: diff --git a/packaging/install_torchdata.sh b/packaging/install_torchdata.sh deleted file mode 100755 index 7db52358a3..0000000000 --- a/packaging/install_torchdata.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -package_type="$PACKAGE_TYPE" -channel="$CHANNEL" -if [ -z "$package_type" ]; then - package_type="wheel" -fi -if [ -z "$channel" ]; then - channel="nightly" -fi - -# Wrong values -if [ "$package_type" != "wheel" ] && [ "$package_type" != "conda" ]; then - exit 1 -fi -if [ "$channel" != "nightly" ] && [ "$channel" != "test" ]; then - exit 1 -fi - - -if [ "$package_type" = "wheel" ]; then - install_cmd="pip install" - if [ "$channel" = "nightly" ]; then - install_cmd="${install_cmd} --pre" - fi - install_channel="--index-url https://download.pytorch.org/whl/${channel}/cpu" -else - install_cmd="conda install" - install_channel="-c pytorch-${channel}" -fi - -$install_cmd torchdata $install_channel - -if [ "$package_type" = "wheel" ]; then - TORCHDATA_VERSION="$(pip show torchdata | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" -else - TORCHDATA_VERSION="$(conda list -fe torchdata | grep torchdata | sed -e 's/torchdata=\(.*\)=py.*/\1/')" - echo "export CONDA_TORCHDATA_CONSTRAINT='- torchdata==${TORCHDATA_VERSION}'" >> "${BUILD_ENV_FILE}" -fi - -echo "export TORCHDATA_VERSION=${TORCHDATA_VERSION}" >> "${BUILD_ENV_FILE}" diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash index 5c45e8937a..221e1d639a 100644 --- a/packaging/pkg_helpers.bash +++ b/packaging/pkg_helpers.bash @@ -190,14 +190,6 @@ setup_pip_pytorch_version() { -f https://download.pytorch.org/whl/torch_stable.html \ -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/torch_${UPLOAD_CHANNEL}.html" fi - if [[ -z "$TORCHDATA_VERSION" ]]; then - pip_install --pre torchdata -f "https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html" - export TORCHDATA_VERSION="$(pip show torchdata | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" - else - pip_install "torchdata==$TORCHDATA_VERSION" \ - -f https://download.pytorch.org/whl/torch_stable.html \ - -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/torch_${UPLOAD_CHANNEL}.html" - fi } # Fill PYTORCH_VERSION with the latest conda nightly version, and @@ -232,10 +224,6 @@ setup_conda_pytorch_constraint() { export CONDA_EXTRA_BUILD_CONSTRAINT="- mkl<=2021.2.0" fi fi - if [[ -z "$TORCHDATA_VERSION" ]]; then - export TORCHDATA_VERSION="$(conda search --json 'torchdata[channel=pytorch-nightly]' | ${PYTHON} -c "import sys, json, re; print(re.sub(r'\\+.*$', '', json.load(sys.stdin)['torchdata'][-1]['version']))")" - fi - export CONDA_TORCHDATA_CONSTRAINT="- torchdata==$TORCHDATA_VERSION" } # Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT diff --git a/packaging/torchtext/meta.yaml b/packaging/torchtext/meta.yaml index 03221505e5..9d7502200d 100644 --- a/packaging/torchtext/meta.yaml +++ b/packaging/torchtext/meta.yaml @@ -24,7 +24,6 @@ requirements: - requests - tqdm {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} - {{ environ.get('CONDA_TORCHDATA_CONSTRAINT') }} build: string: py{{py}} diff --git a/pytest.ini b/pytest.ini index c7ba710bd7..b9bb2d26ca 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] +addopts = --ignore-glob=test/torchtext_unittest/datasets/* testpaths = test/ python_paths = ./ markers = diff --git a/requirements.txt b/requirements.txt index cbc13eefbf..079025ca62 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,6 @@ Sphinx pytest expecttest parameterized -torchdata>0.5 # Lets pytest find our code by automatically modifying PYTHONPATH pytest-pythonpath diff --git a/setup.py b/setup.py index d008cb9c90..a3fb2707c3 100644 --- a/setup.py +++ b/setup.py @@ -63,14 +63,10 @@ def _init_submodule(): print("-- Building version " + VERSION) pytorch_package_version = os.getenv("PYTORCH_VERSION") -torchdata_package_version = os.getenv("TORCHDATA_VERSION") pytorch_package_dep = "torch" if pytorch_package_version is not None: pytorch_package_dep += "==" + pytorch_package_version -torchdata_package_dep = "torchdata" -if torchdata_package_version is not None: - torchdata_package_dep += "==" + torchdata_package_version class clean(distutils.command.clean.clean): @@ -104,7 +100,7 @@ def run(self): description="Text utilities, models, transforms, and datasets for PyTorch.", long_description=read("README.rst"), license="BSD", - install_requires=["tqdm", "requests", pytorch_package_dep, "numpy", torchdata_package_dep], + install_requires=["tqdm", "requests", pytorch_package_dep, "numpy"], python_requires=">=3.8", classifiers=[ "Programming Language :: Python :: 3.8", diff --git a/test/smoke_tests/smoke_tests.py b/test/smoke_tests/smoke_tests.py index 2fbaeec5ec..58d579716a 100644 --- a/test/smoke_tests/smoke_tests.py +++ b/test/smoke_tests/smoke_tests.py @@ -1,28 +1,6 @@ """Run smoke tests""" -import os -import re - -import torchdata import torchtext -import torchtext.version # noqa: F401 - -NIGHTLY_ALLOWED_DELTA = 3 -channel = os.getenv("MATRIX_CHANNEL") - - -def validateTorchdataVersion(): - from datetime import datetime - - date_t_str = re.findall(r"dev\d+", torchdata.__version__)[0] - date_t_delta = datetime.now() - datetime.strptime(date_t_str[3:], "%Y%m%d") - - if date_t_delta.days >= NIGHTLY_ALLOWED_DELTA: - raise RuntimeError(f"torchdata binary {torchdata.__version__} is more than {NIGHTLY_ALLOWED_DELTA} days old!") - -if channel == "nightly": - validateTorchdataVersion() print("torchtext version is ", torchtext.__version__) -print("torchdata version is ", torchdata.__version__) diff --git a/torchtext/_download_hooks.py b/torchtext/_download_hooks.py index 89baafafa5..f7a236482b 100644 --- a/torchtext/_download_hooks.py +++ b/torchtext/_download_hooks.py @@ -4,7 +4,6 @@ # This is to allow monkey-patching in fbcode from torch.hub import load_state_dict_from_url # noqa -from torchdata.datapipes.iter import HttpReader, GDriveReader # noqa F401 from tqdm import tqdm From ecb9ebc54cbd44b558f026d338c795d2bf7f4ef9 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 22 Mar 2024 17:05:21 +0000 Subject: [PATCH 3/4] Fix torchdata import error (#2242) * Remove stuff * stuff * lint --- torchtext/datasets/ag_news.py | 3 +-- torchtext/datasets/amazonreviewfull.py | 3 +-- torchtext/datasets/amazonreviewpolarity.py | 3 +-- torchtext/datasets/cc100.py | 8 ++++++-- torchtext/datasets/cnndm.py | 12 ++++++------ torchtext/datasets/cola.py | 3 +-- torchtext/datasets/conll2000chunking.py | 3 +-- torchtext/datasets/dbpedia.py | 3 +-- torchtext/datasets/enwik9.py | 3 +-- torchtext/datasets/imdb.py | 3 +-- torchtext/datasets/iwslt2016.py | 3 +-- torchtext/datasets/iwslt2017.py | 3 +-- torchtext/datasets/mnli.py | 5 ++--- torchtext/datasets/mrpc.py | 2 +- torchtext/datasets/multi30k.py | 6 +++--- torchtext/datasets/penntreebank.py | 6 +++--- torchtext/datasets/qnli.py | 5 ++--- torchtext/datasets/qqp.py | 3 +-- torchtext/datasets/rte.py | 5 ++--- torchtext/datasets/sogounews.py | 3 +-- torchtext/datasets/squad1.py | 3 +-- torchtext/datasets/squad2.py | 3 +-- torchtext/datasets/sst2.py | 5 ++--- torchtext/datasets/stsb.py | 5 ++--- torchtext/datasets/udpos.py | 3 +-- torchtext/datasets/wikitext103.py | 3 +-- torchtext/datasets/wikitext2.py | 3 +-- torchtext/datasets/wnli.py | 5 ++--- torchtext/datasets/yahooanswers.py | 3 +-- torchtext/datasets/yelpreviewfull.py | 3 +-- torchtext/datasets/yelpreviewpolarity.py | 3 +-- 31 files changed, 51 insertions(+), 73 deletions(-) diff --git a/torchtext/datasets/ag_news.py b/torchtext/datasets/ag_news.py index 5f1c7741f6..93f398329c 100644 --- a/torchtext/datasets/ag_news.py +++ b/torchtext/datasets/ag_news.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -65,6 +63,7 @@ def AG_NEWS(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) cache_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/amazonreviewfull.py b/torchtext/datasets/amazonreviewfull.py index 06e688279a..c916d2e034 100644 --- a/torchtext/datasets/amazonreviewfull.py +++ b/torchtext/datasets/amazonreviewfull.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -79,6 +77,7 @@ def AmazonReviewFull(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/amazonreviewpolarity.py b/torchtext/datasets/amazonreviewpolarity.py index 9616dc1d9e..a0ed0c6c40 100644 --- a/torchtext/datasets/amazonreviewpolarity.py +++ b/torchtext/datasets/amazonreviewpolarity.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -76,6 +74,7 @@ def AmazonReviewPolarity(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/cc100.py b/torchtext/datasets/cc100.py index 4ce2e92dd8..0f7cf2920f 100644 --- a/torchtext/datasets/cc100.py +++ b/torchtext/datasets/cc100.py @@ -1,8 +1,7 @@ import os.path from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader +from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, ) @@ -167,6 +166,11 @@ def CC100(root: str, language_code: str = "en"): """ if language_code not in VALID_CODES: raise ValueError(f"Invalid language code {language_code}") + if not is_module_available("torchdata"): + raise ModuleNotFoundError( + "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" + ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url = URL % language_code url_dp = IterableWrapper([url]) diff --git a/torchtext/datasets/cnndm.py b/torchtext/datasets/cnndm.py index 2adba04fd1..92b2da8ce1 100644 --- a/torchtext/datasets/cnndm.py +++ b/torchtext/datasets/cnndm.py @@ -3,12 +3,6 @@ from functools import partial from typing import Union, Set, Tuple -from torchdata.datapipes.iter import ( - FileOpener, - IterableWrapper, - OnlineReader, - GDriveReader, -) from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -141,6 +135,12 @@ def CNNDM(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import ( # noqa + FileOpener, + IterableWrapper, + OnlineReader, + GDriveReader, + ) cnn_dp = _load_stories(root, "cnn", split) dailymail_dp = _load_stories(root, "dailymail", split) diff --git a/torchtext/datasets/cola.py b/torchtext/datasets/cola.py index 214c435d03..6ec6cd8b29 100644 --- a/torchtext/datasets/cola.py +++ b/torchtext/datasets/cola.py @@ -3,8 +3,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory, _wrap_split_argument @@ -76,6 +74,7 @@ def CoLA(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/conll2000chunking.py b/torchtext/datasets/conll2000chunking.py index acbd9cbd0c..983059faf1 100644 --- a/torchtext/datasets/conll2000chunking.py +++ b/torchtext/datasets/conll2000chunking.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -68,6 +66,7 @@ def CoNLL2000Chunking(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) diff --git a/torchtext/datasets/dbpedia.py b/torchtext/datasets/dbpedia.py index be86f1a98c..d563f965cb 100644 --- a/torchtext/datasets/dbpedia.py +++ b/torchtext/datasets/dbpedia.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -75,6 +73,7 @@ def DBpedia(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/enwik9.py b/torchtext/datasets/enwik9.py index cbd5e647a7..8b30cc4da8 100644 --- a/torchtext/datasets/enwik9.py +++ b/torchtext/datasets/enwik9.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory @@ -50,6 +48,7 @@ def EnWik9(root: str): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/imdb.py b/torchtext/datasets/imdb.py index 09fba57b04..cefedc4bf0 100644 --- a/torchtext/datasets/imdb.py +++ b/torchtext/datasets/imdb.py @@ -3,8 +3,6 @@ from pathlib import Path from typing import Tuple, Union -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory from torchtext.data.datasets_utils import _wrap_split_argument @@ -89,6 +87,7 @@ def IMDB(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) diff --git a/torchtext/datasets/iwslt2016.py b/torchtext/datasets/iwslt2016.py index dd4b806e8c..f1a05dcaea 100644 --- a/torchtext/datasets/iwslt2016.py +++ b/torchtext/datasets/iwslt2016.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _clean_files, @@ -219,6 +217,7 @@ def IWSLT2016( raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa if not isinstance(language_pair, list) and not isinstance(language_pair, tuple): raise ValueError("language_pair must be list or tuple but got {} instead".format(type(language_pair))) diff --git a/torchtext/datasets/iwslt2017.py b/torchtext/datasets/iwslt2017.py index 3707986d54..2095647fe4 100644 --- a/torchtext/datasets/iwslt2017.py +++ b/torchtext/datasets/iwslt2017.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _clean_files, @@ -184,6 +182,7 @@ def IWSLT2017(root=".data", split=("train", "valid", "test"), language_pair=("de raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa valid_set = "dev2010" test_set = "tst2010" diff --git a/torchtext/datasets/mnli.py b/torchtext/datasets/mnli.py index f4335c5ccf..def9354b53 100644 --- a/torchtext/datasets/mnli.py +++ b/torchtext/datasets/mnli.py @@ -3,11 +3,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -89,6 +87,7 @@ def MNLI(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/mrpc.py b/torchtext/datasets/mrpc.py index e9abea1721..c3e6f72a91 100644 --- a/torchtext/datasets/mrpc.py +++ b/torchtext/datasets/mrpc.py @@ -3,7 +3,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, HttpReader, IterableWrapper from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -67,6 +66,7 @@ def MRPC(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) # cache data on-disk with sanity check diff --git a/torchtext/datasets/multi30k.py b/torchtext/datasets/multi30k.py index ea1c2015ae..db666bfda9 100644 --- a/torchtext/datasets/multi30k.py +++ b/torchtext/datasets/multi30k.py @@ -2,9 +2,8 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader # noqa -from torchtext._download_hooks import HttpReader +# noqa + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -89,6 +88,7 @@ def Multi30k(root: str, split: Union[Tuple[str], str], language_pair: Tuple[str] raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) diff --git a/torchtext/datasets/penntreebank.py b/torchtext/datasets/penntreebank.py index 1e0d9f295f..a7f504b9a4 100644 --- a/torchtext/datasets/penntreebank.py +++ b/torchtext/datasets/penntreebank.py @@ -2,9 +2,8 @@ from functools import partial from typing import Tuple, Union -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader # noqa -from torchtext._download_hooks import HttpReader +# noqa + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -70,6 +69,7 @@ def PennTreebank(root, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) cache_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/qnli.py b/torchtext/datasets/qnli.py index aa71eeb208..cbdca8fbc4 100644 --- a/torchtext/datasets/qnli.py +++ b/torchtext/datasets/qnli.py @@ -3,11 +3,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -81,6 +79,7 @@ def QNLI(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at `https://github.com/pytorch/data`" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/qqp.py b/torchtext/datasets/qqp.py index 013a6a82a8..887675cfde 100644 --- a/torchtext/datasets/qqp.py +++ b/torchtext/datasets/qqp.py @@ -1,8 +1,6 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import _create_dataset_directory @@ -48,6 +46,7 @@ def QQP(root: str): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/rte.py b/torchtext/datasets/rte.py index 06355468ae..61915a1790 100644 --- a/torchtext/datasets/rte.py +++ b/torchtext/datasets/rte.py @@ -3,11 +3,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -81,6 +79,7 @@ def RTE(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at `https://github.com/pytorch/data`" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/sogounews.py b/torchtext/datasets/sogounews.py index 80c7c9af9a..440e811ce4 100644 --- a/torchtext/datasets/sogounews.py +++ b/torchtext/datasets/sogounews.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -79,6 +77,7 @@ def SogouNews(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/squad1.py b/torchtext/datasets/squad1.py index 5c83bcdec2..0949eb103c 100644 --- a/torchtext/datasets/squad1.py +++ b/torchtext/datasets/squad1.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -62,6 +60,7 @@ def SQuAD1(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) # cache data on-disk with sanity check diff --git a/torchtext/datasets/squad2.py b/torchtext/datasets/squad2.py index 48ef86556c..0ad1e25ac1 100644 --- a/torchtext/datasets/squad2.py +++ b/torchtext/datasets/squad2.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -63,6 +61,7 @@ def SQuAD2(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL[split]]) # cache data on-disk with sanity check diff --git a/torchtext/datasets/sst2.py b/torchtext/datasets/sst2.py index 132b22d68d..a14cf45709 100644 --- a/torchtext/datasets/sst2.py +++ b/torchtext/datasets/sst2.py @@ -2,11 +2,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -86,6 +84,7 @@ def SST2(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/stsb.py b/torchtext/datasets/stsb.py index 324ed77245..1f66bf5279 100644 --- a/torchtext/datasets/stsb.py +++ b/torchtext/datasets/stsb.py @@ -2,11 +2,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -82,6 +80,7 @@ def STSB(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/udpos.py b/torchtext/datasets/udpos.py index 3c7b76b124..c6ee494dae 100644 --- a/torchtext/datasets/udpos.py +++ b/torchtext/datasets/udpos.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -66,6 +64,7 @@ def UDPOS(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/wikitext103.py b/torchtext/datasets/wikitext103.py index 0914d708e9..6baff13ad6 100644 --- a/torchtext/datasets/wikitext103.py +++ b/torchtext/datasets/wikitext103.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -71,6 +69,7 @@ def WikiText103(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) # cache data on-disk diff --git a/torchtext/datasets/wikitext2.py b/torchtext/datasets/wikitext2.py index ec686b94cd..94e90f2031 100644 --- a/torchtext/datasets/wikitext2.py +++ b/torchtext/datasets/wikitext2.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import HttpReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -71,6 +69,7 @@ def WikiText2(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) # cache data on-disk diff --git a/torchtext/datasets/wnli.py b/torchtext/datasets/wnli.py index c864275899..f4574d5e4e 100644 --- a/torchtext/datasets/wnli.py +++ b/torchtext/datasets/wnli.py @@ -2,11 +2,9 @@ import os from functools import partial -from torchdata.datapipes.iter import FileOpener, IterableWrapper - # we import HttpReader from _download_hooks so we can swap out public URLs # with interal URLs when the dataset is used within Facebook -from torchtext._download_hooks import HttpReader + from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _create_dataset_directory, @@ -78,6 +76,7 @@ def WNLI(root, split): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at `https://github.com/pytorch/data`" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) cache_compressed_dp = url_dp.on_disk_cache( diff --git a/torchtext/datasets/yahooanswers.py b/torchtext/datasets/yahooanswers.py index 9fad10ff1d..da357977cb 100644 --- a/torchtext/datasets/yahooanswers.py +++ b/torchtext/datasets/yahooanswers.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -75,6 +73,7 @@ def YahooAnswers(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) diff --git a/torchtext/datasets/yelpreviewfull.py b/torchtext/datasets/yelpreviewfull.py index 1272dae45c..7bea8f1211 100644 --- a/torchtext/datasets/yelpreviewfull.py +++ b/torchtext/datasets/yelpreviewfull.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -74,6 +72,7 @@ def YelpReviewFull(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) diff --git a/torchtext/datasets/yelpreviewpolarity.py b/torchtext/datasets/yelpreviewpolarity.py index 90e1e31e59..08559f0c68 100644 --- a/torchtext/datasets/yelpreviewpolarity.py +++ b/torchtext/datasets/yelpreviewpolarity.py @@ -2,8 +2,6 @@ from functools import partial from typing import Union, Tuple -from torchdata.datapipes.iter import FileOpener, IterableWrapper -from torchtext._download_hooks import GDriveReader from torchtext._internal.module_utils import is_module_available from torchtext.data.datasets_utils import ( _wrap_split_argument, @@ -74,6 +72,7 @@ def YelpReviewPolarity(root: str, split: Union[Tuple[str], str]): raise ModuleNotFoundError( "Package `torchdata` not found. Please install following instructions at https://github.com/pytorch/data" ) + from torchdata.datapipes.iter import FileOpener, GDriveReader, HttpReader, IterableWrapper # noqa url_dp = IterableWrapper([URL]) From 34166e5ea6b9eb38c15cd5328f8b4841a28315bf Mon Sep 17 00:00:00 2001 From: PaliC Date: Fri, 5 Apr 2024 16:36:05 -0700 Subject: [PATCH 4/4] version bump --- version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.txt b/version.txt index a2640ee447..33aec98e91 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.17.0a0 +0.18.0a0