Skip to content

Commit 5813d03

Browse files
committed
The original link to the wikitext-2 dataset is invalid, so it has been changed to a source from Kaggle (for both torch and mxnet).
Description of changes: By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent 3859645 commit 5813d03

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

d2l/mxnet.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2272,8 +2272,8 @@ def forward(self, tokens, segments, valid_lens=None, pred_positions=None):
22722272
return encoded_X, mlm_Y_hat, nsp_Y_hat
22732273

22742274
d2l.DATA_HUB['wikitext-2'] = (
2275-
'https://s3.amazonaws.com/research.metamind.io/wikitext/'
2276-
'wikitext-2-v1.zip', '3c914d17d80b1459be871a5039ac23e752a53cbe')
2275+
'https://www.kaggle.com/api/v1/datasets/download/bestwater/wikitext-2-v1'
2276+
'', 'ca5f319246c1e34d406780c0b6c5d1b0ec9b9a10')
22772277

22782278
def _read_wiki(data_dir):
22792279
"""Defined in :numref:`sec_bert-dataset`"""
@@ -3104,7 +3104,11 @@ def download(url, folder='../data', sha1_hash=None):
31043104
# For backward compatibility
31053105
url, sha1_hash = DATA_HUB[url]
31063106
os.makedirs(folder, exist_ok=True)
3107-
fname = os.path.join(folder, url.split('/')[-1])
3107+
file_name = url.split('/')[-1]
3108+
if (not "." in file_name) and file_name in ["wikitext-2-v1"]:
3109+
file_name += ".zip"
3110+
fname = os.path.join(folder, file_name)
3111+
31083112
# Check if hit cache
31093113
if os.path.exists(fname) and sha1_hash:
31103114
sha1 = hashlib.sha1()

0 commit comments

Comments
 (0)