Skip to content

Commit 3859645

Browse files
authored
Update wikitext-2 dataset URL and file handling
The original link to the wikitext-2 dataset is invalid, so it has been changed to a source from Kaggle.
1 parent 23d7a5a commit 3859645

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

d2l/torch.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2285,8 +2285,8 @@ def forward(self, tokens, segments, valid_lens=None, pred_positions=None):
22852285
return encoded_X, mlm_Y_hat, nsp_Y_hat
22862286

22872287
d2l.DATA_HUB['wikitext-2'] = (
2288-
'https://s3.amazonaws.com/research.metamind.io/wikitext/'
2289-
'wikitext-2-v1.zip', '3c914d17d80b1459be871a5039ac23e752a53cbe')
2288+
'https://www.kaggle.com/api/v1/datasets/download/bestwater/wikitext-2-v1'
2289+
'', 'ca5f319246c1e34d406780c0b6c5d1b0ec9b9a10')
22902290

22912291
def _read_wiki(data_dir):
22922292
"""Defined in :numref:`sec_bert-dataset`"""
@@ -3202,7 +3202,11 @@ def download(url, folder='../data', sha1_hash=None):
32023202
# For backward compatibility
32033203
url, sha1_hash = DATA_HUB[url]
32043204
os.makedirs(folder, exist_ok=True)
3205-
fname = os.path.join(folder, url.split('/')[-1])
3205+
file_name = url.split('/')[-1]
3206+
if (not "." in file_name) and file_name in ["wikitext-2-v1"]:
3207+
file_name += ".zip"
3208+
fname = os.path.join(folder, file_name)
3209+
32063210
# Check if hit cache
32073211
if os.path.exists(fname) and sha1_hash:
32083212
sha1 = hashlib.sha1()

0 commit comments

Comments
 (0)