|
4 | 4 | <packages> |
5 | 5 | <package id="abc" name="Australian Broadcasting Commission 2006" webpage="http://www.abc.net.au/" author="Australian Broadcasting Commission" unzip="1" unzipped_size="4054966" size="1487851" checksum="ffb36b67ff24cbf7daaf171c897eb904" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/abc.zip" /> |
6 | 6 | <package id="alpino" name="Alpino Dutch Treebank" webpage="http://www.let.rug.nl/~vannoord/trees/" contact="Gertjan van Noord" license="Distributed with permission of Gertjan van Noord" unzip="1" unzipped_size="21604821" size="2797255" checksum="ae529a1c5f13d6074f5b0d68d8edb537" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/alpino.zip" /> |
7 | | - <package id="averaged_perceptron_tagger" name="Averaged Perceptron Tagger" languages="English" unzip="1" unzipped_size="6138625" size="2526731" checksum="05c91d607ee1043181233365b3f76978" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger.zip" /> |
8 | | - <package id="averaged_perceptron_tagger_eng" name="Averaged Perceptron Tagger (JSON)" languages="English" unzip="1" unzipped_size="5703817" size="1539115" checksum="729e2255f83045670374180de9bdb613" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_eng.zip" /> |
9 | | - <package id="averaged_perceptron_tagger_ru" name="Averaged Perceptron Tagger (Russian)" webpage="http://www.ruscorpora.ru/en/" languages="Russian" unzip="1" unzipped_size="23247411" size="8628828" checksum="f7051368e4aff6718f8b38c1362dfdb1" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_ru.zip" /> |
10 | | - <package id="averaged_perceptron_tagger_rus" name="Averaged Perceptron Tagger (Russian)" webpage="http://www.ruscorpora.ru/en/" languages="Russian" unzip="1" unzipped_size="30246815" size="5997187" checksum="073f704b73bf8d88037e464852e34420" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_rus.zip" /> |
| 7 | + <package id="averaged_perceptron_tagger" name="Averaged Perceptron Tagger" languages="English" license="MIT License" webpage="https://github.com/sloria/textblob-aptagger" unzip="1" unzipped_size="6138625" size="2526731" checksum="05c91d607ee1043181233365b3f76978" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger.zip" /> |
| 8 | + <package id="averaged_perceptron_tagger_eng" name="Averaged Perceptron Tagger (JSON)" languages="English" license="MIT License" webpage="https://github.com/sloria/textblob-aptagger" unzip="1" unzipped_size="5703817" size="1539115" checksum="729e2255f83045670374180de9bdb613" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_eng.zip" /> |
| 9 | + <package id="averaged_perceptron_tagger_ru" name="Averaged Perceptron Tagger (Russian)" webpage="http://www.ruscorpora.ru/en/" languages="Russian" license="MIT License" unzip="1" unzipped_size="23247411" size="8628828" checksum="f7051368e4aff6718f8b38c1362dfdb1" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_ru.zip" /> |
| 10 | + <package id="averaged_perceptron_tagger_rus" name="Averaged Perceptron Tagger (Russian)" webpage="http://www.ruscorpora.ru/en/" languages="Russian" license="MIT License" unzip="1" unzipped_size="30246815" size="5997187" checksum="073f704b73bf8d88037e464852e34420" subdir="taggers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/taggers/averaged_perceptron_tagger_rus.zip" /> |
11 | 11 | <package id="basque_grammars" name="Grammars for Basque" author="Kepa Sarasola" languages="Spanish" unzip="1" unzipped_size="5550" size="4704" checksum="0e3518cb2aeb2600cb2841df7f035606" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/basque_grammars.zip" /> |
12 | 12 | <package id="bcp47" name="BCP-47 Language Tags" license="IETF Trust and Unicode Inc." copyright="Copyright (c) 2022 IETF Trust and Copyright (c) 1991-2022 Unicode" webpage="https://www.rfc-editor.org/rfc/rfc5646.html" unzip="0" unzipped_size="1433135" size="222952" checksum="8ef6c0dfa7661e3338dd99c495a7d9b6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/bcp47.zip" /> |
13 | 13 | <package id="biocreative_ppi" name="BioCreAtIvE (Critical Assessment of Information Extraction Systems in Biology)" webpage="http://www.mitre.org/public/biocreative/" copyright="Public Domain (not copyrighted)" license="Public Domain" unzip="1" unzipped_size="1537086" size="223566" checksum="d3be36b53ab201372f1cd63ffc75e9a9" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/biocreative_ppi.zip" /> |
|
28 | 28 | <package id="crubadan" name="Crubadan Corpus" copyright="Copyright (C) 2010 Kevin Scannell" author="Kevin Scannell" license="GPLv3" webpage="http://borel.slu.edu/crubadan/" unzip="1" unzipped_size="11256183" size="5288655" checksum="3cc831382dec41b8d9a06d93ef300352" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/crubadan.zip" /> |
29 | 29 | <package id="dependency_treebank" name="Dependency Parsed Treebank" sample="True" copyright="Copyright (C) 1995 University of Pennsylvania" license="This is a 10% fragment of Penn Treebank, (C) LDC 1995, which has been dependency parsed. It is made available under fair use for the purposes of illustrating NLTK tools for tokenizing, tagging, chunking and parsing. This data is for non-commercial use only." unzip="1" unzipped_size="1069540" size="457429" checksum="631e959acaa42eea718daf04c5cdfa76" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dependency_treebank.zip" /> |
30 | 30 | <package id="dolch" name="Dolch Word List" webpage="https://en.wikipedia.org/wiki/Dolch_word_list" unzip="1" unzipped_size="1917" size="2116" checksum="6f9c042774b96366c93fd0f9a9adb697" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/dolch.zip" /> |
| 31 | + <package id="english_wordnet" name="Open English Wordnet" version="2024" license="This resource is derived from Princeton WordNet under the WordNet License and further developed under the Creative Commons Attribution 4.0 International License. You may share and adapt this resource providing attribution is given to both Princeton WordNet and the Open English WordNet team." copyright="Open English Wordnet 2024 Copyright 2024 by the Open English Wordnet team. WordNet 3.1 Copyright 2011 by Princeton University. All rights reserved." webpage="https://en-word.net/" unzip="1" unzipped_size="38860076" size="11458203" checksum="a82dfe03cab9f6c9d85f8da8de88613b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/english_wordnet.zip" /> |
31 | 32 | <package id="europarl_raw" name="Sample European Parliament Proceedings Parallel Corpus" author="Philipp Koehn, University of Edinburgh" webpage="http://www.statmt.org/europarl" unzip="1" unzipped_size="41396100" size="12594977" checksum="7621d5675990b1decc012c823716ee76" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/europarl_raw.zip" /> |
32 | 33 | <package id="extended_omw" name="Extended Open Multilingual WordNet" copyright="Copyright (C) 2013 Francis Bond and Ryan Foster" license="CC by SA 3.0 Licence (for data from Wikitionary) and Unicode, Inc. Licence Agreement (for data from CLDR)" webpage="http://compling.hss.ntu.edu.sg/omw/summx.html" unzip="0" unzipped_size="36087752" size="11251284" checksum="8cc3931b20fdc2a2fe1ed9d42567d51b" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/extended_omw.zip" /> |
33 | 34 | <package id="floresta" name="Portuguese Treebank" license="Non-commercial use only" webpage="http://www.linguateca.pt/Floresta/" unzip="1" unzipped_size="16414136" size="1882021" checksum="de5f1df09949f080e0f616f0bc55967d" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/floresta.zip" /> |
|
150 | 151 | <item ref="crubadan" /> |
151 | 152 | <item ref="dependency_treebank" /> |
152 | 153 | <item ref="dolch" /> |
| 154 | + <item ref="english_wordnet" /> |
153 | 155 | <item ref="europarl_raw" /> |
154 | 156 | <item ref="extended_omw" /> |
155 | 157 | <item ref="floresta" /> |
|
264 | 266 | <item ref="crubadan" /> |
265 | 267 | <item ref="dependency_treebank" /> |
266 | 268 | <item ref="dolch" /> |
| 269 | + <item ref="english_wordnet" /> |
267 | 270 | <item ref="europarl_raw" /> |
268 | 271 | <item ref="extended_omw" /> |
269 | 272 | <item ref="floresta" /> |
|
364 | 367 | <item ref="crubadan" /> |
365 | 368 | <item ref="dependency_treebank" /> |
366 | 369 | <item ref="dolch" /> |
| 370 | + <item ref="english_wordnet" /> |
367 | 371 | <item ref="europarl_raw" /> |
368 | 372 | <item ref="extended_omw" /> |
369 | 373 | <item ref="floresta" /> |
|
0 commit comments