Skip to content

Commit f5dd1a3

Browse files
polm, svlandeg, adrianeboyd
committed
Add a way to get the URL to download a pipeline to the CLI (explosion#11175)
* Add a dry run flag to download
* Remove --dry-run, add --url option to `spacy info` instead
* Make mypy happy
* Print only the URL, so it's easier to use in scripts
* Don't add the egg hash unless downloading an sdist
* Update spacy/cli/info.py
  Co-authored-by: Sofie Van Landeghem <[email protected]>
* Add two implementations of requirements
* Clean up requirements sample slightly
  This should make mypy happy
* Update URL help string
* Remove requirements option
* Add url option to docs
* Add URL to spacy info model output, when available
* Add types-setuptools to testing reqs
* Add types-setuptools to requirements
* Add "compatible", expand docstring
* Update spacy/cli/info.py
  Co-authored-by: Adriane Boyd <[email protected]>
* Run prettier on CLI docs
* Update docs
  Add a sidebar about finding download URLs, with some examples of the new command.
* Add download URLs to table on model page
* Apply suggestions from code review
  Co-authored-by: Adriane Boyd <[email protected]>
* Updates from review
* download url -> download link
* Update docs

Co-authored-by: Sofie Van Landeghem <[email protected]>
Co-authored-by: Adriane Boyd <[email protected]>
1 parent ea54cf3 commit f5dd1a3

File tree

7 files changed

+127
-28
lines changed

7 files changed

+127
-28
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,5 @@ mypy>=0.910,<0.970; platform_machine!='aarch64'
3434
types-dataclasses>=0.1.3; python_version < "3.7"
3535
types-mock>=0.1.1
3636
types-requests
37+
types-setuptools>=57.0.0
3738
black>=22.0,<23.0

spacy/cli/download.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def download_cli(
2020
ctx: typer.Context,
2121
model: str = Arg(..., help="Name of pipeline package to download"),
2222
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
23-
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel")
23+
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
2424
# fmt: on
2525
):
2626
"""
@@ -36,7 +36,12 @@ def download_cli(
3636
download(model, direct, sdist, *ctx.args)
3737

3838

39-
def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None:
39+
def download(
40+
model: str,
41+
direct: bool = False,
42+
sdist: bool = False,
43+
*pip_args,
44+
) -> None:
4045
if (
4146
not (is_package("spacy") or is_package("spacy-nightly"))
4247
and "--no-deps" not in pip_args
@@ -50,13 +55,10 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
5055
"dependencies, you'll have to install them manually."
5156
)
5257
pip_args = pip_args + ("--no-deps",)
53-
suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
54-
dl_tpl = "{m}-{v}/{m}-{v}{s}#egg={m}=={v}"
5558
if direct:
5659
components = model.split("-")
5760
model_name = "".join(components[:-1])
5861
version = components[-1]
59-
download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
6062
else:
6163
model_name = model
6264
if model in OLD_MODEL_SHORTCUTS:
@@ -67,13 +69,26 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
6769
model_name = OLD_MODEL_SHORTCUTS[model]
6870
compatibility = get_compatibility()
6971
version = get_version(model_name, compatibility)
70-
download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
72+
73+
filename = get_model_filename(model_name, version, sdist)
74+
75+
download_model(filename, pip_args)
7176
msg.good(
7277
"Download and installation successful",
7378
f"You can now load the package via spacy.load('{model_name}')",
7479
)
7580

7681

82+
def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
83+
dl_tpl = "{m}-{v}/{m}-{v}{s}"
84+
egg_tpl = "#egg={m}=={v}"
85+
suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
86+
filename = dl_tpl.format(m=model_name, v=version, s=suffix)
87+
if sdist:
88+
filename += egg_tpl.format(m=model_name, v=version)
89+
return filename
90+
91+
7792
def get_compatibility() -> dict:
7893
if is_prerelease_version(about.__version__):
7994
version: Optional[str] = about.__version__
@@ -105,6 +120,11 @@ def get_version(model: str, comp: dict) -> str:
105120
return comp[model][0]
106121

107122

123+
def get_latest_version(model: str) -> str:
124+
comp = get_compatibility()
125+
return get_version(model, comp)
126+
127+
108128
def download_model(
109129
filename: str, user_pip_args: Optional[Sequence[str]] = None
110130
) -> None:

spacy/cli/info.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
from typing import Optional, Dict, Any, Union, List
22
import platform
3+
import pkg_resources
4+
import json
35
from pathlib import Path
46
from wasabi import Printer, MarkdownRenderer
57
import srsly
68

79
from ._util import app, Arg, Opt, string_to_list
10+
from .download import get_model_filename, get_latest_version
811
from .. import util
912
from .. import about
1013

@@ -16,17 +19,27 @@ def info_cli(
1619
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
1720
silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
1821
exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
22+
url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
1923
# fmt: on
2024
):
2125
"""
2226
Print info about spaCy installation. If a pipeline is specified as an argument,
2327
print its meta information. Flag --markdown prints details in Markdown for easy
2428
copy-pasting to GitHub issues.
2529
30+
Flag --url prints only the download URL of the most recent compatible
31+
version of the pipeline.
32+
2633
DOCS: https://spacy.io/api/cli#info
2734
"""
2835
exclude = string_to_list(exclude)
29-
info(model, markdown=markdown, silent=silent, exclude=exclude)
36+
info(
37+
model,
38+
markdown=markdown,
39+
silent=silent,
40+
exclude=exclude,
41+
url=url,
42+
)
3043

3144

3245
def info(
@@ -35,11 +48,20 @@ def info(
3548
markdown: bool = False,
3649
silent: bool = True,
3750
exclude: Optional[List[str]] = None,
51+
url: bool = False,
3852
) -> Union[str, dict]:
3953
msg = Printer(no_print=silent, pretty=not silent)
4054
if not exclude:
4155
exclude = []
42-
if model:
56+
if url:
57+
if model is not None:
58+
title = f"Download info for pipeline '{model}'"
59+
data = info_model_url(model)
60+
print(data["download_url"])
61+
return data
62+
else:
63+
msg.fail("--url option requires a pipeline name", exits=1)
64+
elif model:
4365
title = f"Info about pipeline '{model}'"
4466
data = info_model(model, silent=silent)
4567
else:
@@ -99,11 +121,43 @@ def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
99121
meta["source"] = str(model_path.resolve())
100122
else:
101123
meta["source"] = str(model_path)
124+
download_url = info_installed_model_url(model)
125+
if download_url:
126+
meta["download_url"] = download_url
102127
return {
103128
k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed")
104129
}
105130

106131

132+
def info_installed_model_url(model: str) -> Optional[str]:
133+
"""Given a pipeline name, get the download URL if available, otherwise
134+
return None.
135+
136+
This is only available for pipelines installed as modules that have
137+
dist-info available.
138+
"""
139+
try:
140+
dist = pkg_resources.get_distribution(model)
141+
data = json.loads(dist.get_metadata("direct_url.json"))
142+
return data["url"]
143+
except pkg_resources.DistributionNotFound:
144+
# no such package
145+
return None
146+
except Exception:
147+
# something else, like no file or invalid JSON
148+
return None
149+
150+
def info_model_url(model: str) -> Dict[str, Any]:
151+
"""Return the download URL for the latest version of a pipeline."""
152+
version = get_latest_version(model)
153+
154+
filename = get_model_filename(model, version)
155+
download_url = about.__download_url__ + "/" + filename
156+
release_tpl = "https://github.com/explosion/spacy-models/releases/tag/{m}-{v}"
157+
release_url = release_tpl.format(m=model, v=version)
158+
return {"download_url": download_url, "release_url": release_url}
159+
160+
107161
def get_markdown(
108162
data: Dict[str, Any],
109163
title: Optional[str] = None,

spacy/tests/package/test_requirements.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def test_build_dependencies():
1717
"types-dataclasses",
1818
"types-mock",
1919
"types-requests",
20+
"types-setuptools",
2021
]
2122
# ignore language-specific packages that shouldn't be installed by all
2223
libs_ignore_setup = [

website/docs/api/cli.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,15 @@ $ python -m spacy info [--markdown] [--silent] [--exclude]
7777
$ python -m spacy info [model] [--markdown] [--silent] [--exclude] [--url]
7878
```
7979
80-
| Name | Description |
81-
| ------------------------------------------------ | --------------------------------------------------------------------------------------------- |
82-
| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
83-
| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
84-
| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~ |
85-
| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
86-
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
87-
| **PRINTS** | Information about your spaCy installation. |
80+
| Name | Description |
81+
| ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
82+
| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
83+
| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
84+
| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~ |
85+
| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
86+
| `--url`, `-u` <Tag variant="new">3.5.0</Tag> | Print the URL to download the most recent compatible version of the pipeline. Requires a pipeline name. ~~bool (flag)~~ |
87+
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
88+
| **PRINTS** | Information about your spaCy installation. |
8889

8990
## validate {#validate new="2" tag="command"}
9091

website/docs/usage/models.md

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -365,15 +365,32 @@ pipeline package can be found.
365365
To download a trained pipeline directly using
366366
[pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local
367367
path of the wheel file or archive. Installing the wheel is usually more
368-
efficient. To find the direct link to a package, head over to the
369-
[releases](https://github.com/explosion/spacy-models/releases), right click on
370-
the archive link and copy it to your clipboard.
368+
efficient.
369+
370+
> #### Pipeline Package URLs {#pipeline-urls}
371+
>
372+
> Pretrained pipeline distributions are hosted on
373+
> [GitHub Releases](https://github.com/explosion/spacy-models/releases), and you
374+
> can find download links there, as well as on the model page. You can also get
375+
> URLs directly from the command line by using `spacy info` with the `--url`
376+
> flag, which may be useful for automation.
377+
>
378+
> ```bash
379+
> spacy info en_core_web_sm --url
380+
> ```
381+
>
382+
> This command will print the URL for the latest version of a pipeline
383+
> compatible with the version of spaCy you're using. Note that in order to look
384+
> up the compatibility information an internet connection is required.
371385
372386
```bash
373387
# With external URL
374388
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl
375389
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
376390
391+
# Using spacy info to get the external URL
392+
$ pip install $(spacy info en_core_web_sm --url)
393+
377394
# With local file
378395
$ pip install /Users/you/en_core_web_sm-3.0.0-py3-none-any.whl
379396
$ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
@@ -514,21 +531,16 @@ should be specifying them directly.
514531
Because pipeline packages are valid Python packages, you can add them to your
515532
application's `requirements.txt`. If you're running your own internal PyPI
516533
installation, you can upload the pipeline packages there. pip's
517-
[requirements file format](https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format)
518-
supports both package names to download via a PyPi server, as well as direct
519-
URLs.
534+
[requirements file format](https://pip.pypa.io/en/latest/reference/requirements-file-format/)
535+
supports both package names to download via a PyPI server, as well as
536+
[direct URLs](#pipeline-urls).
520537
521538
```text
522539
### requirements.txt
523540
spacy>=3.0.0,<4.0.0
524-
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm
541+
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
525542
```
526543
527-
Specifying `#egg=` with the package name tells pip which package to expect from
528-
the download URL. This way, the package won't be re-downloaded and overwritten
529-
if it's already installed - just like when you're downloading a package from
530-
PyPi.
531-
532544
All pipeline packages are versioned and specify their spaCy dependency. This
533545
ensures cross-compatibility and lets you specify exact version requirements for
534546
each pipeline. If you've [trained](/usage/training) your own pipeline, you can

website/src/templates/models.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ const MODEL_META = {
7676
benchmark_ner: 'NER accuracy',
7777
benchmark_speed: 'Speed',
7878
compat: 'Latest compatible package version for your spaCy installation',
79+
download_link: 'Download link for the pipeline',
7980
}
8081

8182
const LABEL_SCHEME_META = {
@@ -138,6 +139,13 @@ function formatAccuracy(data, lang) {
138139
.filter(item => item)
139140
}
140141

142+
function formatDownloadLink(lang, name, version) {
143+
const fullName = `${lang}_${name}-${version}`
144+
const filename = `${fullName}-py3-none-any.whl`
145+
const url = `https://github.com/explosion/spacy-models/releases/download/${fullName}/${filename}`
146+
return <Link to={url} hideIcon>{filename}</Link>
147+
}
148+
141149
function formatModelMeta(data) {
142150
return {
143151
fullName: `${data.lang}_${data.name}-${data.version}`,
@@ -154,6 +162,7 @@ function formatModelMeta(data) {
154162
labels: isEmptyObj(data.labels) ? null : data.labels,
155163
vectors: formatVectors(data.vectors),
156164
accuracy: formatAccuracy(data.performance, data.lang),
165+
download_link: formatDownloadLink(data.lang, data.name, data.version),
157166
}
158167
}
159168

@@ -244,6 +253,7 @@ const Model = ({
244253
{ label: 'Components', content: components, help: MODEL_META.components },
245254
{ label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline },
246255
{ label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs },
256+
{ label: 'Download Link', content: meta.download_link, help: MODEL_META.download_link },
247257
{ label: 'Sources', content: sources, help: MODEL_META.sources },
248258
{ label: 'Author', content: author },
249259
{ label: 'License', content: license },

0 commit comments

Comments
 (0)