Skip to content

Commit 4492b38

Browse files
jonkoops authored and eskultety committed
feat(yarn): add workspace focus support for Yarn v4
Allow users to specify workspaces in the package input so that only the dependencies for those workspaces are prefetched via `yarn workspaces focus`. The output is filtered to reflect only the packages actually installed for the focused workspaces, including their transitive workspace dependencies.

`yarn workspaces focus` does not support `--mode skip-build`, and `enableScripts: false` does not apply to workspace scripts. To prevent lifecycle scripts from executing during focus, the `scripts` field is stripped from workspace `package.json` files before running the command.

Signed-off-by: Jon Koops <jonkoops@gmail.com>
1 parent 808e46a commit 4492b38

12 files changed

Lines changed: 483 additions & 28 deletions

File tree

docs/yarn.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- [Dealing with Yarn Zero-Installs](#dealing-with-yarn-zero-installs)
88
- [Dealing with plugins](#dealing-with-plugins)
99
- [Specifying packages to process](#specifying-packages-to-process)
10+
- [Workspace focus](#workspace-focus)
1011
- [Controlling Yarn's behavior](#controlling-yarns-behavior)
1112
- [Downloading dependencies](#downloading-dependencies)
1213
- [Known pitfalls](#known-pitfalls)
@@ -115,6 +116,43 @@ or more simply by just invoking `hermeto fetch-deps yarn`.
115116
For a complete example of how to pre-fetch dependencies, see
116117
[Example: Pre-fetch dependencies](#pre-fetch-dependencies).
117118

119+
### Workspace focus
120+
121+
For monorepo projects using [Yarn workspaces][], you can instruct Hermeto to
122+
fetch only the dependencies needed for specific workspaces rather than the
123+
entire project. This is done by specifying the `workspaces` field in the JSON
124+
input:
125+
126+
```js
127+
{
128+
"type": "yarn",
129+
"path": ".",
130+
// Only fetch dependencies for the "my-app" workspace
131+
"workspaces": ["my-app"]
132+
}
133+
```
134+
135+
Hermeto will fetch only the dependencies needed for the specified workspaces
136+
and their transitive workspace dependencies. Note that `devDependencies` are
137+
also included and will appear in the SBOM.
138+
139+
You can specify multiple workspace names:
140+
141+
```shell
142+
hermeto fetch-deps \
143+
--source ./my-monorepo \
144+
--output ./hermeto-output \
145+
'{"type": "yarn", "workspaces": ["app-frontend", "lib-shared"]}'
146+
```
147+
148+
> **NOTE**
149+
>
150+
> This feature requires **Yarn v4**. The workspace names must match the `name`
151+
> field in each workspace's `package.json`.
152+
153+
When `workspaces` is not specified, Hermeto fetches dependencies for all
154+
workspaces.
155+
118156
### Controlling Yarn's behavior
119157

120158
Hermeto instructs Yarn to download dependencies explicitly declared in
@@ -269,5 +307,6 @@ podman build . \
269307
[project]: https://github.com/hermetoproject/doc-examples/tree/yarn-basic
270308
[yarn install]: https://yarnpkg.com/getting-started/usage/#installing-all-the-dependencies
271309
[Yarn protocols]: https://yarnpkg.com/protocols
310+
[Yarn workspaces]: https://yarnpkg.com/features/workspaces
272311
[yarn]: https://yarnpkg.com
273312
[ZIP archives]: https://yarnpkg.com/features/pnp/#packages-are-stored-inside-zip-archives-how-can-i-access-their-files

hermeto/core/models/input.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,14 @@ class YarnPackageInput(_PackageInputBase):
396396
"""Accepted input for a yarn package."""
397397

398398
type: Literal["yarn"]
399+
workspaces: list[str] | None = None
400+
401+
@pydantic.field_validator("workspaces")
@classmethod
def _workspaces_not_empty(cls, workspaces: list[str] | None) -> list[str] | None:
    """Reject an explicitly empty ``workspaces`` list.

    ``None`` (the field's default) is returned unchanged; only an empty list is
    refused, so callers must omit the field rather than pass ``[]``.

    :param workspaces: the value supplied for the ``workspaces`` field, if any.
    :return: the value that was passed in.
    :raises ValueError: if ``workspaces`` is an empty list.
    """
    if workspaces is None:
        return None
    if not workspaces:
        raise ValueError("'workspaces' must not be an empty list, omit the field instead")
    return workspaces
399407

400408

401409
PackageInput = Annotated[

hermeto/core/package_managers/yarn/main.py

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,20 @@
99
from hermeto.core.models.input import Request
1010
from hermeto.core.models.output import Component, EnvironmentVariable, RequestOutput
1111
from hermeto.core.models.sbom import create_backend_annotation
12+
from hermeto.core.package_managers.yarn.locators import WorkspaceLocator
1213
from hermeto.core.package_managers.yarn.project import (
14+
PackageJson,
1315
Plugin,
1416
Project,
1517
YarnRc,
1618
get_semver_from_package_manager,
1719
get_semver_from_yarn_path,
1820
)
19-
from hermeto.core.package_managers.yarn.resolver import create_components, resolve_packages
21+
from hermeto.core.package_managers.yarn.resolver import (
22+
Package,
23+
create_components,
24+
resolve_packages,
25+
)
2026
from hermeto.core.package_managers.yarn.utils import (
2127
VersionsRange,
2228
extract_yarn_version_from_env,
@@ -35,7 +41,7 @@ def fetch_yarn_source(request: Request) -> RequestOutput:
3541
path = request.source_dir.join_within_root(package.path)
3642
project = Project.from_source_dir(path)
3743

38-
components.extend(_resolve_yarn_project(project, request.output_dir))
44+
components.extend(_resolve_yarn_project(project, request.output_dir, package.workspaces))
3945

4046
annotations = []
4147
if backend_annotation := create_backend_annotation(components, "yarn"):
@@ -101,21 +107,38 @@ def _verify_repository(project: Project) -> None:
101107
_check_lockfile(project)
102108

103109

104-
def _resolve_yarn_project(project: Project, output_dir: RootedPath) -> list[Component]:
110+
def _resolve_yarn_project(
    project: Project,
    output_dir: RootedPath,
    workspaces: list[str] | None = None,
) -> list[Component]:
    """Fetch the dependencies of one yarn project and describe them as components.

    :param project: the yarn project to be processed.
    :param output_dir: the directory where the prefetched dependencies will be placed.
    :param workspaces: optional list of workspace names to focus on (Yarn v4 only).
    :return: the components created from the resolved packages.
    :raises PackageRejected: if workspaces are requested but the project uses a Yarn
        version older than 4.0.0.
    :raises PackageManagerError: if fetching dependencies fails
    """
    log.info(f"Fetching the yarn dependencies at the subpath {project.source_dir}")

    version = _configure_yarn_version(project)

    # Workspace focus is only offered for Yarn v4+, so reject older projects before
    # doing any further work on the repository.
    minimum_focus_version = semver.Version.parse("4.0.0")
    if workspaces and version < minimum_focus_version:
        raise PackageRejected(
            f"Workspace focus requires Yarn v4 or later, but this project uses Yarn {version}",
            solution="Either upgrade to Yarn v4 or remove the 'workspaces' field from the input.",
        )

    _verify_repository(project)
    _set_yarnrc_configuration(project, output_dir, version)

    packages = resolve_packages(project.source_dir, workspaces)

    # Scripts are stripped only on the focus path, after the workspaces have been
    # resolved (their locations come from the resolved packages) and before fetching.
    if workspaces:
        _strip_workspace_scripts(project.source_dir, packages)

    _fetch_dependencies(project.source_dir, workspaces)

    return create_components(packages, project, output_dir)
121144

@@ -241,14 +264,44 @@ def _set_yarnrc_configuration(
241264
yarn_rc.write()
242265

243266

244-
def _fetch_dependencies(source_dir: RootedPath) -> None:
245-
"""Fetch dependencies using 'yarn install'.
267+
def _strip_workspace_scripts(source_dir: RootedPath, packages: list[Package]) -> None:
    """Drop the ``scripts`` section from the package.json of each resolved workspace.

    yarn workspaces focus does not support --mode skip-build, and enableScripts: false
    does not apply to workspace scripts (https://github.com/yarnpkg/berry/pull/4781).
    Stripping the scripts field prevents lifecycle scripts from executing during focus.

    :param source_dir: the project source directory.
    :param packages: packages returned by ``resolve_packages``, used to find workspace paths.
    """
    for package in packages:
        workspace = package.parsed_locator
        # Only workspace locators carry a path inside the project; skip everything else.
        if isinstance(workspace, WorkspaceLocator):
            _drop_scripts_section(source_dir.join_within_root(workspace.relpath, "package.json"))


def _drop_scripts_section(manifest_path: RootedPath) -> None:
    """Remove the ``scripts`` key from one package.json file, if the file has one."""
    # A workspace without a package.json on disk is silently skipped.
    if not manifest_path.path.exists():
        return

    manifest = PackageJson.from_file(manifest_path)
    if "scripts" in manifest:
        del manifest["scripts"]
        # NOTE(review): indentation was not visible in the reviewed diff; this assumes the
        # file is rewritten only when a scripts section was actually removed - confirm.
        manifest.write()
288+
289+
290+
def _fetch_dependencies(source_dir: RootedPath, workspaces: list[str] | None = None) -> None:
291+
"""Fetch dependencies using 'yarn install' or 'yarn workspaces focus'.
292+
293+
When workspaces are specified, only the dependencies of those workspaces (and their
294+
transitive workspace dependencies) are installed via 'yarn workspaces focus'.
246295
247296
:param source_dir: the directory in which the yarn command will be called.
248-
:raises PackageManagerError: if the 'yarn install' command fails.
297+
:param workspaces: optional list of workspace names to focus on (Yarn v4 only).
298+
:raises PackageManagerError: if the yarn command fails.
249299
"""
250300
try:
251-
run_yarn_cmd(["install", "--mode", "skip-build"], source_dir)
301+
if workspaces:
302+
run_yarn_cmd(["workspaces", "focus", *workspaces], source_dir)
303+
else:
304+
run_yarn_cmd(["install", "--mode", "skip-build"], source_dir)
252305
except PackageManagerError as e:
253306
# TODO: this follows a precedent set in resolver. Either a more robust way for
254307
# dealing with this must be found or a comment provided that such methods do not exist.

hermeto/core/package_managers/yarn/resolver.py

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -141,20 +141,46 @@ class _YarnInfoEntry(pydantic.BaseModel):
141141
children: _YarnInfoChildren
142142

143143

144-
def resolve_packages(source_dir: RootedPath) -> list[Package]:
144+
def resolve_packages(source_dir: RootedPath, workspaces: list[str] | None = None) -> list[Package]:
    """Fetch and parse package data from the 'yarn info' output.

    With *workspaces*, ``yarn workspace $name info`` is run once per workspace and
    the results are deduplicated. Without it, ``yarn info --all`` reports the full
    dependency graph.

    Every returned package has its locator parsed up front, so a project that
    contains unsupported locators is rejected here rather than later on.

    :param source_dir: the directory containing the yarn project.
    :param workspaces: optional list of workspace names to scope the query to.
    :return: the packages reported by yarn.
    :raises UnsupportedFeature: if an unsupported locator type is found in 'yarn info' output
    :raises PackageManagerError: if the 'yarn info' command fails.
    """
    packages = (
        _resolve_workspace_packages(source_dir, workspaces)
        if workspaces
        else _resolve_all_packages(source_dir)
    )

    # Log every unsupported locator before failing, so one run reports all of them.
    n_unsupported = 0
    for package in packages:
        try:
            _ = package.parsed_locator
        except UnsupportedFeature as e:
            log.error(e)
            n_unsupported += 1

    if n_unsupported > 0:
        raise UnsupportedFeature(
            f"Found {n_unsupported} unsupported dependencies, more details in the logs."
        )

    return packages
178+
179+
180+
def _get_packages_from_yarn_info(args: list[str], source_dir: RootedPath) -> list[Package]:
181+
"""Run a ``yarn info`` variant and return the parsed packages."""
153182
try:
154-
# --all: report dependencies of all workspaces, not just the active workspace
155-
# --recursive: report transitive dependencies, not just direct ones
156-
# --cache: include info about the cache entry for each dependency
157-
result = run_yarn_cmd(["info", "--all", "--recursive", "--cache", "--json"], source_dir)
183+
result = run_yarn_cmd(args, source_dir)
158184
except PackageManagerError as e:
159185
if e.stderr and "isn't supported by any available resolver" in e.stderr:
160186
raise UnsupportedFeature(
@@ -170,22 +196,33 @@ def resolve_packages(source_dir: RootedPath) -> list[Package]:
170196
raise
171197

172198
# the result is not a valid json list, but a sequence of json objects separated by line breaks
173-
packages = [Package.from_info_string(info) for info in result.splitlines()]
199+
return [Package.from_info_string(info) for info in result.splitlines()]
174200

175-
n_unsupported = 0
176-
for package in packages:
177-
try:
178-
_ = package.parsed_locator
179-
except UnsupportedFeature as e:
180-
log.error(e)
181-
n_unsupported += 1
182201

183-
if n_unsupported > 0:
184-
raise UnsupportedFeature(
185-
f"Found {n_unsupported} unsupported dependencies, more details in the logs."
202+
def _resolve_all_packages(source_dir: RootedPath) -> list[Package]:
    """Return the packages reported by ``yarn info --all`` for the whole project.

    :param source_dir: the directory containing the yarn project.
    """
    yarn_info_args = [
        "info",
        "--all",  # report dependencies of all workspaces, not just the active workspace
        "--recursive",  # report transitive dependencies, not just direct ones
        "--cache",  # include info about the cache entry for each dependency
        "--json",
    ]
    return _get_packages_from_yarn_info(yarn_info_args, source_dir)
209+
210+
211+
def _resolve_workspace_packages(source_dir: RootedPath, workspaces: list[str]) -> list[Package]:
    """Collect the packages reported by ``yarn workspace <name> info`` for each workspace.

    :param source_dir: the directory containing the yarn project.
    :param workspaces: names of the workspaces to query; a repeated name is queried once.
    :return: the reported packages, deduplicated by raw locator, in first-seen order.
    """
    unique_by_locator: dict[str, Package] = {}

    # dict.fromkeys drops repeated names while keeping their order, so a workspace that
    # is listed twice does not trigger a second, identical yarn invocation.
    for ws_name in dict.fromkeys(workspaces):
        # Run info scoped to a single workspace (without --all), so yarn only reports
        # the transitive dependencies of that workspace rather than the full project.
        packages = _get_packages_from_yarn_info(
            ["workspace", ws_name, "info", "--recursive", "--cache", "--json"], source_dir
        )
        # Deduplicate across workspaces: overlapping deps appear only once
        for pkg in packages:
            unique_by_locator.setdefault(pkg.raw_locator, pkg)

    return list(unique_by_locator.values())
189226

190227

191228
def create_components(
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
environment_variables:
2+
- name: YARN_ENABLE_GLOBAL_CACHE
3+
value: 'false'
4+
- name: YARN_ENABLE_IMMUTABLE_CACHE
5+
value: 'false'
6+
- name: YARN_ENABLE_MIRROR
7+
value: 'true'
8+
- name: YARN_GLOBAL_FOLDER
9+
value: ${output_dir}/deps/yarn
10+
project_files: []

0 commit comments

Comments
 (0)