Skip to content

Commit ca83bee

Browse files
committed
asset explorer traversal only
Signed-off-by: Ivan Milchev <ivan@mondoo.com>
1 parent 2151fe4 commit ca83bee

File tree

6 files changed

+163
-13
lines changed

6 files changed

+163
-13
lines changed

.claude/skills/staged-discovery/SKILL.md

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,46 @@ func discoverScopedStage(runtime *plugin.Runtime, conn YourConnection, invConfig
153153
}
154154
```
155155

156-
### Step 5: Gate resource methods at higher scopes (if needed)
156+
### Step 5: Mark intermediate assets as traversal-only
157+
158+
When users specify discovery targets (e.g., `--discover pods`), intermediate assets that don't match the targets must still be traversed (to discover children) but should NOT appear in scan results. Set `OptionTraversalOnly` on their connection config.
159+
160+
The provider already knows the discovery targets from `invConfig.Discover.Targets`. When emitting intermediate scope assets in Stage 1, check whether that scope level is targeted:
161+
162+
```go
163+
import "go.mondoo.com/mql/v13/providers-sdk/v1/plugin"
164+
165+
// In your root stage, when emitting child scope assets:
166+
for _, child := range children {
167+
childConfig := invConfig.Clone()
168+
childConfig.Options["your-scope-option"] = child.ID
169+
170+
// Mark as scannable only if this level is targeted. isTargeted must also
// return true for DiscoveryAuto / DiscoveryAll, which make every level scannable.
171+
// Otherwise it's traversal-only: AssetExplorer connects to it
172+
// (triggering the next stage) but excludes it from scan results.
173+
if !isTargeted(invConfig.Discover.Targets, YourScopeDiscoveryTarget) {
174+
childConfig.Options[plugin.OptionTraversalOnly] = ""
175+
}
176+
177+
childAsset := &inventory.Asset{
178+
PlatformIds: []string{child.PlatformId},
179+
Name: child.Name,
180+
Platform: child.Platform,
181+
Connections: []*inventory.Config{childConfig},
182+
}
183+
in.Spec.Assets = append(in.Spec.Assets, childAsset)
184+
}
185+
```
186+
187+
**Key rules:**
188+
- `OptionTraversalOnly` is set per-asset on the connection config, not globally
189+
- Leaf assets (the bottom of the hierarchy) are never traversal-only — they're always scannable if they match targets
190+
- Mixed targets (e.g., `--discover pods,namespaces`) — if the intermediate level IS a target, don't set `OptionTraversalOnly`. It gets scanned AND traversed.
191+
- `DiscoveryAuto` and `DiscoveryAll` targets mean everything is scannable — never set `OptionTraversalOnly`
192+
193+
Callers use `explorer.ScannableAssets()` instead of `explorer.Connected()` to get only assets that should be scanned. The depth-first traversal still connects everything.
194+
195+
### Step 6: Gate resource methods at higher scopes
157196

158197
When the root scope is scanned, resource methods that load lower-scope data should return empty results to avoid loading everything into the root's cache. This is optional but important for large providers.
159198

@@ -176,9 +215,9 @@ func (r *mqlYourProvider) childScopedResources() ([]interface{}, error) {
176215
}
177216
```
178217

179-
### Step 6: Verify both paths produce the same assets
218+
### Step 7: Verify both paths and discovery targets
180219

181-
Both the legacy and staged paths must discover the same final set of assets (same platform IDs, same names). They differ only in how discovery is chunked.
220+
Both the legacy and staged paths must discover the same final set of assets (same platform IDs, same names). They differ only in how discovery is chunked. Also verify that discovery targets correctly filter scannable assets.
182221

183222
```bash
184223
# Build and install
@@ -192,11 +231,15 @@ mql shell <provider-args>
192231
# Verify the same assets appear
193232
mql shell <provider-args>
194233

234+
# Test discovery target filtering (e.g., only pods, only instances)
235+
# Verify that intermediate assets are traversed but not scanned
236+
mql shell <provider-args> --discover <specific-target>
237+
195238
# Run existing tests
196239
go test ./providers/<name>/...
197240
```
198241

199-
### Step 7: Update .lr.versions if new resources were added
242+
### Step 8: Update .lr.versions if new resources were added
200243

201244
If you added any new resources or fields to support staged discovery, update the `.lr.versions` file:
202245

@@ -214,7 +257,10 @@ make providers/mqlr
214257
- [ ] Child connection configs include the scope option that triggers the next stage
215258
- [ ] `OptionStagedDiscovery` is propagated via `Clone()` to all child configs
216259
- [ ] Resource methods at root scope are gated to avoid loading child-scope data into root cache
260+
- [ ] Intermediate assets set `OptionTraversalOnly` when they don't match discovery targets
261+
- [ ] `DiscoveryAuto` / `DiscoveryAll` targets never set `OptionTraversalOnly`
217262
- [ ] Both legacy and staged paths produce the same set of assets
263+
- [ ] Discovery target filtering works (e.g., `--discover pods` only scans pods, not namespaces)
218264
- [ ] `go build ./providers/<name>/...` compiles
219265
- [ ] `go test ./providers/<name>/...` passes
220266
- [ ] `make test/lint` passes

discovery/asset_explorer.go

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"go.mondoo.com/mql/v13/providers"
1717
inventory "go.mondoo.com/mql/v13/providers-sdk/v1/inventory"
1818
"go.mondoo.com/mql/v13/providers-sdk/v1/inventory/manager"
19+
"go.mondoo.com/mql/v13/providers-sdk/v1/plugin"
1920
"go.mondoo.com/mql/v13/providers-sdk/v1/upstream"
2021
"go.mondoo.com/mql/v13/utils/slicesx"
2122
)
@@ -42,11 +43,12 @@ const (
4243
// tree relationships. Callers receive pointers to TrackedAsset and pass them
4344
// back to AssetExplorer methods.
4445
type TrackedAsset struct {
45-
Asset *inventory.Asset
46-
Runtime *providers.Runtime // nil when Discovered or Closed
47-
State AssetState
48-
Parent *TrackedAsset // nil for root assets
49-
Children []*TrackedAsset // populated when this asset is Connected
46+
Asset *inventory.Asset
47+
Runtime *providers.Runtime // nil when Discovered or Closed
48+
State AssetState
49+
Parent *TrackedAsset // nil for root assets
50+
Children []*TrackedAsset // populated when this asset is Connected
51+
TraversalOnly bool // true if this asset is only for traversal (not scannable)
5052
}
5153

5254
// Display implements the SelectableItem interface from cli/components,
@@ -168,6 +170,23 @@ func (e *AssetExplorer) Connected() []*TrackedAsset {
168170
return result
169171
}
170172

173+
// ScannableAssets returns all connected assets that are not marked as
174+
// traversal-only. Use this instead of Connected() when building the list
175+
// of assets to scan or query — traversal-only assets exist only to discover
176+
// their children and should not appear in scan results.
177+
func (e *AssetExplorer) ScannableAssets() []*TrackedAsset {
178+
e.mu.Lock()
179+
defer e.mu.Unlock()
180+
181+
var result []*TrackedAsset
182+
for _, a := range e.allAssets {
183+
if a.State == AssetConnected && !a.TraversalOnly {
184+
result = append(result, a)
185+
}
186+
}
187+
return result
188+
}
189+
171190
// Connect connects to a tracked asset, creating its runtime and discovering
172191
// its immediate children. Returns the connected asset (whose Children field
173192
// is populated with any newly discovered children).
@@ -292,9 +311,10 @@ func (e *AssetExplorer) discoverChildren(parent *TrackedAsset) {
292311
}
293312

294313
child := &TrackedAsset{
295-
Asset: childAsset,
296-
State: AssetDiscovered,
297-
Parent: parent,
314+
Asset: childAsset,
315+
State: AssetDiscovered,
316+
Parent: parent,
317+
TraversalOnly: hasOption(childAsset, plugin.OptionTraversalOnly),
298318
}
299319
e.allAssets = append(e.allAssets, child)
300320
parent.Children = append(parent.Children, child)
@@ -370,6 +390,16 @@ func (e *AssetExplorer) findByPlatformIDs(ids []string) *TrackedAsset {
370390
return nil
371391
}
372392

393+
// hasOption returns true if the asset has the given option key set in its
394+
// first connection config.
395+
func hasOption(asset *inventory.Asset, key string) bool {
396+
if len(asset.Connections) == 0 || asset.Connections[0].Options == nil {
397+
return false
398+
}
399+
_, ok := asset.Connections[0].Options[key]
400+
return ok
401+
}
402+
373403
// findRootAsset walks up the parent chain to find the root asset for
374404
// prepareAsset labeling. Must be called with e.mu held.
375405
func (e *AssetExplorer) findRootAsset(asset *TrackedAsset) *inventory.Asset {

docs/adr/002-staged-discovery.md

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,62 @@ Stage 2 — Region scope:
115115
API clients — when the region is closed, all its resource data is freed
116116
```
117117

118+
### Traversal-Only Assets (Discovery Target Filtering)
119+
120+
Staged discovery introduces a second concern: **not every intermediate asset should be scanned**. When a user specifies discovery targets like `--discover pods`, they want only pods as scannable assets. Namespaces are still needed for traversal (connecting to a namespace triggers Stage 2 which discovers pods), but namespaces themselves should not appear in the scan results.
121+
122+
This is solved with `OptionTraversalOnly` — an inventory connection option that providers set on intermediate assets when those assets don't match the requested discovery targets. `AssetExplorer` treats traversal-only assets normally for connection and child discovery, but excludes them from scan results via `ScannableAssets()`.
123+
124+
**Provider side** — the provider already knows the discovery targets from `invConfig.Discover.Targets`. When emitting intermediate assets, check whether that level is a target:
125+
126+
```go
127+
// In discoverClusterStage, when emitting namespace assets:
128+
for _, ns := range nss {
129+
nsConfig := invConfig.Clone()
130+
nsConfig.Options[shared.OPTION_NAMESPACE] = ns.Name
131+
132+
// Namespaces are scannable only if targeted (DiscoveryNamespaces, DiscoveryAuto, or DiscoveryAll).
133+
// Otherwise they're traversal-only: AssetExplorer connects to them
134+
// (triggering Stage 2) but excludes them from scan results.
135+
if !stringx.ContainsAnyOf(invConfig.Discover.Targets,
136+
DiscoveryNamespaces, DiscoveryAuto, DiscoveryAll) {
137+
nsConfig.Options[plugin.OptionTraversalOnly] = ""
138+
}
139+
140+
ns.Connections = []*inventory.Config{nsConfig}
141+
in.Spec.Assets = append(in.Spec.Assets, ns)
142+
}
143+
```
144+
145+
**AssetExplorer side** — `TrackedAsset` exposes a `TraversalOnly` field, populated from the connection option when the asset is discovered as a child (in `discoverChildren`), before it is connected. Callers use `ScannableAssets()` to get only assets that should be scanned:
146+
147+
```go
148+
// In AssetExplorer
149+
func (e *AssetExplorer) ScannableAssets() []*TrackedAsset {
150+
var result []*TrackedAsset
151+
for _, a := range e.Connected() {
152+
if !a.TraversalOnly {
153+
result = append(result, a)
154+
}
155+
}
156+
return result
157+
}
158+
```
159+
160+
**Caller side** — scan loops use `ScannableAssets()` instead of `Connected()`. The depth-first traversal still connects everything (traversal-only and scannable), but only scannable assets are sent to `SynchronizeAssets` / query execution / scan jobs.
161+
162+
**How this generalizes:**
163+
164+
| Command | Traversal-only | Scannable |
165+
|---|---|---|
166+
| `k8s --discover pods` | namespaces | pods |
167+
| `k8s --discover namespaces` | (none) | cluster + namespaces |
168+
| `k8s --discover all` | (none) | cluster + namespaces + all workloads |
169+
| `gcp --discover compute-instances` | org, projects, service groups | compute instances |
170+
| `aws --discover ec2-instances` | accounts, regions | EC2 instances |
171+
172+
**Mixed targets** (`--discover pods,namespaces`): namespaces are both scannable AND traversal nodes. The provider simply doesn't set `OptionTraversalOnly`. They get scanned and their children get discovered. No special handling needed.
173+
118174
### Provider Implementation Guide
119175

120176
To add staged discovery to a provider:
@@ -172,6 +228,7 @@ Have `AssetExplorer` automatically infer hierarchy from platform IDs or asset me
172228
- **Bounded memory per branch:** Each scope boundary creates a separate runtime with its own MQL resource cache. When a scope is closed (`CloseAsset`), its entire cache — all MQL resource objects, API responses, and connection state — is released. Only one branch of the hierarchy is in memory at a time. A 1000-namespace cluster uses the same peak memory as a 5-namespace cluster.
173229
- **No root cache accumulation:** In single-pass discovery, all resources attach to the root runtime's cache and are never released until the scan completes. Staged discovery breaks this by giving each scope its own cache — pods in namespace A are cached in namespace A's runtime, not the cluster root's. When namespace A is closed, those pods are gone from memory.
174230
- **Reduced API pressure:** Each stage only queries the APIs needed for its scope. No cluster-wide enumeration of every resource type.
231+
- **Discovery target filtering without hierarchy knowledge:** Callers specify what to scan (e.g., `--discover pods`), and providers mark intermediate levels as traversal-only. `AssetExplorer.ScannableAssets()` returns only the targeted assets. The caller doesn't need to know which levels are intermediate — it just connects everything and filters at the end.
175232
- **Composable with AssetExplorer:** Callers don't need to understand stages — they just connect discovered children as usual. The staging is entirely provider-internal.
176233
- **Backward compatible:** The `OptionStagedDiscovery` flag is opt-in. Providers without staged discovery and callers that don't set the flag continue working unchanged.
177234
- **Cache sharing within scope:** `WithParentConnectionId` lets leaf assets within a scope (e.g., pods within a namespace) share that scope's API client cache, avoiding redundant API calls — while keeping the cache isolated from other scopes.

providers-sdk/v1/plugin/connection.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@ const (
1212
// workloads per namespace later). When absent, legacy single-pass
1313
// discovery runs unchanged for backward compatibility.
1414
OptionStagedDiscovery = "staged-discovery"
15+
16+
// OptionTraversalOnly marks an asset as a traversal node that should not
17+
// be scanned. AssetExplorer still connects to it (to discover children)
18+
// but excludes it from ScannableAssets(). Providers set this on
19+
// intermediate hierarchy levels (e.g. namespaces) when those levels are
20+
// not in the requested discovery targets.
21+
OptionTraversalOnly = "traversal-only"
1522
)
1623

1724
type Connection interface {

providers/k8s/resources/discovery.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,12 @@ func discoverClusterStage(runtime *plugin.Runtime, conn shared.Connection, invCo
230230
return nil, err
231231
}
232232

233+
// Namespaces are scannable only when the targets include DiscoveryNamespaces,
// DiscoveryAuto, or DiscoveryAll. Otherwise they
234+
// are traversal-only: AssetExplorer connects to them (triggering stage 2
235+
// workload discovery) but excludes them from scan results.
236+
nsIsScannable := stringx.ContainsAnyOf(invConfig.Discover.Targets,
237+
DiscoveryNamespaces, DiscoveryAuto, DiscoveryAll)
238+
233239
for _, ns := range nss {
234240
// Clone without WithParentConnectionId so each namespace gets its own
235241
// resource cache. With a shared parent cache, the k8s MQL resource would
@@ -238,6 +244,10 @@ func discoverClusterStage(runtime *plugin.Runtime, conn shared.Connection, invCo
238244
nsConfig := invConfig.Clone() // Clone() copies Options, propagating OPTION_STAGED_DISCOVERY
239245
nsConfig.Options[shared.OPTION_NAMESPACE] = ns.Name
240246

247+
if !nsIsScannable {
248+
nsConfig.Options[plugin.OptionTraversalOnly] = ""
249+
}
250+
241251
// Override the connection config to route to stage 2, but keep the
242252
// namespace's platform IDs, platform, and labels from discoverNamespaces().
243253
ns.Connections = []*inventory.Config{nsConfig}

providers/k8s/resources/k8s.lr.versions

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright Mondoo, Inc. 2024, 2026
1+
# Copyright (c) Mondoo, Inc.
22
# SPDX-License-Identifier: BUSL-1.1
33

44
k8s 9.0.0

0 commit comments

Comments
 (0)