asset explorer traversal only

imilchev · imilchev · commit ca83beebbc40 · 2026-04-01T22:36:12.000+03:00
Signed-off-by: Ivan Milchev <ivan@mondoo.com>
diff --git a/.claude/skills/staged-discovery/SKILL.md b/.claude/skills/staged-discovery/SKILL.md
@@ -153,7 +153,46 @@ func discoverScopedStage(runtime *plugin.Runtime, conn YourConnection, invConfig
 }
 ```
 
-### Step 5: Gate resource methods at higher scopes (if needed)
+### Step 5: Mark intermediate assets as traversal-only
+
+When users specify discovery targets (e.g., `--discover pods`), intermediate assets that don't match the targets must still be traversed (to discover children) but should NOT appear in scan results. Set `OptionTraversalOnly` on their connection config.
+
+The provider already knows the discovery targets from `invConfig.Discover.Targets`. When emitting intermediate scope assets in Stage 1, check whether that scope level is targeted:
+
+```go
+import "go.mondoo.com/mql/v13/providers-sdk/v1/plugin"
+
+// In your root stage, when emitting child scope assets:
+for _, child := range children {
+    childConfig := invConfig.Clone()
+    childConfig.Options["your-scope-option"] = child.ID
+
+    // Scannable only if this level is targeted (explicitly, or via
+    // DiscoveryAuto/DiscoveryAll). Otherwise it's traversal-only: AssetExplorer
+    // connects to it (triggering the next stage) but excludes it from scan results.
+    if !isTargeted(invConfig.Discover.Targets, YourScopeDiscoveryTarget) {
+        childConfig.Options[plugin.OptionTraversalOnly] = ""
+    }
+
+    childAsset := &inventory.Asset{
+        PlatformIds: []string{child.PlatformId},
+        Name:        child.Name,
+        Platform:    child.Platform,
+        Connections: []*inventory.Config{childConfig},
+    }
+    in.Spec.Assets = append(in.Spec.Assets, childAsset)
+}
+```
+
+**Key rules:**
+- `OptionTraversalOnly` is set per-asset on the connection config, not globally
+- Leaf assets (the bottom of the hierarchy) are never traversal-only — they're always scannable if they match targets
+- Mixed targets (e.g., `--discover pods,namespaces`) — if the intermediate level IS a target, don't set `OptionTraversalOnly`. It gets scanned AND traversed.
+- `DiscoveryAuto` and `DiscoveryAll` targets mean everything is scannable — never set `OptionTraversalOnly`
+
+Callers use `explorer.ScannableAssets()` instead of `explorer.Connected()` to get only assets that should be scanned. The depth-first traversal still connects everything.
+
+### Step 6: Gate resource methods at higher scopes
 
 When the root scope is scanned, resource methods that load lower-scope data should return empty results to avoid loading everything into the root's cache. This is optional but important for large providers.
 
@@ -176,9 +215,9 @@ func (r *mqlYourProvider) childScopedResources() ([]interface{}, error) {
 }
 ```
 
-### Step 6: Verify both paths produce the same assets
+### Step 7: Verify both paths and discovery targets
 
-Both the legacy and staged paths must discover the same final set of assets (same platform IDs, same names). They differ only in how discovery is chunked.
+Both the legacy and staged paths must discover the same final set of assets (same platform IDs, same names). They differ only in how discovery is chunked. Also verify that discovery targets correctly filter scannable assets.
 
 ```bash
 # Build and install
@@ -192,11 +231,15 @@ mql shell <provider-args>
 # Verify the same assets appear
 mql shell <provider-args>
 
+# Test discovery target filtering (e.g., only pods, only instances)
+# Verify that intermediate assets are traversed but not scanned
+mql shell <provider-args> --discover <specific-target>
+
 # Run existing tests
 go test ./providers/<name>/...
 ```
 
-### Step 7: Update .lr.versions if new resources were added
+### Step 8: Update .lr.versions if new resources were added
 
 If you added any new resources or fields to support staged discovery, update the `.lr.versions` file:
 
@@ -214,7 +257,10 @@ make providers/mqlr
 - [ ] Child connection configs include the scope option that triggers the next stage
 - [ ] `OptionStagedDiscovery` is propagated via `Clone()` to all child configs
 - [ ] Resource methods at root scope are gated to avoid loading child-scope data into root cache
+- [ ] Intermediate assets set `OptionTraversalOnly` when they don't match discovery targets
+- [ ] `DiscoveryAuto` / `DiscoveryAll` targets never set `OptionTraversalOnly`
 - [ ] Both legacy and staged paths produce the same set of assets
+- [ ] Discovery target filtering works (e.g., `--discover pods` only scans pods, not namespaces)
 - [ ] `go build ./providers/<name>/...` compiles
 - [ ] `go test ./providers/<name>/...` passes
 - [ ] `make test/lint` passes
diff --git a/discovery/asset_explorer.go b/discovery/asset_explorer.go
@@ -16,6 +16,7 @@ import (
 	"go.mondoo.com/mql/v13/providers"
 	inventory "go.mondoo.com/mql/v13/providers-sdk/v1/inventory"
 	"go.mondoo.com/mql/v13/providers-sdk/v1/inventory/manager"
+	"go.mondoo.com/mql/v13/providers-sdk/v1/plugin"
 	"go.mondoo.com/mql/v13/providers-sdk/v1/upstream"
 	"go.mondoo.com/mql/v13/utils/slicesx"
 )
@@ -42,11 +43,12 @@ const (
 // tree relationships. Callers receive pointers to TrackedAsset and pass them
 // back to AssetExplorer methods.
 type TrackedAsset struct {
-	Asset    *inventory.Asset
-	Runtime  *providers.Runtime // nil when Discovered or Closed
-	State    AssetState
-	Parent   *TrackedAsset   // nil for root assets
-	Children []*TrackedAsset // populated when this asset is Connected
+	Asset         *inventory.Asset
+	Runtime       *providers.Runtime // nil when Discovered or Closed
+	State         AssetState
+	Parent        *TrackedAsset   // nil for root assets
+	Children      []*TrackedAsset // populated when this asset is Connected
+	TraversalOnly bool            // true if this asset is only for traversal (not scannable)
 }
 
 // Display implements the SelectableItem interface from cli/components,
@@ -168,6 +170,23 @@ func (e *AssetExplorer) Connected() []*TrackedAsset {
 	return result
 }
 
+// ScannableAssets returns all connected assets that are not marked as
+// traversal-only. Use this instead of Connected() when building the list
+// of assets to scan or query — traversal-only assets exist only to discover
+// their children and should not appear in scan results.
+func (e *AssetExplorer) ScannableAssets() []*TrackedAsset {
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	var result []*TrackedAsset
+	for _, a := range e.allAssets {
+		if a.State == AssetConnected && !a.TraversalOnly {
+			result = append(result, a)
+		}
+	}
+	return result
+}
+
 // Connect connects to a tracked asset, creating its runtime and discovering
 // its immediate children. Returns the connected asset (whose Children field
 // is populated with any newly discovered children).
@@ -292,9 +311,10 @@ func (e *AssetExplorer) discoverChildren(parent *TrackedAsset) {
 		}
 
 		child := &TrackedAsset{
-			Asset:  childAsset,
-			State:  AssetDiscovered,
-			Parent: parent,
+			Asset:         childAsset,
+			State:         AssetDiscovered,
+			Parent:        parent,
+			TraversalOnly: hasOption(childAsset, plugin.OptionTraversalOnly),
 		}
 		e.allAssets = append(e.allAssets, child)
 		parent.Children = append(parent.Children, child)
@@ -370,6 +390,16 @@ func (e *AssetExplorer) findByPlatformIDs(ids []string) *TrackedAsset {
 	return nil
 }
 
+// hasOption returns true if the asset has the given option key set in its
+// first connection config.
+func hasOption(asset *inventory.Asset, key string) bool {
+	if len(asset.Connections) == 0 || asset.Connections[0].Options == nil {
+		return false
+	}
+	_, ok := asset.Connections[0].Options[key]
+	return ok
+}
+
 // findRootAsset walks up the parent chain to find the root asset for
 // prepareAsset labeling. Must be called with e.mu held.
 func (e *AssetExplorer) findRootAsset(asset *TrackedAsset) *inventory.Asset {
diff --git a/docs/adr/002-staged-discovery.md b/docs/adr/002-staged-discovery.md
@@ -115,6 +115,62 @@ Stage 2 — Region scope:
     API clients — when the region is closed, all its resource data is freed
 ```
 
+### Traversal-Only Assets (Discovery Target Filtering)
+
+Staged discovery introduces a second concern: **not every intermediate asset should be scanned**. When a user specifies discovery targets like `--discover pods`, they want only pods as scannable assets. Namespaces are still needed for traversal (connecting to a namespace triggers Stage 2 which discovers pods), but namespaces themselves should not appear in the scan results.
+
+This is solved with `OptionTraversalOnly` — an inventory connection option that providers set on intermediate assets when those assets don't match the requested discovery targets. `AssetExplorer` treats traversal-only assets normally for connection and child discovery, but excludes them from scan results via `ScannableAssets()`.
+
+**Provider side** — the provider already knows the discovery targets from `invConfig.Discover.Targets`. When emitting intermediate assets, check whether that level is a target:
+
+```go
+// In discoverClusterStage, when emitting namespace assets:
+for _, ns := range nss {
+    nsConfig := invConfig.Clone()
+    nsConfig.Options[shared.OPTION_NAMESPACE] = ns.Name
+
+    // Namespaces are only scannable if explicitly targeted.
+    // Otherwise they're traversal-only: AssetExplorer connects to them
+    // (triggering Stage 2) but excludes them from scan results.
+    if !stringx.ContainsAnyOf(invConfig.Discover.Targets,
+        DiscoveryNamespaces, DiscoveryAuto, DiscoveryAll) {
+        nsConfig.Options[plugin.OptionTraversalOnly] = ""
+    }
+
+    ns.Connections = []*inventory.Config{nsConfig}
+    in.Spec.Assets = append(in.Spec.Assets, ns)
+}
+```
+
+**AssetExplorer side** — `TrackedAsset` exposes a `TraversalOnly` field, populated from the connection option when the asset is discovered as a child (before it is connected). Callers use `ScannableAssets()` to get only assets that should be scanned:
+
+```go
+// In AssetExplorer
+func (e *AssetExplorer) ScannableAssets() []*TrackedAsset {
+    var result []*TrackedAsset
+    for _, a := range e.Connected() {
+        if !a.TraversalOnly {
+            result = append(result, a)
+        }
+    }
+    return result
+}
+```
+
+**Caller side** — scan loops use `ScannableAssets()` instead of `Connected()`. The depth-first traversal still connects everything (traversal-only and scannable), but only scannable assets are sent to `SynchronizeAssets` / query execution / scan jobs.
+
+**How this generalizes:**
+
+| Command | Traversal-only | Scannable |
+|---|---|---|
+| `k8s --discover pods` | namespaces | pods |
+| `k8s --discover namespaces` | (none) | cluster + namespaces |
+| `k8s --discover all` | (none) | cluster + namespaces + all workloads |
+| `gcp --discover compute-instances` | org, projects, service groups | compute instances |
+| `aws --discover ec2-instances` | accounts, regions | EC2 instances |
+
+**Mixed targets** (`--discover pods,namespaces`): namespaces are both scannable AND traversal nodes. The provider simply doesn't set `OptionTraversalOnly`. They get scanned and their children get discovered. No special handling needed.
+
 ### Provider Implementation Guide
 
 To add staged discovery to a provider:
@@ -172,6 +228,7 @@ Have `AssetExplorer` automatically infer hierarchy from platform IDs or asset me
 - **Bounded memory per branch:** Each scope boundary creates a separate runtime with its own MQL resource cache. When a scope is closed (`CloseAsset`), its entire cache — all MQL resource objects, API responses, and connection state — is released. Only one branch of the hierarchy is in memory at a time. A 1000-namespace cluster uses the same peak memory as a 5-namespace cluster.
 - **No root cache accumulation:** In single-pass discovery, all resources attach to the root runtime's cache and are never released until the scan completes. Staged discovery breaks this by giving each scope its own cache — pods in namespace A are cached in namespace A's runtime, not the cluster root's. When namespace A is closed, those pods are gone from memory.
 - **Reduced API pressure:** Each stage only queries the APIs needed for its scope. No cluster-wide enumeration of every resource type.
+- **Discovery target filtering without hierarchy knowledge:** Callers specify what to scan (e.g., `--discover pods`), and providers mark intermediate levels as traversal-only. `AssetExplorer.ScannableAssets()` returns only the targeted assets. The caller doesn't need to know which levels are intermediate — it just connects everything and filters at the end.
 - **Composable with AssetExplorer:** Callers don't need to understand stages — they just connect discovered children as usual. The staging is entirely provider-internal.
 - **Backward compatible:** The `OptionStagedDiscovery` flag is opt-in. Providers without staged discovery and callers that don't set the flag continue working unchanged.
 - **Cache sharing within scope:** `WithParentConnectionId` lets leaf assets within a scope (e.g., pods within a namespace) share that scope's API client cache, avoiding redundant API calls — while keeping the cache isolated from other scopes.
diff --git a/providers-sdk/v1/plugin/connection.go b/providers-sdk/v1/plugin/connection.go
@@ -12,6 +12,13 @@ const (
 	// workloads per namespace later). When absent, legacy single-pass
 	// discovery runs unchanged for backward compatibility.
 	OptionStagedDiscovery = "staged-discovery"
+
+	// OptionTraversalOnly marks an asset as a traversal node that should not
+	// be scanned. AssetExplorer still connects to it (to discover children)
+	// but excludes it from ScannableAssets(). Providers set this on
+	// intermediate hierarchy levels (e.g. namespaces) when those levels are
+	// not in the requested discovery targets.
+	OptionTraversalOnly = "traversal-only"
 )
 
 type Connection interface {
diff --git a/providers/k8s/resources/discovery.go b/providers/k8s/resources/discovery.go
@@ -230,6 +230,12 @@ func discoverClusterStage(runtime *plugin.Runtime, conn shared.Connection, invCo
 		return nil, err
 	}
 
+	// Namespaces are only scannable if explicitly targeted. Otherwise they
+	// are traversal-only: AssetExplorer connects to them (triggering stage 2
+	// workload discovery) but excludes them from scan results.
+	nsIsScannable := stringx.ContainsAnyOf(invConfig.Discover.Targets,
+		DiscoveryNamespaces, DiscoveryAuto, DiscoveryAll)
+
 	for _, ns := range nss {
 		// Clone without WithParentConnectionId so each namespace gets its own
 		// resource cache. With a shared parent cache, the k8s MQL resource would
@@ -238,6 +244,10 @@ func discoverClusterStage(runtime *plugin.Runtime, conn shared.Connection, invCo
 		nsConfig := invConfig.Clone() // Clone() copies Options, propagating OPTION_STAGED_DISCOVERY
 		nsConfig.Options[shared.OPTION_NAMESPACE] = ns.Name
 
+		if !nsIsScannable {
+			nsConfig.Options[plugin.OptionTraversalOnly] = ""
+		}
+
 		// Override the connection config to route to stage 2, but keep the
 		// namespace's platform IDs, platform, and labels from discoverNamespaces().
 		ns.Connections = []*inventory.Config{nsConfig}
diff --git a/providers/k8s/resources/k8s.lr.versions b/providers/k8s/resources/k8s.lr.versions
@@ -1,4 +1,4 @@
-# Copyright Mondoo, Inc. 2024, 2026
+# Copyright (c) Mondoo, Inc.
 # SPDX-License-Identifier: BUSL-1.1
 
 k8s 9.0.0

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-# Copyright Mondoo, Inc. 2024, 2026`
	`1`	`+# Copyright (c) Mondoo, Inc.`
`2`	`2`	`# SPDX-License-Identifier: BUSL-1.1`
`3`	`3`
`4`	`4`	`k8s 9.0.0`