mondoohq · imilchev · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026
diff --git a/.claude/skills/staged-discovery/SKILL.md b/.claude/skills/staged-discovery/SKILL.md
@@ -153,7 +153,41 @@ func discoverScopedStage(runtime *plugin.Runtime, conn YourConnection, invConfig
 }
 ```
 
-### Step 5: Gate resource methods at higher scopes (if needed)
+### Step 5: Strip platform IDs from non-targeted intermediate assets
+
+When users specify discovery targets (e.g., `--discover pods`), intermediate assets that don't match the targets must still be emitted (so that `AssetExplorer` connects to them and discovers their children), but they should NOT be scanned. The simplest way to achieve this is to **strip their platform IDs**. The existing "no platform IDs → skip" logic in `AssetExplorer` and the scanner already handles this — no caller-side changes are needed.
+
+```go
+// In your root stage, when emitting child scope assets:
+isScopeTargeted := isTargeted(invConfig.Discover.Targets, YourScopeDiscoveryTarget)
+
+for _, child := range children {
+    childConfig := invConfig.Clone()
+    childConfig.Options["your-scope-option"] = child.ID
+
+    childAsset := &inventory.Asset{
+        Name:        child.Name,
+        Platform:    child.Platform,
+        Connections: []*inventory.Config{childConfig},
+    }
+
+    // Only give platform IDs to assets that should be scanned.
+    // Without platform IDs, the scanner skips them but AssetExplorer
+    // still connects to trigger the next discovery stage.
+    if isScopeTargeted {
+        childAsset.PlatformIds = []string{child.PlatformId}
+    }
+
+    in.Spec.Assets = append(in.Spec.Assets, childAsset)
+}
+```
+
+**Key rules:**
+- `DiscoveryAuto` and `DiscoveryAll` targets mean everything is scannable — the targeting check must treat them as a match for every level, so platform IDs are always kept
+- Mixed targets (e.g., `--discover pods,namespaces`) — if the intermediate level IS a target, keep its platform IDs
+- Leaf assets at the bottom of the hierarchy keep their platform IDs whenever they match the targets
+
+### Step 6: Gate resource methods at higher scopes (if needed)
 
 When the root scope is scanned, resource methods that load lower-scope data should return empty results to avoid loading everything into the root's cache. This is optional but important for large providers.
 
@@ -176,9 +210,9 @@ func (r *mqlYourProvider) childScopedResources() ([]interface{}, error) {
 }
 ```
 
-### Step 6: Verify both paths produce the same assets
+### Step 7: Verify both paths and discovery targets
 
-Both the legacy and staged paths must discover the same final set of assets (same platform IDs, same names). They differ only in how discovery is chunked.
+Both the legacy and staged paths must discover the same final set of assets (same platform IDs, same names). They differ only in how discovery is chunked. Also verify that discovery targets correctly filter scannable assets.
 
 ```bash
 # Build and install
@@ -192,11 +226,15 @@ mql shell <provider-args>
 # Verify the same assets appear
 mql shell <provider-args>
 
+# Test discovery target filtering (e.g., only pods, only instances)
+# Verify that intermediate assets are traversed but not scanned
+mql shell <provider-args> --discover <specific-target>
+
 # Run existing tests
 go test ./providers/<name>/...
 ```
 
-### Step 7: Update .lr.versions if new resources were added
+### Step 8: Update .lr.versions if new resources were added
 
 If you added any new resources or fields to support staged discovery, update the `.lr.versions` file:
 
@@ -214,7 +252,10 @@ make providers/mqlr
 - [ ] Child connection configs include the scope option that triggers the next stage
 - [ ] `OptionStagedDiscovery` is propagated via `Clone()` to all child configs
 - [ ] Resource methods at root scope are gated to avoid loading child-scope data into root cache
+- [ ] Intermediate assets have platform IDs stripped when not in discovery targets
+- [ ] `DiscoveryAuto` / `DiscoveryAll` targets always keep platform IDs on all assets
 - [ ] Both legacy and staged paths produce the same set of assets
+- [ ] Discovery target filtering works (e.g., `--discover pods` only scans pods, not namespaces)
 - [ ] `go build ./providers/<name>/...` compiles
 - [ ] `go test ./providers/<name>/...` passes
 - [ ] `make test/lint` passes
diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
@@ -39,8 +39,8 @@ ciscocatalyst
 clcerts
 cloudflare
 Clusterwide
-cmek
 cmdline
+cmek
 cmnd
 cname
 compressratio
@@ -129,8 +129,8 @@ jira
 jsonbody
 junos
 kqueue
-kubenet
 KSK
+kubenet
 labelmatchstatement
 launchconfiguration
 launchtemplate
@@ -165,8 +165,8 @@ naflags
 natgateway
 networkfirewall
 networkinterface
-nft
 nexthop
+nft
 nmap
 nocerts
 nodegroup
@@ -176,6 +176,7 @@ nokeys
 notebookinstancedetails
 nproc
 nsrecord
+nss
 nullgroup
 nullstring
 oidc
@@ -248,6 +249,7 @@ sqli
 sqlimatchstatement
 sqlserver
 Sspr
+stringx
 superusers
 switchports
 tailscale
@@ -270,10 +272,10 @@ Utc
 valkey
 VAULTNAME
 Vcpus
-Veth
 vdcs
 vdev
 vertexai
+Veth
 VGeneration
 virtualmachine
 vlans

diff --git a/docs/adr/002-staged-discovery.md b/docs/adr/002-staged-discovery.md
@@ -77,6 +77,52 @@ When scanning the cluster asset (Stage 1), namespace-scoped resource methods (e.
 - If `OPTION_NAMESPACE` is set → namespace scope → return resources filtered to that namespace
 - If `OptionStagedDiscovery` is absent → legacy path → load everything (backward compatible)
 
+### Traversal-Only Assets (Discovery Target Filtering)
+
+Staged discovery introduces a second concern: **not every intermediate asset should be scanned**. When a user specifies discovery targets like `--discover pods`, they want only pods as scannable assets. Namespaces are still needed for traversal (connecting to a namespace triggers Stage 2, which discovers pods), but namespaces themselves should not appear in scan results.
+
+This is solved by **stripping platform IDs** from intermediate assets that don't match the requested discovery targets. `AssetExplorer` and the scanner already skip assets without platform IDs (they log a warning, close the asset, and continue). By emitting these assets without platform IDs, they serve purely as traversal nodes — `AssetExplorer` connects to them (triggering the next discovery stage and populating their children), but the scanner never adds them to the progress bar or sends them for scanning.
+
+**Provider side** — the provider already knows the discovery targets from `invConfig.Discover.Targets`. When emitting intermediate assets, check whether that level is a target and strip platform IDs if not:
+
+```go
+// In discoverClusterStage, when emitting namespace assets:
+nsIsScannable := stringx.ContainsAnyOf(invConfig.Discover.Targets,
+    DiscoveryNamespaces, DiscoveryAuto, DiscoveryAll)
+
+for _, ns := range nss {
+    nsConfig := invConfig.Clone()
+    nsConfig.Options[shared.OPTION_NAMESPACE] = ns.Name
+
+    // Namespaces that aren't a discovery target get their platform IDs
+    // stripped. AssetExplorer still connects to them (triggering stage 2)
+    // but the scanner skips them because they have no platform IDs.
+    if !nsIsScannable {
+        ns.PlatformIds = nil
+    }
+
+    ns.Connections = []*inventory.Config{nsConfig}
+    in.Spec.Assets = append(in.Spec.Assets, ns)
+}
+```
+
+**No caller-side changes needed.** The existing "no platform IDs → skip" logic in `AssetExplorer` and the scanner handles everything:
+- Assets without platform IDs are not added to the progress bar
+- Assets without platform IDs are not sent for scanning
+- Assets without platform IDs are still connected (to discover children), then closed
+
+**How this generalizes:**
+
+| Command | No platform IDs (traversal only) | With platform IDs (scannable) |
+|---|---|---|
+| `k8s --discover pods` | namespaces | pods |
+| `k8s --discover namespaces` | (none) | cluster + namespaces |
+| `k8s --discover all` | (none) | cluster + namespaces + all workloads |
+| `gcp --discover compute-instances` | org, projects, service groups | compute instances |
+| `aws --discover ec2-instances` | accounts, regions | EC2 instances |
+
+**Mixed targets** (`--discover pods,namespaces`): namespaces are both scannable AND traversal nodes. The provider keeps their platform IDs intact. They get scanned and their children get discovered. No special handling needed.
+
 ### Applying to Other Providers
 
 The pattern generalizes to any provider with a hierarchical resource model. The key insight: **each level of the hierarchy becomes a discovery stage, and the connection config for child assets encodes which stage to run next.** Crucially, each stage boundary creates a new runtime with its own MQL resource cache — when that scope is closed, all cached resources under it are released.
@@ -172,6 +218,7 @@ Have `AssetExplorer` automatically infer hierarchy from platform IDs or asset me
 - **Bounded memory per branch:** Each scope boundary creates a separate runtime with its own MQL resource cache. When a scope is closed (`CloseAsset`), its entire cache — all MQL resource objects, API responses, and connection state — is released. Only one branch of the hierarchy is in memory at a time. A 1000-namespace cluster uses the same peak memory as a 5-namespace cluster.
 - **No root cache accumulation:** In single-pass discovery, all resources attach to the root runtime's cache and are never released until the scan completes. Staged discovery breaks this by giving each scope its own cache — pods in namespace A are cached in namespace A's runtime, not the cluster root's. When namespace A is closed, those pods are gone from memory.
 - **Reduced API pressure:** Each stage only queries the APIs needed for its scope. No cluster-wide enumeration of every resource type.
+- **Discovery target filtering with zero caller changes:** Providers strip platform IDs from intermediate assets that don't match discovery targets. The existing "no platform IDs → skip" logic in `AssetExplorer` and the scanner handles the rest — no new flags, fields, or methods needed on the caller side.
 - **Composable with AssetExplorer:** Callers don't need to understand stages — they just connect discovered children as usual. The staging is entirely provider-internal.
 - **Backward compatible:** The `OptionStagedDiscovery` flag is opt-in. Providers without staged discovery and callers that don't set the flag continue working unchanged.
 - **Cache sharing within scope:** `WithParentConnectionId` lets leaf assets within a scope (e.g., pods within a namespace) share that scope's API client cache, avoiding redundant API calls — while keeping the cache isolated from other scopes.

diff --git a/providers/grafana/resources/grafana.lr.go b/providers/grafana/resources/grafana.lr.go
diff --git a/providers/k8s/resources/discovery.go b/providers/k8s/resources/discovery.go
@@ -230,6 +230,14 @@ func discoverClusterStage(runtime *plugin.Runtime, conn shared.Connection, invCo
 		return nil, err
 	}
 
+	// Namespaces are only scannable if explicitly targeted. When they are
+	// not a target, strip their platform IDs so the existing "no platform
+	// IDs → skip" logic in AssetExplorer/scanner prevents them from being
+	// scanned or added to the progress bar. They are still emitted so that
+	// AssetExplorer connects to them (triggering stage 2 workload discovery).
+	nsIsScannable := stringx.ContainsAnyOf(invConfig.Discover.Targets,
+		DiscoveryNamespaces, DiscoveryAuto, DiscoveryAll)
+
 	for _, ns := range nss {
 		// Clone without WithParentConnectionId so each namespace gets its own
 		// resource cache. With a shared parent cache, the k8s MQL resource would
@@ -238,8 +246,12 @@ func discoverClusterStage(runtime *plugin.Runtime, conn shared.Connection, invCo
 		nsConfig := invConfig.Clone() // Clone() copies Options, propagating OPTION_STAGED_DISCOVERY
 		nsConfig.Options[shared.OPTION_NAMESPACE] = ns.Name
 
+		if !nsIsScannable {
+			ns.PlatformIds = nil
+		}
+
 		// Override the connection config to route to stage 2, but keep the
-		// namespace's platform IDs, platform, and labels from discoverNamespaces().
+		// namespace's platform and labels from discoverNamespaces().
 		ns.Connections = []*inventory.Config{nsConfig}
 		in.Spec.Assets = append(in.Spec.Assets, ns)
 	}