Skip to content

Commit 4a3106b

Browse files
Track bundle resource counts and state file sizes in telemetry
Adds a new typed BundleResourcesMetadata struct under BundleDeployExperimental, capturing per-resource-type metadata for a bundle deploy:

- count of resources of each type declared in the bundle configuration
- max, mean, and median state size in bytes across resources of that type
- whole state file size on disk
- deployment engine ("direct" or "terraform")

For Terraform deployments the tfstate is translated to the direct-engine representation (via the existing TerraformToGroupName map) before sizing, so per-type stats are comparable across engines.

The new count field replaces the deprecated DatabricksBundleDeployEvent.resource_*_count fields; both are populated during the transition. The Go mirror marks the deprecated Resource*Count fields with a "// Deprecated:" comment.

Measurement is performed at telemetry-emission time by reading the on-disk state file once, so this lands as a single isolated module (bundle/phases/resources_metadata.go) with one new line at the call site — no instrumentation in deploy mutators, state-management code, or bundle.Metrics. To remove: delete the new module and revert one line in telemetry.go plus the proto/Go field.

Requires the new resources_metadata field on BundleDeployExperimental from the universe PR. Lumberjack drops unknown fields, so the two PRs can land in either order.
1 parent e1d2a5c commit 4a3106b

4 files changed

Lines changed: 442 additions & 0 deletions

File tree

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
package phases
2+
3+
import (
4+
"cmp"
5+
"context"
6+
"encoding/json"
7+
"errors"
8+
"io/fs"
9+
"os"
10+
"path/filepath"
11+
"slices"
12+
"strings"
13+
14+
"github.com/databricks/cli/bundle"
15+
"github.com/databricks/cli/bundle/deploy/terraform"
16+
"github.com/databricks/cli/bundle/direct/dstate"
17+
"github.com/databricks/cli/libs/dyn"
18+
"github.com/databricks/cli/libs/log"
19+
"github.com/databricks/cli/libs/telemetry/protos"
20+
tfjson "github.com/hashicorp/terraform-json"
21+
)
22+
23+
// collectResourcesMetadata builds a BundleResourcesMetadata for the deploy:
24+
// per-resource-type counts come from the bundle configuration (matching the
25+
// semantics of the deprecated DatabricksBundleDeployEvent.resource_*_count
26+
// fields), and state-size statistics come from the on-disk deployment state
27+
// file. For Terraform deployments the tfstate is translated to the direct-
28+
// engine representation before sizing so per-type stats are comparable across
29+
// engines.
30+
//
31+
// Returns nil only on a complete absence of signal (no resources declared and
32+
// no readable state). Telemetry must never fail a deploy — all parse errors
33+
// are logged at debug level and treated as missing data.
34+
func collectResourcesMetadata(ctx context.Context, b *bundle.Bundle) *protos.BundleResourcesMetadata {
35+
counts := countResourcesByType(ctx, b)
36+
37+
engine, fileSize, sizesByType := readStateForMetadata(ctx, b)
38+
39+
if len(counts) == 0 && len(sizesByType) == 0 && fileSize == 0 {
40+
return nil
41+
}
42+
43+
types := unionKeys(counts, sizesByType)
44+
slices.Sort(types)
45+
46+
resources := make([]protos.ResourceMetadata, 0, len(types))
47+
for _, t := range types {
48+
sizes := sizesByType[t]
49+
slices.SortFunc(sizes, func(a, b int64) int { return cmp.Compare(a, b) })
50+
resources = append(resources, protos.ResourceMetadata{
51+
ResourceType: t,
52+
Count: counts[t],
53+
StateSizeMaxBytes: statMax(sizes),
54+
StateSizeMeanBytes: statMean(sizes),
55+
StateSizeMedianBytes: statMedian(sizes),
56+
})
57+
}
58+
59+
return &protos.BundleResourcesMetadata{
60+
StateEngine: engine,
61+
StateFileSizeBytes: fileSize,
62+
Resources: resources,
63+
}
64+
}
65+
66+
// countResourcesByType walks the bundle config and counts top-level resources
67+
// at "resources.<type>.<name>". Returns map[type]count.
68+
func countResourcesByType(ctx context.Context, b *bundle.Bundle) map[string]int64 {
69+
out := make(map[string]int64)
70+
pattern := dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey())
71+
_, err := dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
72+
if len(p) >= 2 {
73+
out[p[1].Key()]++
74+
}
75+
return v, nil
76+
})
77+
if err != nil {
78+
log.Debugf(ctx, "resources-metadata telemetry: failed to walk config resources: %s", err)
79+
}
80+
return out
81+
}
82+
83+
// readStateForMetadata reads whichever local state file exists (direct
84+
// preferred, then terraform) and returns engine name, whole-file size, and
85+
// per-resource-type sizes. Returns ("", 0, nil) if no state is present or if
86+
// the bundle isn't far enough through initialization to have a target
87+
// selected (which is required to compute state file paths).
88+
func readStateForMetadata(ctx context.Context, b *bundle.Bundle) (string, int64, map[string][]int64) {
89+
if b.Target == nil {
90+
return "", 0, nil
91+
}
92+
93+
if _, localPath := b.StateFilenameDirect(ctx); localPath != "" {
94+
raw, err := readStateFile(localPath)
95+
if err == nil && raw != nil {
96+
return "direct", int64(len(raw)), parseDirectStateSizes(ctx, raw)
97+
}
98+
if err != nil {
99+
log.Debugf(ctx, "resources-metadata telemetry: skipping direct state at %s: %s", localPath, err)
100+
}
101+
}
102+
103+
if _, localPath := b.StateFilenameTerraform(ctx); localPath != "" {
104+
raw, err := readStateFile(localPath)
105+
if errors.Is(err, fs.ErrNotExist) {
106+
altPath := terraformCacheStatePath(ctx, b)
107+
if altPath != localPath && altPath != "" {
108+
raw, err = readStateFile(altPath)
109+
}
110+
}
111+
if err == nil && raw != nil {
112+
return "terraform", int64(len(raw)), parseTerraformStateSizes(ctx, raw)
113+
}
114+
if err != nil {
115+
log.Debugf(ctx, "resources-metadata telemetry: skipping terraform state at %s: %s", localPath, err)
116+
}
117+
}
118+
119+
return "", 0, nil
120+
}
121+
122+
// readStateFile reads the file at path. An empty path or a missing file is
// not an error: both are reported as (nil, nil) so callers can treat absent
// state as "no data". Any other read failure is returned as-is.
func readStateFile(path string) ([]byte, error) {
	if path == "" {
		return nil, nil
	}
	data, err := os.ReadFile(path)
	if errors.Is(err, fs.ErrNotExist) {
		// Missing state is an expected condition, not a failure.
		return nil, nil
	}
	return data, err
}
132+
133+
func terraformCacheStatePath(ctx context.Context, b *bundle.Bundle) string {
134+
dir, err := terraform.Dir(ctx, b)
135+
if err != nil {
136+
return ""
137+
}
138+
return filepath.Join(dir, "terraform.tfstate")
139+
}
140+
141+
func parseDirectStateSizes(ctx context.Context, raw []byte) map[string][]int64 {
142+
var db dstate.Database
143+
if err := json.Unmarshal(raw, &db); err != nil {
144+
log.Debugf(ctx, "resources-metadata telemetry: failed to parse direct state: %s", err)
145+
return nil
146+
}
147+
byType := make(map[string][]int64)
148+
for key, entry := range db.State {
149+
t := resourceTypeFromKey(key)
150+
if t == "" {
151+
continue
152+
}
153+
byType[t] = append(byType[t], int64(len(entry.State)))
154+
}
155+
return byType
156+
}
157+
158+
func parseTerraformStateSizes(ctx context.Context, raw []byte) map[string][]int64 {
159+
var state struct {
160+
Version int `json:"version"`
161+
Resources []struct {
162+
Type string `json:"type"`
163+
Mode tfjson.ResourceMode `json:"mode"`
164+
Instances []struct {
165+
Attributes json.RawMessage `json:"attributes"`
166+
} `json:"instances"`
167+
} `json:"resources"`
168+
}
169+
if err := json.Unmarshal(raw, &state); err != nil {
170+
log.Debugf(ctx, "resources-metadata telemetry: failed to parse terraform state: %s", err)
171+
return nil
172+
}
173+
byType := make(map[string][]int64)
174+
for _, resource := range state.Resources {
175+
if resource.Mode != tfjson.ManagedResourceMode {
176+
continue
177+
}
178+
groupName, ok := terraform.TerraformToGroupName[resource.Type]
179+
if !ok {
180+
continue
181+
}
182+
for _, instance := range resource.Instances {
183+
byType[groupName] = append(byType[groupName], int64(len(instance.Attributes)))
184+
}
185+
}
186+
return byType
187+
}
188+
189+
// resourceTypeFromKey extracts the resource type from a direct-engine state
// key. Direct-engine keys are of the form "resources.<type>.<name>" or
// "resources.<type>.<name>.<sub>" (for permissions/grants/secret_acls).
// Returns "" for keys that don't match.
func resourceTypeFromKey(key string) string {
	rest, found := strings.CutPrefix(key, "resources.")
	if !found {
		return ""
	}
	segments := strings.SplitN(rest, ".", 3)
	switch len(segments) {
	case 2:
		// "resources.<type>.<name>" — the type is the first segment.
		return segments[0]
	case 3:
		// Sub-resources like permissions / grants / secret_acls live at
		// "resources.<parent>.<name>.<sub>". Track them under the
		// sub-resource type so they aggregate across resource families.
		return segments[2]
	default:
		return ""
	}
}
206+
207+
// unionKeys returns the union of the keys of both maps, in no particular
// order (callers sort as needed).
func unionKeys(counts map[string]int64, sizes map[string][]int64) []string {
	union := make([]string, 0, len(counts)+len(sizes))
	for k := range counts {
		union = append(union, k)
	}
	for k := range sizes {
		// Only add keys not already contributed by counts.
		if _, dup := counts[k]; !dup {
			union = append(union, k)
		}
	}
	return union
}
221+
222+
// statMax returns the largest value of an ascending-sorted slice, or 0 when
// the slice is empty.
func statMax(sortedSizes []int64) int64 {
	if n := len(sortedSizes); n > 0 {
		return sortedSizes[n-1]
	}
	return 0
}
228+
229+
// statMean returns the truncated integer mean of the values, or 0 when the
// slice is empty.
func statMean(sortedSizes []int64) int64 {
	if len(sortedSizes) == 0 {
		return 0
	}
	var sum int64
	for _, size := range sortedSizes {
		sum += size
	}
	return sum / int64(len(sortedSizes))
}
239+
240+
// statMedian returns the median of an ascending-sorted slice, or 0 when the
// slice is empty. For an even number of elements the median is the truncated
// integer mean of the two middle values. (Previously the lower-middle element
// was returned for even lengths, biasing the statistic low.)
func statMedian(sortedSizes []int64) int64 {
	n := len(sortedSizes)
	if n == 0 {
		return 0
	}
	if n%2 == 1 {
		return sortedSizes[n/2]
	}
	return (sortedSizes[n/2-1] + sortedSizes[n/2]) / 2
}

0 commit comments

Comments
 (0)