Skip to content

Commit 77ec7bc

Browse files
Track bundle resource counts and state file sizes in telemetry
Adds a new typed BundleResourcesMetadata struct under BundleDeployExperimental, capturing per-resource-type metadata for a bundle deploy:

- count of resources of each type declared in the bundle configuration
- max, mean, and median state size in bytes across resources of that type
- whole state file size on disk
- deployment engine ("direct" or "terraform")

For Terraform deployments the tfstate is translated to the direct-engine representation (via the existing TerraformToGroupName map) before sizing, so per-type stats are comparable across engines.

The new count field replaces the deprecated DatabricksBundleDeployEvent.resource_*_count fields; both are populated during the transition. The Go mirror marks the deprecated Resource*Count fields with a "// Deprecated:" comment.

Measurement is performed at telemetry-emission time by reading the on-disk state file once, so this lands as a single isolated module (bundle/phases/resources_metadata.go) with one new line at the call site — no instrumentation in deploy mutators, state-management code, or bundle.Metrics. To remove: delete the new module and revert one line in telemetry.go plus the proto/Go field.

Requires the new resources_metadata field on BundleDeployExperimental from the universe PR. Lumberjack drops unknown fields, so the two PRs can land in either order.
1 parent e1d2a5c commit 77ec7bc

6 files changed

Lines changed: 510 additions & 0 deletions

File tree

acceptance/bundle/telemetry/deploy/out.telemetry.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,26 @@
3535
"[UUID]",
3636
"[UUID]"
3737
],
38+
"resources_metadata": {
39+
"state_engine": "direct",
40+
"state_file_size_bytes": SMALL_INT,
41+
"resources": [
42+
{
43+
"resource_type": "jobs",
44+
"count": 3,
45+
"state_size_max_bytes": SMALL_INT,
46+
"state_size_mean_bytes": SMALL_INT,
47+
"state_size_median_bytes": SMALL_INT
48+
},
49+
{
50+
"resource_type": "pipelines",
51+
"count": 2,
52+
"state_size_max_bytes": SMALL_INT,
53+
"state_size_mean_bytes": SMALL_INT,
54+
"state_size_median_bytes": SMALL_INT
55+
}
56+
]
57+
},
3858
"experimental": {
3959
"configuration_file_count": 1,
4060
"variable_count": 0,

acceptance/bundle/telemetry/test.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,22 @@ New = '[OS]'
2020
[[Repls]]
2121
Old = '"local_cache_measurements_ms": \[[^\]]*\]'
2222
New = '"local_cache_measurements_ms": [...redacted...]'
23+
24+
# Normalize byte-size measurements in resources_metadata to placeholders.
25+
# Exact byte counts depend on resource state JSON formatting and would
26+
# make these golden files brittle across SDK changes.
27+
[[Repls]]
28+
Old = '"state_file_size_bytes": \d+'
29+
New = '"state_file_size_bytes": SMALL_INT'
30+
31+
[[Repls]]
32+
Old = '"state_size_max_bytes": \d+'
33+
New = '"state_size_max_bytes": SMALL_INT'
34+
35+
[[Repls]]
36+
Old = '"state_size_mean_bytes": \d+'
37+
New = '"state_size_mean_bytes": SMALL_INT'
38+
39+
[[Repls]]
40+
Old = '"state_size_median_bytes": \d+'
41+
New = '"state_size_median_bytes": SMALL_INT'
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
package phases
2+
3+
import (
4+
"cmp"
5+
"context"
6+
"encoding/json"
7+
"errors"
8+
"io/fs"
9+
"os"
10+
"path/filepath"
11+
"slices"
12+
"strconv"
13+
"strings"
14+
15+
"github.com/databricks/cli/bundle"
16+
"github.com/databricks/cli/bundle/deploy/terraform"
17+
"github.com/databricks/cli/bundle/direct/dstate"
18+
"github.com/databricks/cli/libs/dyn"
19+
"github.com/databricks/cli/libs/log"
20+
"github.com/databricks/cli/libs/telemetry/protos"
21+
tfjson "github.com/hashicorp/terraform-json"
22+
)
23+
24+
// collectResourcesMetadata builds a BundleResourcesMetadata for the deploy:
25+
// per-resource-type counts come from the bundle configuration (matching the
26+
// semantics of the deprecated DatabricksBundleDeployEvent.resource_*_count
27+
// fields), and state-size statistics come from the on-disk deployment state
28+
// file. For Terraform deployments the tfstate is translated to the direct-
29+
// engine representation before sizing so per-type stats are comparable across
30+
// engines.
31+
//
32+
// Returns nil only on a complete absence of signal (no resources declared and
33+
// no readable state). Telemetry must never fail a deploy — all parse errors
34+
// are logged at debug level and treated as missing data.
35+
func collectResourcesMetadata(ctx context.Context, b *bundle.Bundle) *protos.BundleResourcesMetadata {
36+
counts := countResourcesByType(ctx, b)
37+
38+
engine, fileSize, sizesByType := readStateForMetadata(ctx, b)
39+
40+
if len(counts) == 0 && len(sizesByType) == 0 && fileSize == 0 {
41+
return nil
42+
}
43+
44+
types := unionKeys(counts, sizesByType)
45+
slices.Sort(types)
46+
47+
resources := make([]protos.ResourceMetadata, 0, len(types))
48+
for _, t := range types {
49+
sizes := sizesByType[t]
50+
slices.SortFunc(sizes, func(a, b int64) int { return cmp.Compare(a, b) })
51+
resources = append(resources, protos.ResourceMetadata{
52+
ResourceType: t,
53+
Count: counts[t],
54+
StateSizeMaxBytes: statMax(sizes),
55+
StateSizeMeanBytes: statMean(sizes),
56+
StateSizeMedianBytes: statMedian(sizes),
57+
})
58+
}
59+
60+
return &protos.BundleResourcesMetadata{
61+
StateEngine: engine,
62+
StateFileSizeBytes: fileSize,
63+
Resources: resources,
64+
}
65+
}
66+
67+
// countResourcesByType walks the bundle config and counts top-level resources
68+
// at "resources.<type>.<name>". Returns map[type]count.
69+
func countResourcesByType(ctx context.Context, b *bundle.Bundle) map[string]int64 {
70+
out := make(map[string]int64)
71+
pattern := dyn.NewPattern(dyn.Key("resources"), dyn.AnyKey(), dyn.AnyKey())
72+
_, err := dyn.MapByPattern(b.Config.Value(), pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
73+
if len(p) >= 2 {
74+
out[p[1].Key()]++
75+
}
76+
return v, nil
77+
})
78+
if err != nil {
79+
log.Debugf(ctx, "resources-metadata telemetry: failed to walk config resources: %s", err)
80+
}
81+
return out
82+
}
83+
84+
// readStateForMetadata reads whichever local state file exists (direct
85+
// preferred, then terraform) and returns engine name, whole-file size, and
86+
// per-resource-type sizes. Returns ("", 0, nil) if no state is present or if
87+
// the bundle isn't far enough through initialization to have a target
88+
// selected (which is required to compute state file paths).
89+
func readStateForMetadata(ctx context.Context, b *bundle.Bundle) (string, int64, map[string][]int64) {
90+
if b.Target == nil {
91+
return "", 0, nil
92+
}
93+
94+
if _, localPath := b.StateFilenameDirect(ctx); localPath != "" {
95+
raw, err := readStateFile(localPath)
96+
if err == nil && raw != nil {
97+
return "direct", int64(len(raw)), parseDirectStateSizes(ctx, raw)
98+
}
99+
if err != nil {
100+
log.Debugf(ctx, "resources-metadata telemetry: skipping direct state at %s: %s", localPath, err)
101+
}
102+
}
103+
104+
if _, localPath := b.StateFilenameTerraform(ctx); localPath != "" {
105+
raw, err := readStateFile(localPath)
106+
if errors.Is(err, fs.ErrNotExist) {
107+
altPath := terraformCacheStatePath(ctx, b)
108+
if altPath != localPath && altPath != "" {
109+
raw, err = readStateFile(altPath)
110+
}
111+
}
112+
if err == nil && raw != nil {
113+
fileSize, byType := parseTerraformStateSizes(ctx, raw)
114+
return "terraform", fileSize, byType
115+
}
116+
if err != nil {
117+
log.Debugf(ctx, "resources-metadata telemetry: skipping terraform state at %s: %s", localPath, err)
118+
}
119+
}
120+
121+
return "", 0, nil
122+
}
123+
124+
func readStateFile(path string) ([]byte, error) {
125+
if path == "" {
126+
return nil, nil
127+
}
128+
raw, err := os.ReadFile(path)
129+
if errors.Is(err, fs.ErrNotExist) {
130+
return nil, nil
131+
}
132+
return raw, err
133+
}
134+
135+
func terraformCacheStatePath(ctx context.Context, b *bundle.Bundle) string {
136+
dir, err := terraform.Dir(ctx, b)
137+
if err != nil {
138+
return ""
139+
}
140+
return filepath.Join(dir, "terraform.tfstate")
141+
}
142+
143+
func parseDirectStateSizes(ctx context.Context, raw []byte) map[string][]int64 {
144+
var db dstate.Database
145+
if err := json.Unmarshal(raw, &db); err != nil {
146+
log.Debugf(ctx, "resources-metadata telemetry: failed to parse direct state: %s", err)
147+
return nil
148+
}
149+
byType := make(map[string][]int64)
150+
for key, entry := range db.State {
151+
t := resourceTypeFromKey(key)
152+
if t == "" {
153+
continue
154+
}
155+
byType[t] = append(byType[t], int64(len(entry.State)))
156+
}
157+
return byType
158+
}
159+
160+
// parseTerraformStateSizes translates the tfstate to the direct-engine
161+
// representation (a dstate.Database with one ResourceEntry per managed
162+
// resource instance) before measuring sizes. This makes the file size and
163+
// per-type sizes comparable between engines for the same logical bundle.
164+
//
165+
// Returns the size in bytes of the JSON-serialized translated database (used
166+
// as state_file_size_bytes) and the per-resource-type byte sizes.
167+
func parseTerraformStateSizes(ctx context.Context, raw []byte) (int64, map[string][]int64) {
168+
var state struct {
169+
Version int `json:"version"`
170+
Resources []struct {
171+
Type string `json:"type"`
172+
Name string `json:"name"`
173+
Mode tfjson.ResourceMode `json:"mode"`
174+
Instances []struct {
175+
Attributes json.RawMessage `json:"attributes"`
176+
} `json:"instances"`
177+
} `json:"resources"`
178+
}
179+
if err := json.Unmarshal(raw, &state); err != nil {
180+
log.Debugf(ctx, "resources-metadata telemetry: failed to parse terraform state: %s", err)
181+
return 0, nil
182+
}
183+
184+
db := dstate.NewDatabase("", 0)
185+
byType := make(map[string][]int64)
186+
for _, resource := range state.Resources {
187+
if resource.Mode != tfjson.ManagedResourceMode {
188+
continue
189+
}
190+
groupName, ok := terraform.TerraformToGroupName[resource.Type]
191+
if !ok {
192+
continue
193+
}
194+
for i, instance := range resource.Instances {
195+
key := "resources." + groupName + "." + resource.Name
196+
if i > 0 {
197+
key = key + ".instance" + strconv.Itoa(i)
198+
}
199+
db.State[key] = dstate.ResourceEntry{State: instance.Attributes}
200+
byType[groupName] = append(byType[groupName], int64(len(instance.Attributes)))
201+
}
202+
}
203+
204+
serialized, err := json.Marshal(db)
205+
if err != nil {
206+
log.Debugf(ctx, "resources-metadata telemetry: failed to serialize translated state: %s", err)
207+
return 0, byType
208+
}
209+
return int64(len(serialized)), byType
210+
}
211+
212+
// resourceTypeFromKey extracts the resource type from a direct-engine state
// key. Direct-engine keys are of the form "resources.<type>.<name>" or
// "resources.<type>.<name>.<sub>" (for permissions/grants/secret_acls).
// Returns "" for keys that don't match.
func resourceTypeFromKey(key string) string {
	rest, ok := strings.CutPrefix(key, "resources.")
	if !ok {
		return ""
	}
	// rest is "<type>.<name>" or "<type>.<name>.<sub>".
	parts := strings.SplitN(rest, ".", 3)
	if len(parts) < 2 {
		return ""
	}
	if len(parts) == 3 {
		// Sub-resources like permissions / grants / secret_acls live at
		// "resources.<parent>.<name>.<sub>". Track them under the
		// sub-resource type so they aggregate across resource families.
		return parts[2]
	}
	return parts[0]
}
229+
230+
func unionKeys(a map[string]int64, b map[string][]int64) []string {
231+
seen := make(map[string]struct{}, len(a)+len(b))
232+
for k := range a {
233+
seen[k] = struct{}{}
234+
}
235+
for k := range b {
236+
seen[k] = struct{}{}
237+
}
238+
out := make([]string, 0, len(seen))
239+
for k := range seen {
240+
out = append(out, k)
241+
}
242+
return out
243+
}
244+
245+
// statMax returns the largest value of an ascending-sorted slice (its last
// element), or 0 for an empty slice.
func statMax(sortedSizes []int64) int64 {
	if n := len(sortedSizes); n > 0 {
		return sortedSizes[n-1]
	}
	return 0
}
251+
252+
// statMean returns the truncating integer mean of the values, or 0 for an
// empty slice. Sort order is irrelevant here; the parameter name just
// matches the sibling stat helpers.
func statMean(sortedSizes []int64) int64 {
	n := int64(len(sortedSizes))
	if n == 0 {
		return 0
	}
	var sum int64
	for _, size := range sortedSizes {
		sum += size
	}
	return sum / n
}
262+
263+
// statMedian returns the median element of an ascending-sorted slice. For
// even lengths it picks the lower of the two middle elements; an empty slice
// yields 0.
func statMedian(sortedSizes []int64) int64 {
	n := len(sortedSizes)
	if n == 0 {
		return 0
	}
	return sortedSizes[(n-1)/2]
}

0 commit comments

Comments
 (0)