Skip to content

Commit 3dc9b1c

Browse files
committed
feat: add __unsymbolized__ label on ingest path (#4147)
* feat: add __has_native_unsymbolized_profiles__ label on ingest path
* Update unsymbolized check logic & update label name
* Fix wrong HasUnsymbolizedProfiles logic and add tests
1 parent df3edab commit 3dc9b1c

File tree

4 files changed

+145
-40
lines changed

4 files changed

+145
-40
lines changed

pkg/experiment/block/metadata/metadata_labels.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
const (
	// LabelNameTenantDataset is the reserved label name for the tenant dataset.
	LabelNameTenantDataset = "__tenant_dataset__"
	// LabelValueDatasetTSDBIndex is the dataset label value identifying the
	// TSDB index dataset.
	LabelValueDatasetTSDBIndex = "dataset_tsdb_index"
	// LabelNameUnsymbolized is attached on the ingest path to datasets that
	// contain profiles with unsymbolized locations (see HasUnsymbolizedProfiles).
	LabelNameUnsymbolized = "__unsymbolized__"
)
2223

2324
type LabelBuilder struct {

pkg/experiment/ingester/memdb/head.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,11 @@ import (
2121
)
2222

2323
type FlushedHead struct {
24-
Index []byte
25-
Profiles []byte
26-
Symbols []byte
27-
Meta struct {
24+
Index []byte
25+
Profiles []byte
26+
Symbols []byte
27+
HasUnsymbolizedProfiles bool
28+
Meta struct {
2829
ProfileTypeNames []string
2930
MinTimeNanos int64
3031
MaxTimeNanos int64
@@ -153,6 +154,8 @@ func (h *Head) flush(ctx context.Context) (*FlushedHead, error) {
153154
return res, nil
154155
}
155156

157+
res.HasUnsymbolizedProfiles = HasUnsymbolizedProfiles(h.symbols.Symbols())
158+
156159
symbolsBuffer := bytes.NewBuffer(nil)
157160
if err := symdb.WritePartition(h.symbols, symbolsBuffer); err != nil {
158161
return nil, err
@@ -173,3 +176,15 @@ func (h *Head) flush(ctx context.Context) (*FlushedHead, error) {
173176
}
174177
return res, nil
175178
}
179+
180+
// TODO: move into the symbolizer package when available
181+
func HasUnsymbolizedProfiles(symbols *symdb.Symbols) bool {
182+
locations := symbols.Locations
183+
mappings := symbols.Mappings
184+
for _, loc := range locations {
185+
if !mappings[loc.MappingId].HasFunctions {
186+
return true
187+
}
188+
}
189+
return false
190+
}

pkg/experiment/ingester/memdb/head_test.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"github.com/grafana/pyroscope/pkg/og/convert/pprof/bench"
2828
"github.com/grafana/pyroscope/pkg/phlaredb"
2929
testutil2 "github.com/grafana/pyroscope/pkg/phlaredb/block/testutil"
30+
"github.com/grafana/pyroscope/pkg/phlaredb/symdb"
3031
"github.com/grafana/pyroscope/pkg/pprof"
3132
"github.com/grafana/pyroscope/pkg/pprof/testhelper"
3233
)
@@ -672,6 +673,88 @@ func Test_HeadFlush_DuplicateLabels(t *testing.T) {
672673
&typesv1.LabelPair{Name: "pod", Value: "not-my-pod"},
673674
)
674675
}
676+
677+
// TODO: move into the symbolizer package when available

// TestUnsymbolized verifies HasUnsymbolizedProfiles over symbols written by a
// symdb partition writer: a profile whose every mapping has functions is
// symbolized; any location referencing a mapping with HasFunctions=false makes
// the partition count as unsymbolized.
func TestUnsymbolized(t *testing.T) {
	testCases := []struct {
		name               string
		profile            *profilev1.Profile
		expectUnsymbolized bool
	}{
		{
			// Single mapping with HasFunctions=true — nothing unsymbolized.
			name: "fully symbolized profile",
			profile: &profilev1.Profile{
				StringTable: []string{"", "a"},
				Function: []*profilev1.Function{
					{Id: 4, Name: 1},
				},
				Mapping: []*profilev1.Mapping{
					{Id: 239, HasFunctions: true},
				},
				Location: []*profilev1.Location{
					{Id: 5, MappingId: 239, Line: []*profilev1.Line{{FunctionId: 4, Line: 1}}},
				},
				Sample: []*profilev1.Sample{
					{LocationId: []uint64{5}, Value: []int64{1}},
				},
			},
			expectUnsymbolized: false,
		},
		{
			// Same shape, but the mapping reports no function symbols.
			name: "mapping without functions",
			profile: &profilev1.Profile{
				StringTable: []string{"", "a"},
				Function: []*profilev1.Function{
					{Id: 4, Name: 1},
				},
				Mapping: []*profilev1.Mapping{
					{Id: 239, HasFunctions: false},
				},
				Location: []*profilev1.Location{
					{Id: 5, MappingId: 239, Line: []*profilev1.Line{{FunctionId: 4, Line: 1}}},
				},
				Sample: []*profilev1.Sample{
					{LocationId: []uint64{5}, Value: []int64{1}},
				},
			},
			expectUnsymbolized: true,
		},
		{
			// One symbolized and one unsymbolized mapping: a single bad
			// location is enough to flag the whole partition.
			name: "multiple locations with mixed symbolization",
			profile: &profilev1.Profile{
				StringTable: []string{"", "a", "b"},
				Function: []*profilev1.Function{
					{Id: 4, Name: 1},
					{Id: 5, Name: 2},
				},
				Mapping: []*profilev1.Mapping{
					{Id: 239, HasFunctions: true},
					{Id: 240, HasFunctions: false},
				},
				Location: []*profilev1.Location{
					{Id: 5, MappingId: 239, Line: []*profilev1.Line{{FunctionId: 4, Line: 1}}},
					{Id: 6, MappingId: 240, Line: nil},
				},
				Sample: []*profilev1.Sample{
					{LocationId: []uint64{5, 6}, Value: []int64{1}},
				},
			},
			expectUnsymbolized: true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Route the profile through the real symdb writer so the check
			// runs against the same in-memory representation the ingest
			// path produces.
			symbols := symdb.NewPartitionWriter(0, &symdb.Config{
				Version: symdb.FormatV3,
			})
			symbols.WriteProfileSymbols(tc.profile)
			unsymbolized := HasUnsymbolizedProfiles(symbols.Symbols())
			assert.Equal(t, tc.expectUnsymbolized, unsymbolized)
		})
	}
}
757+
675758
func BenchmarkHeadIngestProfiles(t *testing.B) {
676759
var (
677760
profilePaths = []string{

pkg/experiment/ingester/segment.go

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ func (sh *shard) flushSegment(ctx context.Context, wg *sync.WaitGroup) {
116116
if s.debuginfo.movedHeads > 0 {
117117
_ = level.Debug(s.logger).Log("msg",
118118
"writing segment block done",
119-
"heads-count", len(s.heads),
119+
"heads-count", len(s.datasets),
120120
"heads-moved-count", s.debuginfo.movedHeads,
121121
"inflight-duration", s.debuginfo.waitInflight,
122122
"flush-heads-duration", s.debuginfo.flushHeadsDuration,
@@ -203,7 +203,7 @@ func (sw *segmentsWriter) newSegment(sh *shard, sk shardKey, sl log.Logger) *seg
203203
s := &segment{
204204
logger: log.With(sl, "segment-id", id.String()),
205205
ulid: id,
206-
heads: make(map[datasetKey]dataset),
206+
datasets: make(map[datasetKey]*dataset),
207207
sw: sw,
208208
sh: sh,
209209
shard: sk,
@@ -216,7 +216,7 @@ func (sw *segmentsWriter) newSegment(sh *shard, sk shardKey, sl log.Logger) *seg
216216
func (s *segment) flush(ctx context.Context) (err error) {
217217
span, ctx := opentracing.StartSpanFromContext(ctx, "segment.flush", opentracing.Tags{
218218
"block_id": s.ulid.String(),
219-
"datasets": len(s.heads),
219+
"datasets": len(s.datasets),
220220
"shard": s.shard,
221221
})
222222
defer span.Finish()
@@ -340,6 +340,10 @@ func concatSegmentHead(f *headFlush, w *writerOffset, s *metadata.StringTable) *
340340
lb.WithLabelSet(model.LabelNameServiceName, f.head.key.service, model.LabelNameProfileType, profileType)
341341
}
342342

343+
if f.flushed.HasUnsymbolizedProfiles {
344+
lb.WithLabelSet(model.LabelNameServiceName, f.head.key.service, metadata.LabelNameUnsymbolized, "true")
345+
}
346+
343347
// Other optional labels:
344348
// lb.WithLabelSet("label_name", "label_value", ...)
345349
ds.Labels = lb.Build()
@@ -348,8 +352,8 @@ func concatSegmentHead(f *headFlush, w *writerOffset, s *metadata.StringTable) *
348352
}
349353

350354
func (s *segment) flushHeads(ctx context.Context) flushStream {
351-
heads := maps.Values(s.heads)
352-
slices.SortFunc(heads, func(a, b dataset) int {
355+
heads := maps.Values(s.datasets)
356+
slices.SortFunc(heads, func(a, b *dataset) int {
353357
return a.key.compare(b.key)
354358
})
355359

@@ -364,15 +368,15 @@ func (s *segment) flushHeads(ctx context.Context) flushStream {
364368
defer close(f.done)
365369
flushed, err := s.flushHead(ctx, f.head)
366370
if err != nil {
367-
level.Error(s.logger).Log("msg", "failed to flush head", "err", err)
371+
level.Error(s.logger).Log("msg", "failed to flush dataset", "err", err)
368372
return
369373
}
370374
if flushed == nil {
371-
level.Debug(s.logger).Log("msg", "skipping nil head")
375+
level.Debug(s.logger).Log("msg", "skipping nil dataset")
372376
return
373377
}
374378
if flushed.Meta.NumSamples == 0 {
375-
level.Debug(s.logger).Log("msg", "skipping empty head")
379+
level.Debug(s.logger).Log("msg", "skipping empty dataset")
376380
return
377381
}
378382
f.flushed = flushed
@@ -403,24 +407,24 @@ func (s *flushStream) Next() bool {
403407
return false
404408
}
405409

406-
func (s *segment) flushHead(ctx context.Context, e dataset) (*memdb.FlushedHead, error) {
410+
func (s *segment) flushHead(ctx context.Context, e *dataset) (*memdb.FlushedHead, error) {
407411
th := time.Now()
408412
flushed, err := e.head.Flush(ctx)
409413
if err != nil {
410414
s.sw.metrics.flushServiceHeadDuration.WithLabelValues(s.sshard, e.key.tenant).Observe(time.Since(th).Seconds())
411415
s.sw.metrics.flushServiceHeadError.WithLabelValues(s.sshard, e.key.tenant).Inc()
412-
return nil, fmt.Errorf("failed to flush head : %w", err)
416+
return nil, fmt.Errorf("failed to flush dataset : %w", err)
413417
}
414418
s.sw.metrics.flushServiceHeadDuration.WithLabelValues(s.sshard, e.key.tenant).Observe(time.Since(th).Seconds())
415419
level.Debug(s.logger).Log(
416-
"msg", "flushed head",
420+
"msg", "flushed dataset",
417421
"tenant", e.key.tenant,
418422
"service", e.key.service,
419423
"profiles", flushed.Meta.NumProfiles,
420424
"profiletypes", fmt.Sprintf("%v", flushed.Meta.ProfileTypeNames),
421425
"mintime", flushed.Meta.MinTimeNanos,
422426
"maxtime", flushed.Meta.MaxTimeNanos,
423-
"head-flush-duration", time.Since(th).String(),
427+
"dataset-flush-duration", time.Since(th).String(),
424428
)
425429
return flushed, nil
426430
}
@@ -443,7 +447,7 @@ type dataset struct {
443447
}
444448

445449
type headFlush struct {
446-
head dataset
450+
head *dataset
447451
flushed *memdb.FlushedHead
448452
// protects head
449453
done chan struct{}
@@ -454,10 +458,12 @@ type segment struct {
454458
shard shardKey
455459
sshard string
456460
inFlightProfiles sync.WaitGroup
457-
heads map[datasetKey]dataset
458-
headsLock sync.RWMutex
459-
logger log.Logger
460-
sw *segmentsWriter
461+
462+
mu sync.RWMutex
463+
datasets map[datasetKey]*dataset
464+
465+
logger log.Logger
466+
sw *segmentsWriter
461467

462468
// TODO(kolesnikovae): Revisit.
463469
doneChan chan struct{}
@@ -501,11 +507,12 @@ func (s *segment) ingest(tenantID string, p *profilev1.Profile, id uuid.UUID, la
501507
tenant: tenantID,
502508
service: model.Labels(labels).Get(model.LabelNameServiceName),
503509
}
510+
ds := s.datasetForIngest(k)
504511
size := p.SizeVT()
505512
rules := s.sw.limits.IngestionRelabelingRules(tenantID)
506513
usage := s.sw.limits.DistributorUsageGroups(tenantID).GetUsageGroups(tenantID, labels)
507514
appender := &sampleAppender{
508-
head: s.headForIngest(k),
515+
dataset: ds,
509516
profile: p,
510517
id: id,
511518
annotations: annotations,
@@ -519,7 +526,7 @@ func (s *segment) ingest(tenantID string, p *profilev1.Profile, id uuid.UUID, la
519526

520527
type sampleAppender struct {
521528
id uuid.UUID
522-
head *memdb.Head
529+
dataset *dataset
523530
profile *profilev1.Profile
524531
exporter *pprofmodel.SampleExporter
525532
annotations []*typesv1.ProfileAnnotation
@@ -529,7 +536,7 @@ type sampleAppender struct {
529536
}
530537

531538
// VisitProfile ingests the visitor's whole profile into the dataset's head
// under the given series labels.
func (v *sampleAppender) VisitProfile(labels []*typesv1.LabelPair) {
	v.dataset.head.Ingest(v.profile, v.id, labels, v.annotations)
}
534541

535542
func (v *sampleAppender) VisitSampleSeries(labels []*typesv1.LabelPair, samples []*profilev1.Sample) {
@@ -538,37 +545,36 @@ func (v *sampleAppender) VisitSampleSeries(labels []*typesv1.LabelPair, samples
538545
}
539546
var n profilev1.Profile
540547
v.exporter.ExportSamples(&n, samples)
541-
v.head.Ingest(&n, v.id, labels, v.annotations)
548+
v.dataset.head.Ingest(v.profile, v.id, labels, v.annotations)
542549
}
543550

544551
// Discarded accumulates the number of profiles and bytes the visitor reports
// as discarded, for the caller to inspect after the visit completes.
func (v *sampleAppender) Discarded(profiles, bytes int) {
	v.discardedProfiles += profiles
	v.discardedBytes += bytes
}
548555

549-
// datasetForIngest returns the dataset registered under key k, creating and
// registering a new one (with a fresh memdb head) on first use. It is safe
// for concurrent callers: an optimistic lookup under the read lock is
// followed, on miss, by a re-check under the write lock before inserting.
func (s *segment) datasetForIngest(k datasetKey) *dataset {
	// Fast path: dataset already exists; only the read lock is taken.
	s.mu.RLock()
	ds, ok := s.datasets[k]
	s.mu.RUnlock()
	if ok {
		return ds
	}

	// Slow path: re-check under the write lock — another goroutine may have
	// created the dataset between RUnlock and Lock above.
	s.mu.Lock()
	defer s.mu.Unlock()
	if ds, ok = s.datasets[k]; ok {
		return ds
	}

	h := memdb.NewHead(s.sw.headMetrics)
	ds = &dataset{
		key:  k,
		head: h,
	}

	s.datasets[k] = ds
	return ds
}
573579

574580
func (sw *segmentsWriter) uploadBlock(ctx context.Context, blockData []byte, meta *metastorev1.BlockMeta, s *segment) error {

0 commit comments

Comments
 (0)