diff --git a/pkg/phlaredb/symdb/resolver.go b/pkg/phlaredb/symdb/resolver.go index aa5438577a..8405047ba8 100644 --- a/pkg/phlaredb/symdb/resolver.go +++ b/pkg/phlaredb/symdb/resolver.go @@ -41,14 +41,6 @@ type Resolver struct { type ResolverOption func(*Resolver) -// WithResolverMaxConcurrent specifies how many partitions -// can be resolved concurrently. -func WithResolverMaxConcurrent(n int) ResolverOption { - return func(r *Resolver) { - r.c = n - } -} - // WithResolverMaxNodes specifies the desired maximum number // of nodes the resulting profile should include. func WithResolverMaxNodes(n int64) ResolverOption { diff --git a/pkg/phlaredb/symdb/resolver_pprof.go b/pkg/phlaredb/symdb/resolver_pprof.go index ba1dd4639e..9b8a549f5d 100644 --- a/pkg/phlaredb/symdb/resolver_pprof.go +++ b/pkg/phlaredb/symdb/resolver_pprof.go @@ -2,6 +2,7 @@ package symdb import ( "context" + "unsafe" googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" @@ -35,11 +36,18 @@ func buildPprof( // profile can exist. Otherwise, build an empty profile. case !selection.HasValidCallSite(): return b.buildPprof(), nil - // Truncation is applicable when there is an explicit - // limit on the number of the nodes in the profile, or - // if stack traces should be filtered by the call site. - case maxNodes > 0 || len(selection.callSite) > 0: - b = &pprofTree{maxNodes: maxNodes, selection: selection} + // Stack trace filtering is only possible when the profile + // has functions (symbolized); for that, we first build a + // function call tree and then trim nodes according to the + // max nodes limit. + case len(selection.callSite) > 0: + b = &pprofFuncTree{maxNodes: maxNodes, selection: selection} + // Otherwise, if the max nodes limit is provided, we rely on + // the location tree, and ignore symbols altogether. Note that + // the result of truncation may be slightly different compared + // to the function tree. 
+ case maxNodes > 0: + b = &pprofLocTree{maxNodes: maxNodes} } b.init(symbols, samples) if err := symbols.Stacktraces.ResolveStacktraceLocations(ctx, b, samples.StacktraceIDs); err != nil { @@ -261,3 +269,17 @@ func copyStrings(profile *googlev1.Profile, symbols *Symbols, lut []uint32) { f.SystemName = int64(lut[f.SystemName+o]) } } + +func uint64sliceString(u []uint64) string { + if len(u) == 0 { + return "" + } + return unsafe.String((*byte)(unsafe.Pointer(&u[0])), len(u)*8) +} + +func int32sliceString(u []int32) string { + if len(u) == 0 { + return "" + } + return unsafe.String((*byte)(unsafe.Pointer(&u[0])), len(u)*4) +} diff --git a/pkg/phlaredb/symdb/resolver_pprof_go_pgo.go b/pkg/phlaredb/symdb/resolver_pprof_go_pgo.go index 0ce338d7da..9cb15b111c 100644 --- a/pkg/phlaredb/symdb/resolver_pprof_go_pgo.go +++ b/pkg/phlaredb/symdb/resolver_pprof_go_pgo.go @@ -2,7 +2,6 @@ package symdb import ( "strings" - "unsafe" googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" @@ -57,10 +56,6 @@ func (r *pprofGoPGO) InsertStacktrace(_ uint32, locations []int32) { r.cur++ } -func int32sliceString(u []int32) string { - return unsafe.String((*byte)(unsafe.Pointer(&u[0])), len(u)*4) -} - func (r *pprofGoPGO) buildPprof() *googlev1.Profile { createSampleTypeStub(&r.profile) r.appendSamples() diff --git a/pkg/phlaredb/symdb/resolver_pprof_tree.go b/pkg/phlaredb/symdb/resolver_pprof_tree_funcs.go similarity index 85% rename from pkg/phlaredb/symdb/resolver_pprof_tree.go rename to pkg/phlaredb/symdb/resolver_pprof_tree_funcs.go index da7de651a4..6695e62734 100644 --- a/pkg/phlaredb/symdb/resolver_pprof_tree.go +++ b/pkg/phlaredb/symdb/resolver_pprof_tree_funcs.go @@ -1,8 +1,6 @@ package symdb import ( - "unsafe" - googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" "github.com/grafana/pyroscope/pkg/model" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" @@ -14,7 +12,7 @@ 
const ( truncatedNodeName = "other" ) -type pprofTree struct { +type pprofFuncTree struct { symbols *Symbols samples *schemav1.Samples profile googlev1.Profile @@ -44,12 +42,12 @@ type pprofTree struct { } type truncatedStacktraceSample struct { - stacktraceID uint32 - functionNodeIdx int32 - value int64 + stacktraceID uint32 + nodeIdx int32 + value int64 } -func (r *pprofTree) init(symbols *Symbols, samples schemav1.Samples) { +func (r *pprofFuncTree) init(symbols *Symbols, samples schemav1.Samples) { r.symbols = symbols r.samples = &samples // We optimistically assume that each stacktrace has only @@ -64,21 +62,21 @@ func (r *pprofTree) init(symbols *Symbols, samples schemav1.Samples) { } } -func (r *pprofTree) InsertStacktrace(stacktraceID uint32, locations []int32) { +func (r *pprofFuncTree) InsertStacktrace(stacktraceID uint32, locations []int32) { value := int64(r.samples.Values[r.cur]) r.cur++ functions, ok := r.fnNames(locations) if ok { functionNodeIdx := r.functionTree.Insert(functions, value) r.stacktraces = append(r.stacktraces, truncatedStacktraceSample{ - stacktraceID: stacktraceID, - functionNodeIdx: functionNodeIdx, - value: value, + stacktraceID: stacktraceID, + nodeIdx: functionNodeIdx, + value: value, }) } } -func (r *pprofTree) locFunctions(locations []int32) ([]int32, bool) { +func (r *pprofFuncTree) locFunctions(locations []int32) ([]int32, bool) { r.functionsBuf = r.functionsBuf[:0] for i := 0; i < len(locations); i++ { lines := r.symbols.Locations[locations[i]].Line @@ -89,7 +87,7 @@ func (r *pprofTree) locFunctions(locations []int32) ([]int32, bool) { return r.functionsBuf, true } -func (r *pprofTree) locFunctionsFiltered(locations []int32) ([]int32, bool) { +func (r *pprofFuncTree) locFunctionsFiltered(locations []int32) ([]int32, bool) { r.functionsBuf = r.functionsBuf[:0] var pos int pathLen := int(r.selection.depth) @@ -115,7 +113,7 @@ func (r *pprofTree) locFunctionsFiltered(locations []int32) ([]int32, bool) { return r.functionsBuf, 
true } -func (r *pprofTree) buildPprof() *googlev1.Profile { +func (r *pprofFuncTree) buildPprof() *googlev1.Profile { r.markNodesForTruncation() for _, n := range r.stacktraces { r.addSample(n) @@ -132,7 +130,7 @@ func (r *pprofTree) buildPprof() *googlev1.Profile { return &r.profile } -func (r *pprofTree) markNodesForTruncation() { +func (r *pprofFuncTree) markNodesForTruncation() { minValue := r.functionTree.MinValue(r.maxNodes) if minValue == 0 { return @@ -145,11 +143,11 @@ func (r *pprofTree) markNodesForTruncation() { } } -func (r *pprofTree) addSample(n truncatedStacktraceSample) { +func (r *pprofFuncTree) addSample(n truncatedStacktraceSample) { // Find the original stack trace and remove truncated // locations based on the truncated functions. var off int - r.functionsBuf, off = r.buildFunctionsStack(r.functionsBuf, n.functionNodeIdx) + r.functionsBuf, off = r.buildFunctionsStack(r.functionsBuf, n.nodeIdx) if off < 0 { // The stack has no functions without the truncation mark. 
r.fullyTruncated += n.value @@ -182,7 +180,7 @@ func (r *pprofTree) addSample(n truncatedStacktraceSample) { r.sampleMap[uint64sliceString(locationsCopy)] = s } -func (r *pprofTree) buildFunctionsStack(funcs []int32, idx int32) ([]int32, int) { +func (r *pprofFuncTree) buildFunctionsStack(funcs []int32, idx int32) ([]int32, int) { offset := -1 funcs = funcs[:0] for i := idx; i > 0; i = r.functionTree.Nodes[i].Parent { @@ -196,7 +194,7 @@ func (r *pprofTree) buildFunctionsStack(funcs []int32, idx int32) ([]int32, int) return funcs, offset } -func (r *pprofTree) createSamples() { +func (r *pprofFuncTree) createSamples() { samples := len(r.sampleMap) r.profile.Sample = make([]*googlev1.Sample, samples, samples+1) var i int @@ -216,10 +214,7 @@ func truncateLocations(locations []uint64, functions []int32, offset int, symbol f := len(functions) l := len(locations) for ; l > 0 && f >= offset; l-- { - location := symbols.Locations[locations[l-1]] - for j := len(location.Line) - 1; j >= 0; j-- { - f-- - } + f -= len(symbols.Locations[locations[l-1]].Line) } if l > 0 { locations[0] = truncationMark @@ -228,15 +223,7 @@ func truncateLocations(locations []uint64, functions []int32, offset int, symbol return locations[l:] } -func uint64sliceString(u []uint64) string { - if len(u) == 0 { - return "" - } - p := (*byte)(unsafe.Pointer(&u[0])) - return unsafe.String(p, len(u)*8) -} - -func (r *pprofTree) createStubSample() { +func (r *pprofFuncTree) createStubSample() { r.profile.Sample = append(r.profile.Sample, &googlev1.Sample{ LocationId: []uint64{truncationMark}, Value: []int64{r.fullyTruncated}, @@ -261,7 +248,6 @@ func createLocationStub(profile *googlev1.Profile) { SystemName: stubNodeNameIdx, } profile.Function = append(profile.Function, stubFn) - // in the case there is no mapping, we need to create one if len(profile.Mapping) == 0 { profile.Mapping = append(profile.Mapping, &googlev1.Mapping{Id: 1}) } diff --git a/pkg/phlaredb/symdb/resolver_pprof_tree_locs.go 
b/pkg/phlaredb/symdb/resolver_pprof_tree_locs.go new file mode 100644 index 0000000000..65b984b43d --- /dev/null +++ b/pkg/phlaredb/symdb/resolver_pprof_tree_locs.go @@ -0,0 +1,134 @@ +package symdb + +import ( + googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" + "github.com/grafana/pyroscope/pkg/model" + schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" +) + +type pprofLocTree struct { + symbols *Symbols + samples *schemav1.Samples + profile googlev1.Profile + lut []uint32 + cur int + + maxNodes int64 + truncated int + // Sum of fully truncated samples. + fullyTruncated int64 + + locTree *model.StacktraceTree + stacktraces []truncatedStacktraceSample + locationsBuf []int32 + sampleMap map[string]*googlev1.Sample +} + +func (r *pprofLocTree) init(symbols *Symbols, samples schemav1.Samples) { + r.symbols = symbols + r.samples = &samples + // We optimistically assume that each stacktrace has only + // 2 unique nodes. For pathological cases it may exceed 10. 
+ r.locTree = model.NewStacktraceTree(samples.Len() * 2) + r.stacktraces = make([]truncatedStacktraceSample, 0, samples.Len()) + r.sampleMap = make(map[string]*googlev1.Sample, samples.Len()) +} + +func (r *pprofLocTree) InsertStacktrace(_ uint32, locations []int32) { + value := int64(r.samples.Values[r.cur]) + r.cur++ + locNodeIdx := r.locTree.Insert(locations, value) + r.stacktraces = append(r.stacktraces, truncatedStacktraceSample{ + nodeIdx: locNodeIdx, + value: value, + }) +} + +func (r *pprofLocTree) buildPprof() *googlev1.Profile { + r.markNodesForTruncation() + for _, n := range r.stacktraces { + r.addSample(n) + } + r.createSamples() + createSampleTypeStub(&r.profile) + copyLocations(&r.profile, r.symbols, r.lut) + copyFunctions(&r.profile, r.symbols, r.lut) + copyMappings(&r.profile, r.symbols, r.lut) + copyStrings(&r.profile, r.symbols, r.lut) + if r.truncated > 0 || r.fullyTruncated > 0 { + createLocationStub(&r.profile) + } + return &r.profile +} + +func (r *pprofLocTree) markNodesForTruncation() { + // We preserve more nodes than requested to preserve more + // locations with inlined functions. The multiplier is + // chosen empirically; it should be roughly equal to the + // ratio of nodes in the location tree to the nodes in the + // function tree (after truncation). + minValue := r.locTree.MinValue(r.maxNodes * 4) + if minValue == 0 { + return + } + for i := range r.locTree.Nodes { + if r.locTree.Nodes[i].Total < minValue { + r.locTree.Nodes[i].Location |= truncationMark + r.truncated++ + } + } +} + +func (r *pprofLocTree) addSample(n truncatedStacktraceSample) { + r.locationsBuf = r.buildLocationsStack(r.locationsBuf, n.nodeIdx) + if len(r.locationsBuf) == 0 { + // The stack has no locations without the truncation mark. 
+ r.fullyTruncated += n.value + return + } + if s, ok := r.sampleMap[int32sliceString(r.locationsBuf)]; ok { + s.Value[0] += n.value + return + } + + locationsCopy := make([]uint64, len(r.locationsBuf)) + for i := 0; i < len(r.locationsBuf); i++ { + locationsCopy[i] = uint64(r.locationsBuf[i]) + } + + s := &googlev1.Sample{LocationId: locationsCopy, Value: []int64{n.value}} + r.profile.Sample = append(r.profile.Sample, s) + + k := make([]int32, len(r.locationsBuf)) + copy(k, r.locationsBuf) + r.sampleMap[int32sliceString(k)] = s +} + +func (r *pprofLocTree) buildLocationsStack(dst []int32, idx int32) []int32 { + dst = dst[:0] + for i := idx; i > 0; i = r.locTree.Nodes[i].Parent { + if r.locTree.Nodes[i].Location&truncationMark == 0 { + dst = append(dst, r.locTree.Nodes[i].Location&^truncationMark) + } else if len(dst) == 0 { + dst = append(dst, truncationMark) + } + } + if len(dst) == 1 && dst[0] == truncationMark { + return dst[:0] + } + return dst +} + +func (r *pprofLocTree) createSamples() { + samples := len(r.sampleMap) + r.profile.Sample = make([]*googlev1.Sample, 0, samples+1) + for _, s := range r.sampleMap { + r.profile.Sample = append(r.profile.Sample, s) + } + if r.fullyTruncated > 0 { + r.profile.Sample = append(r.profile.Sample, &googlev1.Sample{ + LocationId: []uint64{truncationMark}, + Value: []int64{r.fullyTruncated}, + }) + } +} diff --git a/pkg/phlaredb/symdb/testdata/profile.loc.truncated.pb.gz b/pkg/phlaredb/symdb/testdata/profile.loc.truncated.pb.gz new file mode 100644 index 0000000000..5cb3eb39c5 Binary files /dev/null and b/pkg/phlaredb/symdb/testdata/profile.loc.truncated.pb.gz differ