Skip to content

Commit 2de1c5b

Browse files
authored
[PROTON] Simplify runtime and metric correlation to reduce overhead (#9132)
1 parent fd8fd32 commit 2de1c5b

18 files changed

Lines changed: 1073 additions & 884 deletions

File tree

third_party/proton/csrc/include/Data/Data.h

Lines changed: 56 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,53 +7,82 @@
77
#include <memory>
88
#include <shared_mutex>
99
#include <string>
10+
#include <utility>
1011

1112
namespace proton {
1213

1314
enum class OutputFormat { Hatchet, ChromeTrace, Count };
1415

16+
/// An "entry" is a data-specific unit of operation, e.g., a node in a tree
17+
/// data structure or an event in a trace data structure.
18+
struct DataEntry {
19+
/// `id` is a unique identifier for the entry in the data.
/// It defaults to `Scope::DummyScopeId`.
20+
size_t id{Scope::DummyScopeId};
21+
/// `metrics` is a map from metric kind to metric accumulator associated
22+
/// with the entry.
23+
/// Flexible metrics cannot be directly stored here since they may be added by
24+
/// both the frontend and the backend.
25+
/// Use `Data::addScopeMetrics` and `Data::addEntryMetrics` to add flexible
26+
/// metrics.
/// The map is held by reference (not copied or owned), so the referenced map
/// must outlive this entry.
27+
std::reference_wrapper<std::map<MetricKind, std::unique_ptr<Metric>>> metrics;
28+
29+
/// Construct an entry that pairs `id` with an existing metrics map.
/// Only a reference to `metrics` is stored.
explicit DataEntry(size_t id,
30+
std::map<MetricKind, std::unique_ptr<Metric>> &metrics)
31+
: id(id), metrics(metrics) {}
32+
33+
/// Insert `metric` into the referenced map, or fold it into an existing one.
/// - A null `metric` is ignored.
/// - If no metric of the same kind is present, ownership of `metric` is
///   moved into the map under `metric->getKind()`.
/// - Otherwise the existing metric's `updateMetric` is called with `*metric`
///   and `metric` is released when this function returns.
void upsertMetric(std::unique_ptr<Metric> metric) {
34+
if (!metric)
35+
return;
36+
auto &metricsMap = metrics.get();
37+
auto it = metricsMap.find(metric->getKind());
38+
if (it == metricsMap.end()) {
39+
metricsMap.emplace(metric->getKind(), std::move(metric));
40+
} else {
41+
it->second->updateMetric(*metric);
42+
}
43+
}
44+
};
45+
1546
class Data : public ScopeInterface {
1647
public:
1748
Data(const std::string &path, ContextSource *contextSource = nullptr)
1849
: path(path), contextSource(contextSource) {}
1950
virtual ~Data() = default;
2051

2152
/// Add an op to the data.
22-
/// If scopeId is already present, add an op under/inside it.
23-
/// Otherwise obtain the current context and append opName to it if opName is
24-
/// not empty.
25-
virtual size_t addOp(size_t scopeId, const std::string &opName = {}) = 0;
53+
/// Obtain the current context and append `opName` to it if `opName`
54+
/// is not empty. Return the `DataEntry` for the added op.
55+
virtual DataEntry addOp(const std::string &opName = {}) = 0;
2656

2757
/// Add an op with custom contexts to the data.
2858
/// This is often used when context source is not available or when
2959
/// the profiler itself needs to supply the contexts, such as
3060
/// instruction samples in GPUs whose contexts are
3161
/// synthesized from the instruction address (no unwinder).
32-
virtual size_t addOp(size_t scopeId,
33-
const std::vector<Context> &contexts) = 0;
34-
35-
/// Add a single metric to the data.
36-
virtual void addMetric(size_t scopeId, std::shared_ptr<Metric> metric) = 0;
37-
38-
/// Add an op and a metric with one call.
39-
/// The default implementation forwards to addOp + addMetric.
40-
virtual void addOpAndMetric(size_t scopeId, const std::string &opName,
41-
std::shared_ptr<Metric> metric) {
42-
scopeId = this->addOp(scopeId, opName);
43-
this->addMetric(scopeId, metric);
44-
}
45-
46-
/// Add multiple metrics to the data.
62+
virtual DataEntry addOp(size_t entryId,
63+
const std::vector<Context> &contexts) = 0;
64+
65+
/// Record a batch of named metrics for a scope.
66+
///
67+
/// This is primarily intended for user-defined metrics defined in Python and
68+
/// directly associated with a scope.
69+
/// `metrics` is a map from metric name to value to be applied to `scopeId`.
70+
virtual void
71+
addScopeMetrics(size_t scopeId,
72+
const std::map<std::string, MetricValueType> &metrics) = 0;
73+
74+
/// Record a batch of named metrics for an entry.
75+
///
76+
/// This is primarily intended for user-defined metrics defined in Python and
77+
/// added lazily by the backend profiler.
78+
/// `metrics` is a map from metric name to value to be applied to `entryId`.
4779
virtual void
48-
addMetrics(size_t scopeId,
49-
const std::map<std::string, MetricValueType> &metrics) = 0;
80+
addEntryMetrics(size_t entryId,
81+
const std::map<std::string, MetricValueType> &metrics) = 0;
5082

51-
/// Clear all non-persistent data.
83+
/// Clear all non-persistent fields in the data.
5284
virtual void clear() = 0;
5385

54-
/// Clear caching data only.
55-
virtual void clearCache() = 0;
56-
5786
/// Serialize the data to a JSON string.
5887
virtual std::string toJsonString() const = 0;
5988

@@ -79,6 +108,8 @@ class Data : public ScopeInterface {
79108
ContextSource *contextSource{};
80109
};
81110

111+
/// Maps each `Data` instance (by pointer) to a `DataEntry`.
using DataToEntryMap = std::map<Data *, DataEntry>;
112+
82113
OutputFormat parseOutputFormat(const std::string &outputFormat);
83114

84115
const std::string outputFormatToString(OutputFormat outputFormat);

third_party/proton/csrc/include/Data/TraceData.h

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,21 @@ class TraceData : public Data {
1212
TraceData(const std::string &path, ContextSource *contextSource = nullptr);
1313
virtual ~TraceData();
1414

15-
size_t addOp(size_t scopeId, const std::string &name) override;
15+
DataEntry addOp(const std::string &name) override;
1616

17-
size_t addOp(size_t scopeId, const std::vector<Context> &contexts) override;
17+
DataEntry addOp(size_t eventId,
18+
const std::vector<Context> &contexts) override;
1819

19-
void addMetric(size_t scopeId, std::shared_ptr<Metric> metric) override;
20+
void addScopeMetrics(
21+
size_t scopeId,
22+
const std::map<std::string, MetricValueType> &metrics) override;
2023

21-
void
22-
addMetrics(size_t scopeId,
23-
const std::map<std::string, MetricValueType> &metrics) override;
24+
void addEntryMetrics(
25+
size_t entryId,
26+
const std::map<std::string, MetricValueType> &metrics) override;
2427

2528
void clear() override;
2629

27-
void clearCache() override;
28-
2930
std::string toJsonString() const override;
3031

3132
std::vector<uint8_t> toMsgPack() const override;
@@ -47,8 +48,8 @@ class TraceData : public Data {
4748
}
4849

4950
std::unique_ptr<Trace> trace;
50-
// ScopeId -> ContextId
51-
std::unordered_map<size_t, size_t> scopeIdToContextId;
51+
// ScopeId -> EventId
52+
std::unordered_map<size_t, size_t> scopeIdToEventId;
5253
};
5354

5455
} // namespace proton

third_party/proton/csrc/include/Data/TreeData.h

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,30 +20,25 @@ class TreeData : public Data {
2020

2121
TreeData(const std::string &path) : TreeData(path, nullptr) {}
2222

23-
size_t addOp(size_t scopeId, const std::string &name) override;
23+
DataEntry addOp(const std::string &name) override;
2424

25-
size_t addOp(size_t scopeId, const std::vector<Context> &contexts) override;
25+
DataEntry addOp(size_t contextId,
26+
const std::vector<Context> &contexts) override;
2627

27-
void addMetric(size_t scopeId, std::shared_ptr<Metric> metric) override;
28+
void addScopeMetrics(
29+
size_t scopeId,
30+
const std::map<std::string, MetricValueType> &metrics) override;
2831

29-
// Override to optimize addOp + addMetric calls
30-
// 1. to avoid double locking
31-
// 2. to avoid looking up scopeId -> contextId twice
32-
void addOpAndMetric(size_t scopeId, const std::string &opName,
33-
std::shared_ptr<Metric> metric) override;
34-
35-
void
36-
addMetrics(size_t scopeId,
37-
const std::map<std::string, MetricValueType> &metrics) override;
32+
void addEntryMetrics(
33+
size_t entryId,
34+
const std::map<std::string, MetricValueType> &metrics) override;
3835

3936
std::vector<uint8_t> toMsgPack() const override;
4037

4138
std::string toJsonString() const override;
4239

4340
void clear() override;
4441

45-
void clearCache() override;
46-
4742
protected:
4843
// ScopeInterface
4944
void enterScope(const Scope &scope) override;

third_party/proton/csrc/include/Profiler/Cupti/CuptiPCSampling.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ class CuptiPCSampling : public Singleton<CuptiPCSampling> {
109109

110110
void start(CUcontext context);
111111

112-
void stop(CUcontext context, uint64_t externId, bool isAPI);
112+
void stop(CUcontext context, const DataToEntryMap &dataToEntry);
113113

114114
void finalize(CUcontext context);
115115

@@ -122,8 +122,8 @@ class CuptiPCSampling : public Singleton<CuptiPCSampling> {
122122

123123
CubinData *getCubinData(uint64_t cubinCrc);
124124

125-
void processPCSamplingData(ConfigureData *configureData, uint64_t externId,
126-
bool isAPI);
125+
void processPCSamplingData(ConfigureData *configureData,
126+
const DataToEntryMap &dataToEntry);
127127

128128
ThreadSafeMap<uint32_t, ConfigureData> contextIdToConfigureData;
129129
// In case the same cubin is loaded multiple times, we need to keep track of

0 commit comments

Comments
 (0)