Skip to content

Commit

Permalink
Merge branch 'main' into xu_fix_find_pti_view_on_windows
Browse files Browse the repository at this point in the history
  • Loading branch information
xuhancn authored Dec 16, 2024
2 parents 1b2e93f + 1721e96 commit 5042a21
Show file tree
Hide file tree
Showing 61 changed files with 404 additions and 149 deletions.
1 change: 0 additions & 1 deletion benchmarks/perfetto/backends/clp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,5 @@


class CLPTraceAnalysis(TraceAnalysis):

def __init__(self, args: argparse.Namespace):
super().__init__(args)
1 change: 0 additions & 1 deletion benchmarks/perfetto/backends/perfetto.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@


class PerfettoTraceAnalysis(TraceAnalysis):

name = "perfetto"

def __init__(self, args: argparse.Namespace):
Expand Down
2 changes: 1 addition & 1 deletion libkineto/include/GenericTraceActivity.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class GenericTraceActivity : public ITraceActivity {
return flow.type;
}

int flowId() const override {
int64_t flowId() const override {
return flow.id;
}

Expand Down
4 changes: 4 additions & 0 deletions libkineto/include/IActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ class IActivityProfilerSession {
// Default implementation is a no-op: the id argument is accepted but unused.
virtual void pushUserCorrelationId(uint64_t /*id*/) {}
// Default implementation is a no-op.
virtual void popUserCorrelationId() {}

// Returns this session's device properties as a string. The base
// implementation returns an empty string; CuptiActivityProfiler skips empty
// results and comma-joins the non-empty ones before passing them to
// ActivityLogger::handleTraceStart.
// NOTE(review): non-empty values appear to be spliced verbatim into the JSON
// "deviceProperties" array by ChromeTraceLogger -- confirm the expected format.
virtual std::string getDeviceProperties() {
return "";
}

protected:
TraceStatus status_ = TraceStatus::READY;
};
Expand Down
2 changes: 1 addition & 1 deletion libkineto/include/ITraceActivity.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct ITraceActivity {
virtual int64_t correlationId() const = 0;
// Part of a flow, identified by flow id and type
virtual int flowType() const = 0;
virtual int flowId() const = 0;
virtual int64_t flowId() const = 0;
virtual bool flowStart() const = 0;
virtual ActivityType type() const = 0;
virtual const std::string name() const = 0;
Expand Down
5 changes: 3 additions & 2 deletions libkineto/include/output_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ class ActivityLogger {
const libkineto::GenericTraceActivity& activity) = 0;

virtual void handleTraceStart(
const std::unordered_map<std::string, std::string>& metadata) = 0;
const std::unordered_map<std::string, std::string>& metadata,
const std::string& device_properties) = 0;

void handleTraceStart() {
handleTraceStart(std::unordered_map<std::string, std::string>());
handleTraceStart(std::unordered_map<std::string, std::string>(), "");
}

virtual void finalizeTrace(
Expand Down
5 changes: 5 additions & 0 deletions libkineto/src/ActivityProfilerController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ void ActivityProfilerController::setLoggerCollectorFactory(
std::function<std::shared_ptr<LoggerCollector>()> factory) {
loggerCollectorFactory() = factory();
}

// Returns the LoggerCollector currently held by loggerCollectorFactory(),
// i.e. the value that setLoggerCollectorFactory() assigns.
// NOTE(review): presumably null if no factory has been set -- confirm.
std::shared_ptr<LoggerCollector>
ActivityProfilerController::getLoggerCollector() {
return loggerCollectorFactory();
}
#endif // !USE_GOOGLE_LOG

ActivityProfilerController::ActivityProfilerController(
Expand Down
1 change: 1 addition & 0 deletions libkineto/src/ActivityProfilerController.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class ActivityProfilerController : public ConfigLoader::ConfigHandler {
~ActivityProfilerController();

#if !USE_GOOGLE_LOG
static std::shared_ptr<LoggerCollector> getLoggerCollector();
static void setLoggerCollectorFactory(
std::function<std::shared_ptr<LoggerCollector>()> factory);
#endif // !USE_GOOGLE_LOG
Expand Down
2 changes: 1 addition & 1 deletion libkineto/src/CuptiActivity.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ struct CuptiActivity : public ITraceActivity {
// Flow type of this activity: always kLinkAsyncCpuGpu.
int flowType() const override {
return kLinkAsyncCpuGpu;
}
int flowId() const override {
int64_t flowId() const override {
return correlationId();
}
const T& raw() const {
Expand Down
13 changes: 13 additions & 0 deletions libkineto/src/CuptiActivityApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ void CuptiActivityApi::bufferRequested(
size_t* size,
size_t* maxNumRecords) {
std::lock_guard<std::mutex> guard(mutex_);
LOG(VERBOSE) << "CUPTI buffer requested";
if (allocatedGpuTraceBuffers_.size() >= maxGpuBufferCount_) {
stopCollection = true;
LOG(WARNING) << "Exceeded max GPU buffer count ("
Expand Down Expand Up @@ -340,9 +341,21 @@ void CuptiActivityApi::enableCuptiActivities(
}
if (activity == ActivityType::CUDA_RUNTIME) {
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME));
#if (CUDART_VERSION >= 12050)
CUPTI_CALL(cuptiActivityEnableRuntimeApi(
CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020, 0));
#endif
}
if (activity == ActivityType::CUDA_DRIVER) {
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER));
#if (CUDART_VERSION >= 12050)
CUPTI_CALL(cuptiActivityEnableDriverApi(
CUPTI_DRIVER_TRACE_CBID_cuKernelGetAttribute, 0));
CUPTI_CALL(cuptiActivityEnableDriverApi(
CUPTI_DRIVER_TRACE_CBID_cuDevicePrimaryCtxGetState, 0));
CUPTI_CALL(cuptiActivityEnableDriverApi(
CUPTI_DRIVER_TRACE_CBID_cuCtxGetCurrent, 0));
#endif
}
if (activity == ActivityType::OVERHEAD) {
CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OVERHEAD));
Expand Down
25 changes: 18 additions & 7 deletions libkineto/src/CuptiActivityProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "CuptiActivityProfiler.h"
#include <fmt/format.h>
#include <fmt/ranges.h>
#include <time.h>
#include <atomic>
#include <cstdint>
Expand All @@ -29,6 +30,7 @@
#endif

#include "Config.h"
#include "DeviceProperties.h"
#include "DeviceUtil.h"
#include "time_since_epoch.h"
#ifdef HAS_CUPTI
Expand Down Expand Up @@ -306,7 +308,17 @@ void CuptiActivityProfiler::processTraceInternal(ActivityLogger& logger) {
for (auto& pair : versionMetadata_) {
addMetadata(pair.first, pair.second);
}
logger.handleTraceStart(metadata_);
std::vector<std::string> device_properties;
if (auto props = devicePropertiesJson(); !props.empty()) {
device_properties.push_back(props);
}
for (const auto& session : sessions_) {
if (auto props = session->getDeviceProperties(); !props.empty()) {
device_properties.push_back(props);
}
}
logger.handleTraceStart(
metadata_, fmt::format("{}", fmt::join(device_properties, ",")));
setCpuActivityPresent(false);
setGpuActivityPresent(false);
for (auto& cpu_trace : traceBuffers_->cpu) {
Expand Down Expand Up @@ -1042,11 +1054,10 @@ void CuptiActivityProfiler::configure(

// Set useful metadata into the logger.
LOGGER_OBSERVER_SET_TRACE_DURATION_MS(config_->activitiesDuration().count());
LOGGER_OBSERVER_SET_TRACE_ID(config_->requestTraceID());
LOGGER_OBSERVER_SET_GROUP_TRACE_ID(config_->requestGroupTraceID());
if (!config_->requestTraceID().empty()) {
LOGGER_OBSERVER_SET_TRACE_ID(config_->requestTraceID());
}
if (!config_->requestGroupTraceID().empty()) {
LOGGER_OBSERVER_SET_GROUP_TRACE_ID(config_->requestGroupTraceID());
addMetadata("trace_id", "\"" + config_->requestTraceID() + "\"");
}

#if defined(HAS_CUPTI) || defined(HAS_ROCTRACER)
Expand Down Expand Up @@ -1357,10 +1368,10 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
void CuptiActivityProfiler::finalizeTrace(
const Config& config,
ActivityLogger& logger) {
LOG(INFO) << "Traces Recorded:";
LOG(INFO) << "CPU Traces Recorded:";
{
for (const auto& it : iterationCountMap_) {
LOG(INFO) << it.first << ": " << it.second << " iterations";
LOG(INFO) << it.first << ": " << it.second << " span(s) recorded";
}
iterationCountMap_.clear();
}
Expand Down
4 changes: 2 additions & 2 deletions libkineto/src/CuptiNvPerfMetric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#ifdef HAS_CUPTI
#include <cuda_runtime_api.h>
#if defined(USE_CUPTI_RANGE_PROFILER) && defined(CUDART_VERSION) && \
CUDART_VERSION > 10000
CUDART_VERSION > 10000 && CUDART_VERSION < 12060
#include <nvperf_cuda_host.h>
#include <nvperf_host.h>
#include <nvperf_target.h>
Expand Down Expand Up @@ -46,7 +46,7 @@ namespace nvperf {
// After CUDA RT 11.04, the structure has changed.
// TODO update the structure NVPA_RawMetricsConfig to support 11.04
#if defined(USE_CUPTI_RANGE_PROFILER) && defined(CUDART_VERSION) && \
CUDART_VERSION > 10000
CUDART_VERSION > 10000 && CUDART_VERSION < 12060

bool getRawMetricRequests(
NVPA_MetricsContext* metricsContext,
Expand Down
2 changes: 1 addition & 1 deletion libkineto/src/RoctracerActivity.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ struct RoctracerActivity : public ITraceActivity {
// Flow type of this activity: always kLinkAsyncCpuGpu.
int flowType() const override {
return kLinkAsyncCpuGpu;
}
int flowId() const override {
int64_t flowId() const override {
return correlationId();
}
const T& raw() const {
Expand Down
2 changes: 0 additions & 2 deletions libkineto/src/RoctracerLogger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,6 @@ void RoctracerLogger::activity_callback(
const char* begin,
const char* end,
void* arg) {
RoctracerLogger* dis = &singleton();

// Log latest completed correlation id. Used to ensure we have flushed all
// data on stop
std::unique_lock<std::mutex> lock(s_flush.mutex_);
Expand Down
25 changes: 23 additions & 2 deletions libkineto/src/output_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ static constexpr const char* kOutSplit = "Out split size";
static constexpr const char* kProcessGroupName = "Process Group Name";
static constexpr const char* kProcessGroupDesc = "Process Group Description";
static constexpr const char* kGroupRanks = "Process Group Ranks";
static constexpr const char* kInTensorsStart = "Input Tensors start";
static constexpr const char* kOutTensorsStart = "Output Tensors start";
static constexpr const char* kRank = "Rank";
static constexpr const char* kP2pSrc = "Src Rank";
static constexpr const char* kP2pDst = "Dst Rank";
Expand Down Expand Up @@ -105,7 +107,8 @@ void ChromeTraceLogger::metadataToJSON(
}

void ChromeTraceLogger::handleTraceStart(
const std::unordered_map<std::string, std::string>& metadata) {
const std::unordered_map<std::string, std::string>& metadata,
const std::string& device_properties) {
traceOf_ << fmt::format(
R"JSON(
{{
Expand All @@ -116,7 +119,7 @@ void ChromeTraceLogger::handleTraceStart(
R"JSON(
"deviceProperties": [{}
],)JSON",
devicePropertiesJson());
device_properties);

metadataToJSON(metadata);
traceOf_ << R"JSON(
Expand Down Expand Up @@ -418,6 +421,24 @@ void ChromeTraceLogger::handleActivity(const libkineto::ITraceActivity& op) {
kDtype,
dtype));
}
const auto& input_tensor_starts =
collectiveRecord->getMetadataValue(kInTensorsStart);
const auto output_tensor_starts =
collectiveRecord->getMetadataValue(kOutTensorsStart);
if (!input_tensor_starts.empty()) {
if (!arg_values.empty()) {
arg_values.append(",");
}
arg_values.append(
fmt::format(" \"{}\": {}", kInTensorsStart, input_tensor_starts));
}
if (!output_tensor_starts.empty()) {
if (!arg_values.empty()) {
arg_values.append(",");
}
arg_values.append(
fmt::format(" \"{}\": {}", kOutTensorsStart, output_tensor_starts));
}
// In/out split size are valid for all_to_all
const auto& inSplitSize = collectiveRecord->getMetadataValue(kInSplit);
const auto& outSplitSize = collectiveRecord->getMetadataValue(kOutSplit);
Expand Down
3 changes: 2 additions & 1 deletion libkineto/src/output_json.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ class ChromeTraceLogger : public libkineto::ActivityLogger {
void handleGenericActivity(const GenericTraceActivity& activity) override;

void handleTraceStart(
const std::unordered_map<std::string, std::string>& metadata) override;
const std::unordered_map<std::string, std::string>& metadata,
const std::string& device_properties) override;

void finalizeTrace(
const Config& config,
Expand Down
7 changes: 5 additions & 2 deletions libkineto/src/output_membuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ class MemoryTraceLogger : public ActivityLogger {
}

void handleTraceStart(
const std::unordered_map<std::string, std::string>& metadata) override {
const std::unordered_map<std::string, std::string>& metadata,
const std::string& device_properties) override {
metadata_ = metadata;
device_properties_ = device_properties;
}

void finalizeTrace(
Expand All @@ -81,7 +83,7 @@ class MemoryTraceLogger : public ActivityLogger {
}

void log(ActivityLogger& logger) {
logger.handleTraceStart(metadata_);
logger.handleTraceStart(metadata_, device_properties_);
for (auto& activity : activities_) {
activity->log(logger);
}
Expand Down Expand Up @@ -121,6 +123,7 @@ class MemoryTraceLogger : public ActivityLogger {
std::unique_ptr<ActivityBuffers> buffers_;
std::unordered_map<std::string, std::string> metadata_;
std::unordered_map<std::string, std::vector<std::string>> loggerMetadata_;
std::string device_properties_;
int64_t endTime_{0};
std::shared_ptr<ActivityLogger> chrome_logger_;
};
Expand Down
Loading

0 comments on commit 5042a21

Please sign in to comment.