Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions libkineto/src/CuptiRangeProfilerApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
* LICENSE file in the root directory of this source tree.
*/

#include <cstdio>
#include <cstdlib>
#include "ILoggerObserver.h"
#ifdef HAS_CUPTI
Expand All @@ -24,6 +23,7 @@

// TODO(T90238193)
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
#include "CuptiCallbackApi.h"
#include "CuptiRangeProfilerApi.h"

#define STRINGIFY(x) #x
Expand Down Expand Up @@ -218,7 +218,7 @@ void __trackCudaKernelLaunch(
profiler->numCallbacks_++;
}

bool enableKernelCallbacks() {
static bool enableKernelCallbacks() {
auto cbapi = CuptiCallbackApi::singleton();

bool status = cbapi->enableCallback(
Expand Down
11 changes: 4 additions & 7 deletions libkineto/src/CuptiRangeProfilerApi.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@
#include <cuda_runtime_api.h>
// Using CUDA 11 and above due to usage of API:
// cuptiProfilerGetCounterAvailability.
// Starting from CUDA 12.06 the Profiler API is superseded by Range Profiler API
// Starting from CUDA 12.6 the Profiler API is superseded by Range Profiler API
// This needs significant rework. See
// https://docs.nvidia.com/cupti/main/main.html#evolution-of-the-profiling-apis
#if defined(USE_CUPTI_RANGE_PROFILER) && defined(CUDART_VERSION) && \
CUDART_VERSION >= 10000 && CUDA_VERSION >= 11000 && CUDA_VERSION <= 12060
CUDART_VERSION >= 10000 && CUDA_VERSION >= 12060
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure this is correct. I recall the range profiler breaking on later versions

Copy link
Contributor Author

@cyyever cyyever Aug 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sraikund16 Can you reproduce the breakage? Either way, the guard is wrong according to the comment above it. If the profiler still breaks, we should fix it rather than incorrectly disabling it.

Copy link
Contributor

@sraikund16 sraikund16 Aug 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cyyever based on the NV link provided, it looks like we can use the current APIs until 13.0 although it is not recommended. However, by 13.0 we will need to use the new APIs as the old ones are deprecated. Either way it would require a rework.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the current APIs exist during the whole CUDA 13 cycle, then we have lots of time for refactoring.

#define HAS_CUPTI_RANGE_PROFILER 1
#endif // CUDART_VERSION > 10.00 and CUDA_VERSION >= 11.00 and CUDA_VERSION
// <= 12.06
#endif // CUDART_VERSION >= 10.00 and CUDA_VERSION >= 12.6
#endif // HAS_CUPTI

#if HAS_CUPTI_RANGE_PROFILER
Expand All @@ -40,14 +39,13 @@ enum CUpti_ProfilerReplayMode {
#endif // HAS_CUPTI_RANGE_PROFILER

#include <chrono>
#include <mutex>
#include <set>
#include <string>
#include <vector>

// TODO(T90238193)
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
#include "CuptiCallbackApi.h"
// #include "CuptiCallbackApi.h"
#include "CuptiNvPerfMetric.h"
#include "TraceSpan.h"

Expand Down Expand Up @@ -214,7 +212,6 @@ class CuptiRBProfilerSession {
// Abstract factory interface for CuptiRBProfilerSession objects.
// It declares a single pure-virtual make() that concrete factories must
// override, plus a virtual destructor so a concrete factory can be destroyed
// through a pointer to this interface.
struct ICuptiRBProfilerSessionFactory {
// Returns a uniquely-owned CuptiRBProfilerSession for the supplied options.
// The std::unique_ptr return type hands ownership to the caller.
virtual std::unique_ptr<CuptiRBProfilerSession> make(
const CuptiRangeProfilerOptions& opts) = 0;
virtual ~ICuptiRBProfilerSessionFactory() {}
};

struct CuptiRBProfilerSessionFactory : ICuptiRBProfilerSessionFactory {
Expand Down