Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions nvbench/cupti_profiler.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class cupti_profiler

// Counter data
std::vector<std::string> m_metric_names;
std::vector<std::string> m_verified_metric_names;
std::vector<std::uint8_t> m_data_image_prefix;
std::vector<std::uint8_t> m_config_image;
std::vector<std::uint8_t> m_data_image;
Expand Down Expand Up @@ -116,6 +117,7 @@ private:
void initialize_profiler();
void initialize_chip_name();
void initialize_availability_image();
void verify_metric_names();
static void initialize_nvpw();
void initialize_config_image();
void initialize_counter_data_prefix_image();
Expand Down
64 changes: 49 additions & 15 deletions nvbench/cupti_profiler.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,18 @@ cupti_profiler::cupti_profiler(nvbench::device_info device, std::vector<std::str
initialize_profiler();
initialize_chip_name();
initialize_availability_image();
verify_metric_names();
/*
Update the std::vector<std::string> metric_names so that it only contains the metrics that are available on the GPU.
Failing gracefully allows the other metrics to be measured when one of the requested metrics is not available.

Use NVPW_MetricsEvaluator_GetMetricNames to see which metrics are available, and remove from the vector the ones that are not.

*/
initialize_nvpw();
initialize_config_image();
initialize_counter_data_prefix_image();
initialize_counter_data_image();

m_available = true;
}

Expand Down Expand Up @@ -140,6 +147,7 @@ void cupti_profiler::initialize_availability_image()
cupti_call(cuptiProfilerGetCounterAvailability(&params));
}


void cupti_profiler::initialize_nvpw()
{
NVPW_InitializeHost_Params params{};
Expand All @@ -149,7 +157,7 @@ void cupti_profiler::initialize_nvpw()

namespace
{

// eval_request converts a single human-readable CUPTI metric name into a CUPTI metric evaluation request.
class eval_request
{
NVPW_MetricsEvaluator *evaluator_ptr;
Expand All @@ -169,7 +177,7 @@ class eval_request

nvpw_call(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(&params));
}

// Gets the dependencies of a metric name, i.e. which specific raw dependencies (returned as C-string pointers) are needed for the metric
[[nodiscard]] std::vector<const char *> get_raw_dependencies()
{
std::vector<const char *> raw_dependencies;
Expand All @@ -195,7 +203,7 @@ class eval_request

NVPW_MetricEvalRequest request;
};

// Responsible for initializing the metrics evaluator, which is then used to create eval requests
class metric_evaluator
{
bool initialized{};
Expand All @@ -214,7 +222,6 @@ class metric_evaluator
NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE;
scratch_buffer_param.pChipName = chip_name.c_str();
scratch_buffer_param.pCounterAvailabilityImage = counter_availability_image;

nvpw_call(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(&scratch_buffer_param));

scratch_buffer.resize(scratch_buffer_param.scratchBufferSize);
Expand All @@ -234,6 +241,21 @@ class metric_evaluator
evaluator_ptr = evaluator_params.pMetricsEvaluator;
initialized = true;
}
// Lists the metric names that the metrics evaluator reports for the
// NVPW_METRIC_TYPE_THROUGHPUT metric type.
//
// Returns one std::string per reported metric, copied out of the name table
// that NVPW_MetricsEvaluator_GetMetricNames exposes through
// pMetricNames / pMetricNameBeginIndices.
//
// NOTE(review): only throughput metrics are queried here; metrics of other
// NVPW_MetricType values are not part of the result -- confirm that this is
// the intended scope before using the list to validate user-provided names.
[[nodiscard]] std::vector<std::string> list_metrics()
{
  NVPW_MetricsEvaluator_GetMetricNames_Params list_metrics_params{};
  list_metrics_params.structSize = NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE;
  list_metrics_params.metricType = NVPW_MetricType::NVPW_METRIC_TYPE_THROUGHPUT;
  list_metrics_params.pMetricsEvaluator = evaluator_ptr;

  // numMetrics is an output of the call below, so the result vector can only
  // be sized after the call has completed.
  nvpw_call(NVPW_MetricsEvaluator_GetMetricNames(&list_metrics_params));

  std::vector<std::string> available_metrics;
  available_metrics.reserve(list_metrics_params.numMetrics);

  for (size_t metric_ix = 0; metric_ix < list_metrics_params.numMetrics; ++metric_ix)
  {
    // Each entry of pMetricNameBeginIndices is the offset of a
    // null-terminated name inside the pMetricNames character table.
    const size_t start_metric_ix = list_metrics_params.pMetricNameBeginIndices[metric_ix];
    available_metrics.emplace_back(&list_metrics_params.pMetricNames[start_metric_ix]);
  }
  return available_metrics;
}

~metric_evaluator()
{
Expand All @@ -260,7 +282,7 @@ class metric_evaluator

namespace
{

// Gets the raw metrics for each high-level metric and puts them into a vector of NVPA_RawMetricRequest
[[nodiscard]] std::vector<NVPA_RawMetricRequest>
get_raw_metric_requests(const std::string &chip_name,
const std::vector<std::string> &metric_names,
Expand Down Expand Up @@ -298,7 +320,7 @@ get_raw_metric_requests(const std::string &chip_name,
class metrics_config
{
bool initialized{};

// Inits a rawMetricsConfig from an availability_image or chip_name
void create(const std::string &chip_name, const std::uint8_t *availability_image)
{
NVPW_CUDA_RawMetricsConfig_Create_V2_Params params{};
Expand All @@ -313,7 +335,7 @@ class metrics_config
raw_metrics_config = params.pRawMetricsConfig;
initialized = true;
}

// Put the availability image into the raw_metrics_config
void set_availability_image(const std::uint8_t *availability_image)
{
NVPW_RawMetricsConfig_SetCounterAvailability_Params params{};
Expand All @@ -324,7 +346,7 @@ class metrics_config

nvpw_call(NVPW_RawMetricsConfig_SetCounterAvailability(&params));
}

// Create a new group of metrics to measure
void begin_config_group()
{
NVPW_RawMetricsConfig_BeginPassGroup_Params params{};
Expand All @@ -334,7 +356,7 @@ class metrics_config

nvpw_call(NVPW_RawMetricsConfig_BeginPassGroup(&params));
}

// Add the array of rawMetrics to the actual config
void add_metrics(const std::vector<NVPA_RawMetricRequest> &raw_metric_requests)
{
NVPW_RawMetricsConfig_AddMetrics_Params params{};
Expand All @@ -346,7 +368,7 @@ class metrics_config

nvpw_call(NVPW_RawMetricsConfig_AddMetrics(&params));
}

// End the config group configuration
void end_config_group()
{
NVPW_RawMetricsConfig_EndPassGroup_Params params{};
Expand All @@ -356,7 +378,7 @@ class metrics_config

nvpw_call(NVPW_RawMetricsConfig_EndPassGroup(&params));
}

// Finalize the image for the configuration
void generate()
{
NVPW_RawMetricsConfig_GenerateConfigImage_Params params{};
Expand All @@ -368,6 +390,7 @@ class metrics_config
}

public:
// Initializes a metric config pass with the raw metric requests and builds its config image
metrics_config(const std::string &chip_name,
const std::vector<NVPA_RawMetricRequest> &raw_metric_requests,
const std::uint8_t *availability_image)
Expand All @@ -380,7 +403,7 @@ class metrics_config
end_config_group();
generate();
}

// Retrieves the config image
[[nodiscard]] std::vector<std::uint8_t> get_config_image()
{
NVPW_RawMetricsConfig_GetConfigImage_Params params{};
Expand Down Expand Up @@ -418,6 +441,17 @@ class metrics_config

} // namespace

// Builds a metric_evaluator from the chip name and the counter availability
// image, asks it for its list of metric names and stores that list in
// m_verified_metric_names.
//
// The list is currently also printed to stdout as temporary debug output.
// This function does not modify m_metric_names.
void cupti_profiler::verify_metric_names()
{
  metric_evaluator evaluator(m_chip_name, m_availability_image.data());
  m_verified_metric_names = evaluator.list_metrics();

  printf("Metrics \n"); // TEMPORARY - asserting functionality
  // Iterate by const reference so that no std::string is copied per element.
  for (const auto &item : m_verified_metric_names)
  {
    printf("%s\n", item.c_str());
  }
  printf("---------------------\n");
}

void cupti_profiler::initialize_config_image()
{
m_config_image = metrics_config(m_chip_name,
Expand Down Expand Up @@ -508,7 +542,7 @@ void cupti_profiler::initialize_counter_data_prefix_image()

namespace
{

// From the data_image_prefix, get the counter data imagesize
[[nodiscard]] std::size_t
get_counter_data_image_size(CUpti_Profiler_CounterDataImageOptions *options)
{
Expand Down Expand Up @@ -691,7 +725,7 @@ void cupti_profiler::process_user_loop()
cupti_call(cuptiProfilerEndSession(&params));
}
}

// Instead of returning a std::vector<double>, maybe return a map from string to double, with the string being the .pct name.
std::vector<double> cupti_profiler::get_counter_values()
{
metric_evaluator evaluator(m_chip_name,
Expand Down
10 changes: 10 additions & 0 deletions nvbench/detail/measure_cupti.cu
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,24 @@ std::vector<std::string> add_metrics(nvbench::state &state)
}

} // namespace
/*
struct CustomCuptiMetrics{
const char *metric_name;
const char *name;
const char *hint;
const char *description;
const double divider;
}

*/
measure_cupti_base::measure_cupti_base(state &exec_state)
// clang-format off
// (formatter doesn't handle `try :` very well...)
try
: m_state{exec_state}
, m_launch{exec_state.get_cuda_stream()}
, m_cupti{*m_state.get_device(), add_metrics(m_state)}
// Inside the state, have a std::vector<CustomCuptiMetrics>, so that add_metrics also adds the metrics created by the user.
{}
// clang-format on
catch (const std::exception &ex)
Expand Down