diff --git a/nvbench/cupti_profiler.cuh b/nvbench/cupti_profiler.cuh index 214706a..d3351a8 100644 --- a/nvbench/cupti_profiler.cuh +++ b/nvbench/cupti_profiler.cuh @@ -66,6 +66,7 @@ class cupti_profiler // Counter data std::vector m_metric_names; + std::vector m_verified_metric_names; std::vector m_data_image_prefix; std::vector m_config_image; std::vector m_data_image; @@ -116,6 +117,7 @@ private: void initialize_profiler(); void initialize_chip_name(); void initialize_availability_image(); + void verify_metric_names(); static void initialize_nvpw(); void initialize_config_image(); void initialize_counter_data_prefix_image(); diff --git a/nvbench/cupti_profiler.cxx b/nvbench/cupti_profiler.cxx index 6ce1cf7..1fb7217 100644 --- a/nvbench/cupti_profiler.cxx +++ b/nvbench/cupti_profiler.cxx @@ -68,11 +68,18 @@ cupti_profiler::cupti_profiler(nvbench::device_info device, std::vector metric_names with the metrics that are available within the GPU. + Failing gracefully will enable the measurement of other metrics if a metric is not available + + Using NVPW_MetricsEvaluator_GetMetricNames to see what metrics are available, and removing from the vector the ones that are not available. + + */ initialize_nvpw(); initialize_config_image(); initialize_counter_data_prefix_image(); initialize_counter_data_image(); - m_available = true; } @@ -140,6 +147,7 @@ void cupti_profiler::initialize_availability_image() cupti_call(cuptiProfilerGetCounterAvailability(&params)); } + void cupti_profiler::initialize_nvpw() { NVPW_InitializeHost_Params params{}; @@ -149,7 +157,7 @@ void cupti_profiler::initialize_nvpw() namespace { - +// eval_request converts a single human-readable CUPTI metric name into a CUPTI metric evaluation request.
class eval_request { NVPW_MetricsEvaluator *evaluator_ptr; @@ -169,7 +177,7 @@ class eval_request nvpw_call(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(&params)); } - + // Gets the dependencies of a metric name, i.e. what specific pointers are needed for a metric [[nodiscard]] std::vector get_raw_dependencies() { std::vector raw_dependencies; @@ -195,7 +203,7 @@ class eval_request NVPW_MetricEvalRequest request; }; - +// Is responsible for the initialisation of the metric evaluator, so it is used to do eval requests class metric_evaluator { bool initialized{}; @@ -214,7 +222,6 @@ class metric_evaluator NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE; scratch_buffer_param.pChipName = chip_name.c_str(); scratch_buffer_param.pCounterAvailabilityImage = counter_availability_image; - nvpw_call(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(&scratch_buffer_param)); scratch_buffer.resize(scratch_buffer_param.scratchBufferSize); @@ -234,6 +241,21 @@ class metric_evaluator evaluator_ptr = evaluator_params.pMetricsEvaluator; initialized = true; } + std::vector<std::string> list_metrics(){ /* Returns a copy of every metric name reported by NVPW_MetricsEvaluator_GetMetricNames for this evaluator. */ + std::vector<std::string> available_metrics; + NVPW_MetricsEvaluator_GetMetricNames_Params list_metrics_params{}; + list_metrics_params.structSize = NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE; + list_metrics_params.metricType = NVPW_MetricType::NVPW_METRIC_TYPE_THROUGHPUT; /* NOTE(review): only throughput-type metrics are requested here - confirm counters/ratios are not needed. */ + list_metrics_params.pMetricsEvaluator = evaluator_ptr; + + nvpw_call(NVPW_MetricsEvaluator_GetMetricNames(&list_metrics_params)); + available_metrics.reserve(list_metrics_params.numMetrics); /* Size the vector only after the call: numMetrics is still zero-initialised before it. */ + for (size_t metric_ix = 0; metric_ix < list_metrics_params.numMetrics; metric_ix++){ + const size_t start_metric_ix = list_metrics_params.pMetricNameBeginIndices[metric_ix]; + available_metrics.emplace_back(&list_metrics_params.pMetricNames[start_metric_ix]); + } + return available_metrics; + } ~metric_evaluator() { @@ -260,7 +282,7 @@ class metric_evaluator namespace { - +// Gets the rawMetrics for each high 
level metrics, and put it into a RawMetricsRequest Vector [[nodiscard]] std::vector get_raw_metric_requests(const std::string &chip_name, const std::vector &metric_names, @@ -298,7 +320,7 @@ get_raw_metric_requests(const std::string &chip_name, class metrics_config { bool initialized{}; - + // Creates a rawMetricsConfig from the chip_name and the availability_image void create(const std::string &chip_name, const std::uint8_t *availability_image) { NVPW_CUDA_RawMetricsConfig_Create_V2_Params params{}; @@ -313,7 +335,7 @@ class metrics_config raw_metrics_config = params.pRawMetricsConfig; initialized = true; } - + // Put the availability image into the raw_metrics_config void set_availability_image(const std::uint8_t *availability_image) { NVPW_RawMetricsConfig_SetCounterAvailability_Params params{}; @@ -324,7 +346,7 @@ class metrics_config nvpw_call(NVPW_RawMetricsConfig_SetCounterAvailability(&params)); } - + // Create a new group of metrics to measure void begin_config_group() { NVPW_RawMetricsConfig_BeginPassGroup_Params params{}; @@ -334,7 +356,7 @@ class metrics_config nvpw_call(NVPW_RawMetricsConfig_BeginPassGroup(&params)); } - + // Add the array of rawMetrics to the actual config void add_metrics(const std::vector &raw_metric_requests) { NVPW_RawMetricsConfig_AddMetrics_Params params{}; @@ -346,7 +368,7 @@ class metrics_config nvpw_call(NVPW_RawMetricsConfig_AddMetrics(&params)); } - + // End the config group configuration void end_config_group() { NVPW_RawMetricsConfig_EndPassGroup_Params params{}; @@ -356,7 +378,7 @@ class metrics_config nvpw_call(NVPW_RawMetricsConfig_EndPassGroup(&params)); } - + // Finalize the image for the configuration void generate() { NVPW_RawMetricsConfig_GenerateConfigImage_Params params{}; @@ -368,6 +390,7 @@ class metrics_config } public: + // Initialize a metric config pass with the RawMetricRequests and build its config image metrics_config(const std::string &chip_name, const std::vector &raw_metric_requests, const std::uint8_t *availability_image) 
@@ -380,7 +403,7 @@ class metrics_config end_config_group(); generate(); } - + // Retrieve the config image [[nodiscard]] std::vector get_config_image() { NVPW_RawMetricsConfig_GetConfigImage_Params params{}; @@ -418,6 +441,17 @@ class metrics_config } // namespace +void cupti_profiler::verify_metric_names() /* Queries the metric evaluator for its metric names, stores them in m_verified_metric_names and prints them. */ +{ + metric_evaluator evaluator(m_chip_name, m_availability_image.data()); + m_verified_metric_names = evaluator.list_metrics(); + printf("Metrics \n");//TEMPORARY - Asserting functionality + for(const std::string &item:m_verified_metric_names){ + printf("%s\n", item.c_str()); + } + printf("---------------------\n"); +} + void cupti_profiler::initialize_config_image() { m_config_image = metrics_config(m_chip_name, @@ -508,7 +542,7 @@ void cupti_profiler::initialize_counter_data_prefix_image() namespace { - +// From the data_image_prefix, get the counter data image size [[nodiscard]] std::size_t get_counter_data_image_size(CUpti_Profiler_CounterDataImageOptions *options) { @@ -691,7 +725,7 @@ void cupti_profiler::process_user_loop() cupti_call(cuptiProfilerEndSession(&params)); } } - +//Instead of returning a std::vector, maybe returning a map string -> double, with the string being the .pct name. std::vector cupti_profiler::get_counter_values() { metric_evaluator evaluator(m_chip_name, diff --git a/nvbench/detail/measure_cupti.cu b/nvbench/detail/measure_cupti.cu index 24028f2..e6969ba 100644 --- a/nvbench/detail/measure_cupti.cu +++ b/nvbench/detail/measure_cupti.cu @@ -159,7 +159,16 @@ std::vector add_metrics(nvbench::state &state) } } // namespace +/* + struct CustomCuptiMetrics{ + const char *metric_name; + const char *name; + const char *hint; + const char *description; + const double divider; + } +*/ measure_cupti_base::measure_cupti_base(state &exec_state) // clang-format off // (formatter doesn't handle `try :` very well...) 
@@ -167,6 +176,7 @@ try : m_state{exec_state} , m_launch{exec_state.get_cuda_stream()} , m_cupti{*m_state.get_device(), add_metrics(m_state)} + //Inside the state, have a std::vector, so when calling add_metrics, it adds the ones created by the user. {} // clang-format on catch (const std::exception &ex)