diff --git a/docs/Makefile.am b/docs/Makefile.am index 981594526d2..d9b66052e9c 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -432,8 +432,10 @@ OMPI_MAN3 = \ MPI_T_category_changed.3 \ MPI_T_category_get_categories.3 \ MPI_T_category_get_cvars.3 \ + MPI_T_category_get_events.3 \ MPI_T_category_get_info.3 \ MPI_T_category_get_num.3 \ + MPI_T_category_get_num_events.3 \ MPI_T_category_get_pvars.3 \ MPI_T_cvar_get_info.3 \ MPI_T_cvar_get_num.3 \ diff --git a/docs/man-openmpi/man3/MPI_T_category_get_events.3.rst b/docs/man-openmpi/man3/MPI_T_category_get_events.3.rst new file mode 100644 index 00000000000..efec965a433 --- /dev/null +++ b/docs/man-openmpi/man3/MPI_T_category_get_events.3.rst @@ -0,0 +1,50 @@ +.. _mpi_t_category_get_events: + + +MPI_T_category_get_events +========================= + +.. include_body + +:ref:`MPI_T_category_get_events` |mdash| Query which events are in a +category + + +SYNTAX +------ + + +C Syntax +^^^^^^^^ + +.. code-block:: c + + #include + + int MPI_T_category_get_events(int cat_index, int len, int indices[]) + + +INPUT PARAMETERS +---------------- +* ``cat_index``: Index of the category to be queried. +* ``len``: The length of the indices array. + +OUTPUT PARAMETERS +----------------- +* ``indices``: An integer array of size len, indicating event indices. + +DESCRIPTION +----------- + +:ref:`MPI_T_category_get_events` can be used to query which events +are contained in a particular category. + + +ERRORS +------ + +:ref:`MPI_T_category_get_events` will fail if: + +* ``MPI_T_ERR_NOT_INITIALIZED``: The MPI Tools interface not initialized + +* ``MPI_T_ERR_INVALID_INDEX``: The category index is invalid diff --git a/docs/man-openmpi/man3/MPI_T_category_get_num_events.3.rst b/docs/man-openmpi/man3/MPI_T_category_get_num_events.3.rst new file mode 100644 index 00000000000..02c552753b9 --- /dev/null +++ b/docs/man-openmpi/man3/MPI_T_category_get_num_events.3.rst @@ -0,0 +1,47 @@ +.. _mpi_t_category_get_num_events: + + +MPI_T_category_get_num_events +============================= + +.. include_body + +:ref:`MPI_T_category_get_num_events` |mdash| Query returns the number of event types contained +in the queried category. + + +SYNTAX +------ + + +C Syntax +^^^^^^^^ + +.. code-block:: c + + #include + + int MPI_T_category_get_num_events(int cat_index, int *num_events) + + +INPUT PARAMETERS +---------------- +* ``cat_index``: Index of the category to be queried + +OUTPUT PARAMETERS +----------------- +* ``num_events``: Number of event types in the category + +DESCRIPTION +----------- + +:ref:`MPI_T_category_get_num_events` can be used to query the number of events +contained in the category. + + +ERRORS +------ + +:ref:`MPI_T_category_get_num_events` will fail if: + +* ``MPI_T_ERR_NOT_INITIALIZED``: The MPI Tools interface not initialized diff --git a/docs/man-openmpi/man3/index.rst b/docs/man-openmpi/man3/index.rst index 6def45402d1..d05216adaa1 100644 --- a/docs/man-openmpi/man3/index.rst +++ b/docs/man-openmpi/man3/index.rst @@ -357,8 +357,10 @@ MPI API manual pages (section 3) MPI_T_category_changed.3.rst MPI_T_category_get_categories.3.rst MPI_T_category_get_cvars.3.rst + MPI_T_category_get_events.3.rst MPI_T_category_get_info.3.rst MPI_T_category_get_num.3.rst + MPI_T_category_get_num_events.3.rst MPI_T_category_get_pvars.3.rst MPI_T_cvar_get_info.3.rst MPI_T_cvar_get_num.3.rst diff --git a/ompi/include/mpi.h.in b/ompi/include/mpi.h.in index ced7340baa2..4886868c37c 100644 --- a/ompi/include/mpi.h.in +++ b/ompi/include/mpi.h.in @@ -3760,11 +3760,13 @@ OMPI_DECLSPEC int PMPI_T_cvar_handle_free (MPI_T_cvar_handle *handle); OMPI_DECLSPEC int PMPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf); OMPI_DECLSPEC int PMPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf); OMPI_DECLSPEC int PMPI_T_category_get_num(int *num_cat); +OMPI_DECLSPEC int PMPI_T_category_get_num_events (int cat_index, int *num_events); OMPI_DECLSPEC int PMPI_T_category_get_info(int cat_index, char *name, int *name_len, char *desc, int *desc_len, int *num_cvars, int *num_pvars, int *num_categories); OMPI_DECLSPEC int PMPI_T_category_get_index (const char *name, int *category_index); OMPI_DECLSPEC int PMPI_T_category_get_cvars(int cat_index, int len, int indices[]); +OMPI_DECLSPEC int PMPI_T_category_get_events(int cat_index, int len, int indices[]); OMPI_DECLSPEC int PMPI_T_category_get_pvars(int cat_index, int len, int indices[]); OMPI_DECLSPEC int PMPI_T_category_get_categories(int cat_index, int len, int indices[]); OMPI_DECLSPEC int PMPI_T_category_changed(int *stamp); @@ -3845,11 +3847,13 @@ OMPI_DECLSPEC int MPI_T_cvar_handle_free (MPI_T_cvar_handle *handle); OMPI_DECLSPEC int MPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf); OMPI_DECLSPEC int MPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf); OMPI_DECLSPEC int MPI_T_category_get_num(int *num_cat); +OMPI_DECLSPEC int MPI_T_category_get_num_events (int cat_index, int *num_events); OMPI_DECLSPEC int MPI_T_category_get_info(int cat_index, char *name, int *name_len, char *desc, int *desc_len, int *num_cvars, int *num_pvars, int *num_categories); OMPI_DECLSPEC int MPI_T_category_get_index (const char *name, int *category_index); OMPI_DECLSPEC int MPI_T_category_get_cvars(int cat_index, int len, int indices[]); +OMPI_DECLSPEC int MPI_T_category_get_events(int cat_index, int len, int indices[]); OMPI_DECLSPEC int MPI_T_category_get_pvars(int cat_index, int len, int indices[]); OMPI_DECLSPEC int MPI_T_category_get_categories(int cat_index, int len, int indices[]); OMPI_DECLSPEC int MPI_T_category_changed(int *stamp); diff --git a/ompi/mca/op/aarch64/configure.m4 b/ompi/mca/op/aarch64/configure.m4 index 08d62b2e2f8..fedc9adf6ce 100644 --- a/ompi/mca/op/aarch64/configure.m4 +++ b/ompi/mca/op/aarch64/configure.m4 @@ -13,10 +13,11 @@ # # MCA_ompi_op_arm_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) +# [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[ AC_CONFIG_FILES([ompi/mca/op/aarch64/Makefile]) + case "${host}" in aarch64*|arm64*) op_aarch64_check="yes";; @@ -71,36 +72,74 @@ AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[ [op_cv_neon_fp_support=yes], [op_cv_neon_fp_support=no])])]) - # + + # # Check for SVE support # - AC_CACHE_CHECK([for SVE support], op_cv_sve_support, - [AS_IF([test "$op_cv_neon_support" = "yes"], - [ - AC_LINK_IFELSE( - [AC_LANG_PROGRAM([[ + AC_CACHE_CHECK([for SVE support], [op_cv_sve_support], [ + AC_MSG_RESULT([]) + # initialize result variables + op_cv_sve_support=no + op_cv_sve_add_flags=no + + # first attempt: no extra flags + AC_MSG_CHECKING([for SVE support (no additional flags)]) + AC_LINK_IFELSE( + [AC_LANG_SOURCE([[ #if defined(__aarch64__) && defined(__ARM_FEATURE_SVE) -#include + #include #else -#error "No support for __aarch64__ or SVE" + #error "No support for __aarch64__ or SVE" #endif - ]], - [[ -#if defined(__aarch64__) && defined(_ARM_FEATURE_SVE) - svfloat32_t vA; - vA = svdup_n_f32(0) + +int main(void) { + svfloat32_t vA; + vA = svdup_n_f32(0); + return 0; +} + ]])], + [ op_cv_sve_support=yes + AC_MSG_RESULT([yes]) ], + [ AC_MSG_RESULT([no ]) ] + ) + + # second attempt: use +sve attribute + AS_IF([test "$op_cv_sve_support" = "no"],[ + AC_MSG_CHECKING([for SVE support (with +sve)]) + AC_LINK_IFELSE( + [AC_LANG_SOURCE([[ +#if defined(__aarch64__) + #include +#else + #error "not on aarch64" #endif - ]])], - [op_cv_sve_support=yes], - [op_cv_sve_support=no])])]) - ]) +__attribute__((__target__("+sve"))) +int main(void) { + svbool_t pg = svptrue_b32(); + svuint32_t a = svdup_u32(0); + svuint32_t b = svdup_u32(0); + svuint32_t c = svadd_u32_m(pg, a, b); + return (int)svaddv_u32(pg, c); +} + ]])], + [ op_cv_sve_support=yes + op_cv_sve_add_flags=yes + AC_MSG_RESULT([yes]) ], + [ AC_MSG_RESULT([no ]) ] + ) + ]) + ]) + + AC_LANG_POP +]) AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_support], [test "$op_cv_neon_support" = "yes"]) AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_fp_support], [test "$op_cv_neon_fp_support" = "yes"]) AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sve_support], [test "$op_cv_sve_support" = "yes"]) + AC_SUBST(MCA_BUILD_ompi_op_has_neon_support) AC_SUBST(MCA_BUILD_ompi_op_has_neon_fp_support) AC_SUBST(MCA_BUILD_ompi_op_has_sve_support) @@ -111,9 +150,13 @@ AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[ [AC_DEFINE([OMPI_MCA_OP_HAVE_NEON_FP], [1],[NEON FP supported in the current build])]) AS_IF([test "$op_cv_sve_support" = "yes"], [AC_DEFINE([OMPI_MCA_OP_HAVE_SVE], [1],[SVE supported in the current build])]) + AS_IF([test "$op_cv_sve_add_flags" = "yes"], + [AC_DEFINE([OMPI_MCA_OP_SVE_EXTRA_FLAGS], [1],[SVE supported with additional compile attributes])]) - # If we have at least support for Neon - AS_IF([test "$op_cv_neon_support" = "yes"], + + # If we have at least support for Neon or SVE + AS_IF([test "$op_cv_neon_support" = "yes" || test "$op_cv_sve_support" = "yes" ], [$1], [$2]) + ])dnl diff --git a/ompi/mca/op/aarch64/op_aarch64.h b/ompi/mca/op/aarch64/op_aarch64.h index 22be89614c6..46e80d27fb7 100644 --- a/ompi/mca/op/aarch64/op_aarch64.h +++ b/ompi/mca/op/aarch64/op_aarch64.h @@ -24,6 +24,12 @@ BEGIN_C_DECLS +#if defined(OMPI_MCA_OP_SVE_EXTRA_FLAGS) +#define SVE_ATTR __attribute__ ((__target__ ("+sve"))) +#else +#define SVE_ATTR +#endif + /** * Derive a struct from the base op component struct, allowing us to * cache some component-specific information on our well-known diff --git a/ompi/mca/op/aarch64/op_aarch64_component.c b/ompi/mca/op/aarch64/op_aarch64_component.c index 92bf4fa3656..6fde84137f7 100644 --- a/ompi/mca/op/aarch64/op_aarch64_component.c +++ b/ompi/mca/op/aarch64/op_aarch64_component.c @@ -101,7 +101,7 @@ static int mca_op_aarch64_component_close(void) /* * Register MCA params. */ -static int mca_op_aarch64_component_register(void) +SVE_ATTR static int mca_op_aarch64_component_register(void) { mca_op_aarch64_component.hardware_available = 1; /* Check for Neon */ diff --git a/ompi/mca/op/aarch64/op_aarch64_functions.c b/ompi/mca/op/aarch64/op_aarch64_functions.c index 5515ddb9567..61e9f53a0b9 100644 --- a/ompi/mca/op/aarch64/op_aarch64_functions.c +++ b/ompi/mca/op/aarch64/op_aarch64_functions.c @@ -136,24 +136,25 @@ _Generic((*(out)), \ } \ } #elif defined(GENERATE_SVE_CODE) -#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \ +#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \ + SVE_ATTR \ static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, APPEND) \ - (const void *_in, void *_out, int *count, \ - struct ompi_datatype_t **dtype, \ - struct ompi_op_base_module_1_0_0_t *module) \ - { \ - const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \ - const int cnt = *count; \ - type##type_size##_t *in = (type##type_size##_t *) _in, \ - *out = (type##type_size##_t *) _out; \ - OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \ - for (int idx=0; idx < cnt; idx += types_per_step) { \ - svbool_t pred = svwhilelt_b##type_size(idx, cnt); \ - vsrc = svld1(pred, &in[idx]); \ - vdst = svld1(pred, &out[idx]); \ - vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \ - OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \ - } \ + (const void *_in, void *_out, int *count, \ + struct ompi_datatype_t **dtype, \ + struct ompi_op_base_module_1_0_0_t *module) \ + { \ + const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \ + const int cnt = *count; \ + type##type_size##_t *in = (type##type_size##_t *) _in, \ + *out = (type##type_size##_t *) _out; \ + OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \ + for (int idx=0; idx < cnt; idx += types_per_step) { \ + svbool_t pred = svwhilelt_b##type_size(idx, cnt); \ + vsrc = svld1(pred, &in[idx]); \ + vdst = svld1(pred, &out[idx]); \ + vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \ + OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \ + } \ } #endif @@ -302,25 +303,26 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE } \ } #elif defined(GENERATE_SVE_CODE) -#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \ -static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \ - (const void *_in1, const void *_in2, void *_out, int *count, \ - struct ompi_datatype_t **dtype, \ - struct ompi_op_base_module_1_0_0_t *module) \ -{ \ - const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \ - type##type_size##_t *in1 = (type##type_size##_t *) _in1, \ - *in2 = (type##type_size##_t *) _in2, \ - *out = (type##type_size##_t *) _out; \ - const int cnt = *count; \ - OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \ - for (int idx=0; idx < cnt; idx += types_per_step) { \ - svbool_t pred = svwhilelt_b##type_size(idx, cnt); \ - vsrc = svld1(pred, &in1[idx]); \ - vdst = svld1(pred, &in2[idx]); \ - vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \ - OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \ - } \ +#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \ + SVE_ATTR \ + static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \ + (const void *_in1, const void *_in2, void *_out, int *count, \ + struct ompi_datatype_t **dtype, \ + struct ompi_op_base_module_1_0_0_t *module) \ +{ \ + const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \ + type##type_size##_t *in1 = (type##type_size##_t *) _in1, \ + *in2 = (type##type_size##_t *) _in2, \ + *out = (type##type_size##_t *) _out; \ + const int cnt = *count; \ + OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \ + for (int idx=0; idx < cnt; idx += types_per_step) { \ + svbool_t pred = svwhilelt_b##type_size(idx, cnt); \ + vsrc = svld1(pred, &in1[idx]); \ + vdst = svld1(pred, &in2[idx]); \ + vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \ + OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \ + } \ } #endif /* defined(GENERATE_SVE_CODE) */ diff --git a/ompi/mpi/tool/Makefile.am b/ompi/mpi/tool/Makefile.am index 8d9c957c21e..72cb71b9b60 100644 --- a/ompi/mpi/tool/Makefile.am +++ b/ompi/mpi/tool/Makefile.am @@ -6,7 +6,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2012-2018 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2018-2025 Triad National Security, LLC. All rightsa +# Copyright (c) 2018-2025 Triad National Security, LLC. All rights # reserved. # Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights # reserved. @@ -54,9 +54,11 @@ interface_profile_sources = \ category_changed.c \ category_get_categories.c \ category_get_cvars.c \ + category_get_events.c \ category_get_info.c \ category_get_index.c \ category_get_num.c \ + category_get_num_events.c \ category_get_pvars.c \ cvar_get_info.c \ cvar_get_index.c \ diff --git a/ompi/mpi/tool/category_get_events.c b/ompi/mpi/tool/category_get_events.c new file mode 100644 index 00000000000..3c6576b79f8 --- /dev/null +++ b/ompi/mpi/tool/category_get_events.c @@ -0,0 +1,40 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. + * Copyright (c) 2025 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/tool/mpit-internal.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_T_category_get_events = PMPI_T_category_get_events +#endif +#define MPI_T_category_get_events PMPI_T_category_get_events +#endif + +int MPI_T_category_get_events(int cat_index, int len, int indices[]) +{ + int rc = MPI_SUCCESS; + + if (!mpit_is_initialized ()) { + return MPI_T_ERR_NOT_INITIALIZED; + } + return rc; +} diff --git a/ompi/mpi/tool/category_get_num_events.c b/ompi/mpi/tool/category_get_num_events.c new file mode 100644 index 00000000000..b7e49070323 --- /dev/null +++ b/ompi/mpi/tool/category_get_num_events.c @@ -0,0 +1,45 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2017 IBM Corporation. All rights reserved. + * Copyright (c) 2020 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights + * reserved. + * Copyright (c) 2025 Triad National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mpi/tool/mpit-internal.h" + +#if OMPI_BUILD_MPI_PROFILING +#if OPAL_HAVE_WEAK_SYMBOLS +#pragma weak MPI_T_category_get_num_events = PMPI_T_category_get_num_events +#endif +#define MPI_T_category_get_num_events PMPI_T_category_get_num_events +#endif + +int MPI_T_category_get_num_events (int cat_index, int *num_events) +{ + if (!mpit_is_initialized ()) { + return MPI_T_ERR_NOT_INITIALIZED; + } + + if (MPI_PARAM_CHECK && NULL == num_events) { + return MPI_T_ERR_INVALID; + } + + *num_events = 0; + + return MPI_SUCCESS; +}