Skip to content

aarch64 op: Enable two‑stage SVE detection in component configuration #13203

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,10 @@ OMPI_MAN3 = \
MPI_T_category_changed.3 \
MPI_T_category_get_categories.3 \
MPI_T_category_get_cvars.3 \
MPI_T_category_get_events.3 \
MPI_T_category_get_info.3 \
MPI_T_category_get_num.3 \
MPI_T_category_get_num_events.3 \
MPI_T_category_get_pvars.3 \
MPI_T_cvar_get_info.3 \
MPI_T_cvar_get_num.3 \
Expand Down
50 changes: 50 additions & 0 deletions docs/man-openmpi/man3/MPI_T_category_get_events.3.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
.. _mpi_t_category_get_events:


MPI_T_category_get_events
=========================

.. include_body

:ref:`MPI_T_category_get_events` |mdash| Query which events are in a
category


SYNTAX
------


C Syntax
^^^^^^^^

.. code-block:: c

#include <mpi.h>

int MPI_T_category_get_events(int cat_index, int len, int indices[])


INPUT PARAMETERS
----------------
* ``cat_index``: Index of the category to be queried.
* ``len``: The length of the indices array.

OUTPUT PARAMETERS
-----------------
* ``indices``: An integer array of size len, indicating event indices.

DESCRIPTION
-----------

:ref:`MPI_T_category_get_events` can be used to query which events
are contained in a particular category.


ERRORS
------

:ref:`MPI_T_category_get_events` will fail if:

* ``MPI_T_ERR_NOT_INITIALIZED``: The MPI Tools interface not initialized

* ``MPI_T_ERR_INVALID_INDEX``: The category index is invalid
47 changes: 47 additions & 0 deletions docs/man-openmpi/man3/MPI_T_category_get_num_events.3.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
.. _mpi_t_category_get_num_events:


MPI_T_category_get_num_events
=============================

.. include_body

:ref:`MPI_T_category_get_num_events` |mdash| Query returns the number of event types contained
in the queried category.


SYNTAX
------


C Syntax
^^^^^^^^

.. code-block:: c

#include <mpi.h>

int MPI_T_category_get_num_events(int cat_index, int *num_events)


INPUT PARAMETERS
----------------
* ``cat_index``: Index of the category to be queried

OUTPUT PARAMETERS
-----------------
* ``num_events``: Number of event types in the category

DESCRIPTION
-----------

:ref:`MPI_T_category_get_num_events` can be used to query the number of events
contained in the category.


ERRORS
------

:ref:`MPI_T_category_get_num_events` will fail if:

* ``MPI_T_ERR_NOT_INITIALIZED``: The MPI Tools interface not initialized
2 changes: 2 additions & 0 deletions docs/man-openmpi/man3/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,10 @@ MPI API manual pages (section 3)
MPI_T_category_changed.3.rst
MPI_T_category_get_categories.3.rst
MPI_T_category_get_cvars.3.rst
MPI_T_category_get_events.3.rst
MPI_T_category_get_info.3.rst
MPI_T_category_get_num.3.rst
MPI_T_category_get_num_events.3.rst
MPI_T_category_get_pvars.3.rst
MPI_T_cvar_get_info.3.rst
MPI_T_cvar_get_num.3.rst
Expand Down
4 changes: 4 additions & 0 deletions ompi/include/mpi.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -3760,11 +3760,13 @@ OMPI_DECLSPEC int PMPI_T_cvar_handle_free (MPI_T_cvar_handle *handle);
OMPI_DECLSPEC int PMPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf);
OMPI_DECLSPEC int PMPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf);
OMPI_DECLSPEC int PMPI_T_category_get_num(int *num_cat);
OMPI_DECLSPEC int PMPI_T_category_get_num_events (int cat_index, int *num_events);
OMPI_DECLSPEC int PMPI_T_category_get_info(int cat_index, char *name, int *name_len,
char *desc, int *desc_len, int *num_cvars,
int *num_pvars, int *num_categories);
OMPI_DECLSPEC int PMPI_T_category_get_index (const char *name, int *category_index);
OMPI_DECLSPEC int PMPI_T_category_get_cvars(int cat_index, int len, int indices[]);
OMPI_DECLSPEC int PMPI_T_category_get_events(int cat_index, int len, int indices[]);
OMPI_DECLSPEC int PMPI_T_category_get_pvars(int cat_index, int len, int indices[]);
OMPI_DECLSPEC int PMPI_T_category_get_categories(int cat_index, int len, int indices[]);
OMPI_DECLSPEC int PMPI_T_category_changed(int *stamp);
Expand Down Expand Up @@ -3845,11 +3847,13 @@ OMPI_DECLSPEC int MPI_T_cvar_handle_free (MPI_T_cvar_handle *handle);
OMPI_DECLSPEC int MPI_T_cvar_read (MPI_T_cvar_handle handle, void *buf);
OMPI_DECLSPEC int MPI_T_cvar_write (MPI_T_cvar_handle handle, const void *buf);
OMPI_DECLSPEC int MPI_T_category_get_num(int *num_cat);
OMPI_DECLSPEC int MPI_T_category_get_num_events (int cat_index, int *num_events);
OMPI_DECLSPEC int MPI_T_category_get_info(int cat_index, char *name, int *name_len,
char *desc, int *desc_len, int *num_cvars,
int *num_pvars, int *num_categories);
OMPI_DECLSPEC int MPI_T_category_get_index (const char *name, int *category_index);
OMPI_DECLSPEC int MPI_T_category_get_cvars(int cat_index, int len, int indices[]);
OMPI_DECLSPEC int MPI_T_category_get_events(int cat_index, int len, int indices[]);
OMPI_DECLSPEC int MPI_T_category_get_pvars(int cat_index, int len, int indices[]);
OMPI_DECLSPEC int MPI_T_category_get_categories(int cat_index, int len, int indices[]);
OMPI_DECLSPEC int MPI_T_category_changed(int *stamp);
Expand Down
83 changes: 63 additions & 20 deletions ompi/mca/op/aarch64/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
#

# MCA_ompi_op_arm_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
AC_CONFIG_FILES([ompi/mca/op/aarch64/Makefile])

case "${host}" in
aarch64*|arm64*)
op_aarch64_check="yes";;
Expand Down Expand Up @@ -71,36 +72,74 @@ AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
[op_cv_neon_fp_support=yes],
[op_cv_neon_fp_support=no])])])

#

#
# Check for SVE support
#
AC_CACHE_CHECK([for SVE support], op_cv_sve_support,
[AS_IF([test "$op_cv_neon_support" = "yes"],
[
AC_LINK_IFELSE(
[AC_LANG_PROGRAM([[
AC_CACHE_CHECK([for SVE support], [op_cv_sve_support], [
AC_MSG_RESULT([])
# initialize result variables
op_cv_sve_support=no
op_cv_sve_add_flags=no

# first attempt: no extra flags
AC_MSG_CHECKING([for SVE support (no additional flags)])
AC_LINK_IFELSE(
[AC_LANG_SOURCE([[
#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
#include <arm_sve.h>
#include <arm_sve.h>
#else
#error "No support for __aarch64__ or SVE"
#error "No support for __aarch64__ or SVE"
#endif
]],
[[
#if defined(__aarch64__) && defined(_ARM_FEATURE_SVE)
svfloat32_t vA;
vA = svdup_n_f32(0)

int main(void) {
svfloat32_t vA;
vA = svdup_n_f32(0);
return 0;
}
]])],
[ op_cv_sve_support=yes
AC_MSG_RESULT([yes]) ],
[ AC_MSG_RESULT([no ]) ]
)

# second attempt: use +sve attribute
AS_IF([test "$op_cv_sve_support" = "no"],[
AC_MSG_CHECKING([for SVE support (with +sve)])
AC_LINK_IFELSE(
[AC_LANG_SOURCE([[
#if defined(__aarch64__)
#include <arm_sve.h>
#else
#error "not on aarch64"
#endif
]])],
[op_cv_sve_support=yes],
[op_cv_sve_support=no])])])
])

__attribute__((__target__("+sve")))
int main(void) {
svbool_t pg = svptrue_b32();
svuint32_t a = svdup_u32(0);
svuint32_t b = svdup_u32(0);
svuint32_t c = svadd_u32_m(pg, a, b);
return (int)svaddv_u32(pg, c);
}
]])],
[ op_cv_sve_support=yes
op_cv_sve_add_flags=yes
AC_MSG_RESULT([yes]) ],
[ AC_MSG_RESULT([no ]) ]
)
])
])

AC_LANG_POP
])
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_support],
[test "$op_cv_neon_support" = "yes"])
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_fp_support],
[test "$op_cv_neon_fp_support" = "yes"])
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sve_support],
[test "$op_cv_sve_support" = "yes"])

AC_SUBST(MCA_BUILD_ompi_op_has_neon_support)
AC_SUBST(MCA_BUILD_ompi_op_has_neon_fp_support)
AC_SUBST(MCA_BUILD_ompi_op_has_sve_support)
Expand All @@ -111,9 +150,13 @@ AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
[AC_DEFINE([OMPI_MCA_OP_HAVE_NEON_FP], [1],[NEON FP supported in the current build])])
AS_IF([test "$op_cv_sve_support" = "yes"],
[AC_DEFINE([OMPI_MCA_OP_HAVE_SVE], [1],[SVE supported in the current build])])
AS_IF([test "$op_cv_sve_add_flags" = "yes"],
[AC_DEFINE([OMPI_MCA_OP_SVE_EXTRA_FLAGS], [1],[SVE supported with additional compile attributes])])

# If we have at least support for Neon
AS_IF([test "$op_cv_neon_support" = "yes"],

# If we have at least support for Neon or SVE
AS_IF([test "$op_cv_neon_support" = "yes" || test "$op_cv_sve_support" = "yes" ],
[$1],
[$2])

])dnl
6 changes: 6 additions & 0 deletions ompi/mca/op/aarch64/op_aarch64.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@

BEGIN_C_DECLS

#if defined(OMPI_MCA_OP_SVE_EXTRA_FLAGS)
#define SVE_ATTR __attribute__ ((__target__ ("+sve")))
#else
#define SVE_ATTR
#endif

/**
* Derive a struct from the base op component struct, allowing us to
* cache some component-specific information on our well-known
Expand Down
2 changes: 1 addition & 1 deletion ompi/mca/op/aarch64/op_aarch64_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ static int mca_op_aarch64_component_close(void)
/*
* Register MCA params.
*/
static int mca_op_aarch64_component_register(void)
SVE_ATTR static int mca_op_aarch64_component_register(void)
{

mca_op_aarch64_component.hardware_available = 1; /* Check for Neon */
Expand Down
74 changes: 38 additions & 36 deletions ompi/mca/op/aarch64/op_aarch64_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,24 +136,25 @@ _Generic((*(out)), \
} \
}
#elif defined(GENERATE_SVE_CODE)
#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
SVE_ATTR \
static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, APPEND) \
(const void *_in, void *_out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
const int cnt = *count; \
type##type_size##_t *in = (type##type_size##_t *) _in, \
*out = (type##type_size##_t *) _out; \
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
for (int idx=0; idx < cnt; idx += types_per_step) { \
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
vsrc = svld1(pred, &in[idx]); \
vdst = svld1(pred, &out[idx]); \
vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
} \
(const void *_in, void *_out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
const int types_per_step = svcnt(*((type##type_size##_t *) _in)); \
const int cnt = *count; \
type##type_size##_t *in = (type##type_size##_t *) _in, \
*out = (type##type_size##_t *) _out; \
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
for (int idx=0; idx < cnt; idx += types_per_step) { \
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
vsrc = svld1(pred, &in[idx]); \
vdst = svld1(pred, &out[idx]); \
vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
} \
}
#endif

Expand Down Expand Up @@ -302,25 +303,26 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
} \
}
#elif defined(GENERATE_SVE_CODE)
#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
(const void *_in1, const void *_in2, void *_out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
*in2 = (type##type_size##_t *) _in2, \
*out = (type##type_size##_t *) _out; \
const int cnt = *count; \
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
for (int idx=0; idx < cnt; idx += types_per_step) { \
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
vsrc = svld1(pred, &in1[idx]); \
vdst = svld1(pred, &in2[idx]); \
vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
} \
#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
SVE_ATTR \
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
(const void *_in1, const void *_in2, void *_out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
const int types_per_step = svcnt(*((type##type_size##_t *) _in1)); \
type##type_size##_t *in1 = (type##type_size##_t *) _in1, \
*in2 = (type##type_size##_t *) _in2, \
*out = (type##type_size##_t *) _out; \
const int cnt = *count; \
OP_CONCAT(OMPI_OP_TYPE_PREPEND, type##type_size##_t) vsrc, vdst; \
for (int idx=0; idx < cnt; idx += types_per_step) { \
svbool_t pred = svwhilelt_b##type_size(idx, cnt); \
vsrc = svld1(pred, &in1[idx]); \
vdst = svld1(pred, &in2[idx]); \
vdst = OP_CONCAT(OMPI_OP_OP_PREPEND, op##_x)(pred, vdst, vsrc); \
OP_CONCAT(OMPI_OP_OP_PREPEND, st1)(pred, &out[idx], vdst); \
} \
}
#endif /* defined(GENERATE_SVE_CODE) */

Expand Down
Loading