|
| 1 | +// Copyright © 2024 Apple Inc. |
| 2 | + |
| 3 | +#pragma once |
| 4 | + |
| 5 | +#include <memory> |
| 6 | + |
| 7 | +#include "mlx/array.h" |
| 8 | + |
| 9 | +namespace mlx::core::distributed { |
| 10 | + |
| 11 | +/* Check if a communication backend is available */ |
| 12 | +bool is_available(); |
| 13 | + |
/**
 * A distributed::Group represents a group of independent mlx processes that
 * can communicate. We must also be able to create sub-groups from a group in
 * order to define more granular communication.
 *
 * The struct only holds a type-erased, reference-counted handle
 * (std::shared_ptr<void>), so copies of a Group share the same handle.
 */
struct Group {
  /** Wrap an existing type-erased group handle. */
  Group(std::shared_ptr<void> group) : group_(group) {}

  /** Rank of the process within this group. */
  int rank();

  /** Size of this group (presumably the number of member processes). */
  int size();

  /**
   * Split the group according to the provided color. Namely processes that use
   * the same color will go to the same group.
   *
   * The key defines the rank of the processes in the new group. The smaller
   * the key the smaller the rank. If the provided key is negative, then the
   * rank in the current group is used.
   */
  Group split(int color, int key = -1);

  /**
   * Read-only access to the underlying type-erased handle.
   *
   * Const-qualified so that it can also be called through a `const Group` or
   * a `const Group&`; it never modifies the group.
   */
  const std::shared_ptr<void>& raw_group() const {
    return group_;
  }

 private:
  // Type-erased handle to the group; null until set by the constructor.
  std::shared_ptr<void> group_{nullptr};
};
| 42 | + |
| 43 | +/** |
| 44 | + * Initialize the distributed backend and return the group containing all |
| 45 | + * discoverable processes. |
| 46 | + */ |
| 47 | +Group init(); |
| 48 | + |
| 49 | +namespace detail { |
| 50 | + |
| 51 | +/* Return the communication stream. */ |
| 52 | +Stream communication_stream(); |
| 53 | + |
| 54 | +/* Perform an all reduce sum operation on `input` for `group` (result presumably stored in `output`). */ |
| 55 | +void all_reduce_sum(Group group, const array& input, array& output); |
| 56 | + |
| 57 | +/* Perform an all gather operation on `input` for `group` (result presumably stored in `output`). */ |
| 58 | +void all_gather(Group group, const array& input, array& output); |
| 59 | + |
| 60 | +} // namespace detail |
| 61 | + |
| 62 | +} // namespace mlx::core::distributed |
0 commit comments