Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
28e2c4f
#2435: lb: change neighborhood, improve stopping criteria
lifflander May 27, 2025
17b1f6c
#2435: lb: reduce TD epochs
lifflander May 27, 2025
4137aac
#2435: lb: make sure f can never be zero
lifflander Jun 3, 2025
84b19d0
#2435: temperedlb: major changes and fixes to communication update wi…
lifflander Jun 25, 2025
e44441c
#2435: temperedlb: add non-cluster objects and handle that case
lifflander Jun 26, 2025
80a2259
#2435: temperedlb: clean up prints
lifflander Jun 26, 2025
ebbd67b
#2435: temperedlb: update Log-based knowledge to new formulas
lifflander Jun 26, 2025
5ab8d5a
#2435: temperedlb: fix bugs with edges, add converge tolerance
lifflander Jul 2, 2025
6f252ce
#2435: temperedlb: add give edges
lifflander Jul 2, 2025
f769e87
#2435: temperedlb: add input key
lifflander Jul 2, 2025
90e4a7b
#2435: temperedlb: fix curly brace
lifflander Jul 2, 2025
f3ffd0e
#2435: temperedlb: fix curly brace again..
lifflander Jul 2, 2025
673c9da
#2435: temperedlb: fix whitespace
lifflander Jul 3, 2025
0f56fbb
#2435: tests: remove improper semicolons
lifflander Jul 28, 2025
ba00c75
#2435: lb: reset converge tolerance each trial
lifflander Jul 29, 2025
a6e5f4b
#2435: tests: decrease converge tolerance for test
lifflander Jul 29, 2025
c6619fc
#2435: lb: swap f_ and k_max_
lifflander Jul 29, 2025
08633e6
#2435: lb: abstract cluster summary into a helper function
lifflander Jul 29, 2025
989ecf3
#2435: lb: a sentinel for no shared ID
lifflander Jul 29, 2025
b0fa623
#2435: lb: pass test helper maps by const&
lifflander Jul 29, 2025
cb25480
#2435: lb: add helper and sentinel to header file
lifflander Jul 29, 2025
b93da25
#2435: lb: wrap debug prints in temperedlb
lifflander Aug 29, 2025
a983931
#2435: lb: factor out checkConverged function
lifflander Aug 29, 2025
0576c19
#2435: lb: use direct type instead of iterator
lifflander Aug 29, 2025
11debd9
#2435: lb: factor out ClusterInfo inter cluster and obj edge addition
lifflander Aug 29, 2025
2976892
#2435: lb: add another helper
lifflander Aug 29, 2025
f4e1cc1
#2435: temperedlb: switch allreduce to reduce in several cases
lifflander Oct 1, 2025
183c309
#2435: temperedlb: switch two missed allreduces
lifflander Oct 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ using BytesType = double;
struct ClusterInfo {
LoadType load = 0;
BytesType bytes = 0;
SharedIDType shared_id = -1;
double intra_send_vol = 0, intra_recv_vol = 0;
std::unordered_map<NodeType, double> inter_send_vol, inter_recv_vol;
std::unordered_map<SharedIDType, double> inter_cluster_send_vol,
inter_cluster_recv_vol;
std::unordered_map<elm::ElementIDStruct, double> obj_send_vol, obj_recv_vol;
NodeType home_node = uninitialized_destination;
BytesType edge_weight = 0;
BytesType max_object_working_bytes = 0;
Expand All @@ -67,10 +70,35 @@ struct ClusterInfo {
BytesType max_object_serialized_bytes_outside = 0;
BytesType cluster_footprint = 0;

/**
 * \brief Accumulate communication volume on an edge to another cluster
 *
 * \param[in] is_send true to add to \c inter_cluster_send_vol, false to add
 * to \c inter_cluster_recv_vol
 * \param[in] id shared ID used as the key into the selected map
 * \param[in] volume amount added to the entry for \c id
 */
void addInterClusterEdge(bool is_send, SharedIDType id, double volume) {
  // Select the direction once; operator[] value-initializes a missing entry
  // to 0.0 before the accumulation.
  auto& volume_by_cluster =
    is_send ? inter_cluster_send_vol : inter_cluster_recv_vol;
  volume_by_cluster[id] += volume;
}

/**
 * \brief Accumulate intra-cluster communication volume
 *
 * \param[in] is_send true to add to \c intra_send_vol, false to add to
 * \c intra_recv_vol
 * \param[in] volume amount added to the selected running total
 */
void addIntraVolume(bool is_send, double volume) {
  double& running_total = is_send ? intra_send_vol : intra_recv_vol;
  running_total += volume;
}

/**
 * \brief Accumulate communication volume on an edge to a single object
 *
 * \param[in] is_send true to add to \c obj_send_vol, false to add to
 * \c obj_recv_vol
 * \param[in] obj element ID used as the key into the selected map
 * \param[in] volume amount added to the entry for \c obj
 */
void addObjEdge(bool is_send, elm::ElementIDStruct obj, double volume) {
  // Select the direction once; operator[] value-initializes a missing entry
  // to 0.0 before the accumulation.
  auto& volume_by_object = is_send ? obj_send_vol : obj_recv_vol;
  volume_by_object[obj] += volume;
}

template <typename SerializerT>
void serialize(SerializerT& s) {
s | load | bytes | intra_send_vol | intra_recv_vol;
s | inter_send_vol | inter_recv_vol;
s | load | shared_id | bytes | intra_send_vol | intra_recv_vol;
s | inter_cluster_send_vol | inter_cluster_recv_vol;
s | obj_send_vol | obj_recv_vol;
s | home_node | edge_weight;
s | max_object_working_bytes;
s | max_object_working_bytes_outside;
Expand All @@ -86,13 +114,17 @@ struct NodeInfo {
double inter_send_vol = 0, inter_recv_vol = 0;
double intra_send_vol = 0, intra_recv_vol = 0;
double shared_vol = 0;
std::set<SharedIDType> shared_ids;
std::set<elm::ElementIDStruct> non_cluster_objs;

/**
 * \brief Serialize the members of this struct through \c s
 *
 * The members are streamed in a fixed order; keep this order unchanged so
 * that packing and unpacking stay in agreement.
 *
 * \param[in,out] s the serializer each member is piped into
 */
template <typename SerializerT>
void serialize(SerializerT& s) {
  // Scalar load/work and communication-volume totals.
  s | load
    | work
    | inter_send_vol
    | inter_recv_vol
    | intra_send_vol
    | intra_recv_vol
    | shared_vol;
  // Set-valued members: shared IDs and non-cluster objects.
  s | shared_ids
    | non_cluster_objs;
}
};

Expand Down
Loading
Loading