@@ -1414,13 +1414,59 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
         // do not overwrite user assignments
         if (*leaf_backend_id == -1) {
             *leaf_backend_id = ggml_backend_sched_backend_id_from_cur(sched, leaf);
-            // printf("Pass 1: assigned backend %d to leaf %d, %s\n", *leaf_backend_id, i, graph->leafs[i]->name);
         }
     }
 
     for (int i = 0; i < graph->n_nodes; i++) {
         struct ggml_tensor * node = graph->nodes[i];
         int * node_backend_id = &tensor_backend_id(node);
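+        // assumption: GGML_OP_REDUCE combines partial results computed on several backends,
+        // with its j-th source expected to live on backend j; pin those assignments up front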
+        if (node->op == GGML_OP_REDUCE) {
+            auto view_src = node->view_src;
+            int src_id = -1;
+            for (int j = 0; j < node->op_params[1]; ++j) {
+                if (node->src[j]) {
+                    int * this_node_backend_id = &tensor_backend_id(node->src[j]);
+                    if (*this_node_backend_id == -1) {
+                        *this_node_backend_id = j;
+                    } else {
+                        GGML_ASSERT(*this_node_backend_id == j);
+                    }
+                    if (view_src == node->src[j]) {
+                        src_id = j;
+                    }
+                }
+            }
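+            // the reduce result is a view of one of its sources; place the view base
+            // and the node itself on that source's backend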
+            if (src_id >= 0) {
+                int * this_node_backend_id = &tensor_backend_id(view_src);
+                *this_node_backend_id = tensor_backend_id(node->src[src_id]);
+                *node_backend_id = *this_node_backend_id;
+            }
+        }
+        else if (node->op == GGML_OP_MUL && node->src[0]->op == GGML_OP_NORM) {
+            // This is a hack for Cohere2. Without this hack the scheduler creates
+            // totally nonsensical splits for that arch.
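+            // force the norm (src0) and the mul result onto the backend of the norm
+            // weight (src1), which presumably keeps the whole norm*weight pattern on one device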
+            int * src1_id = &tensor_backend_id(node->src[1]);
+            if (*src1_id >= 0) {
+                int * src0_id = &tensor_backend_id(node->src[0]);
+                int * dst_id = &tensor_backend_id(node);
+                *src0_id = *src1_id;
+                *dst_id = *src1_id;
+                // For some reason that I don't understand, the norm's backend can already be
+                // assigned at this point. How? That's why the more logical approach of checking
+                // first is commented out:
+                // if (*src0_id < 0) {
+                //     *src0_id = *src1_id;
+                // } else {
+                //     printf("Oops: backend_id_src0(%s) = %d, backend_id_src1(%s) = %d\n", node->src[0]->name, *src0_id, node->src[1]->name, *src1_id);
+                //     //GGML_ASSERT(*src0_id == *src1_id);
+                // }
+                // if (*dst_id < 0) {
+                //     *dst_id = *src1_id;
+                // } else {
+                //     printf("Oops: backend_id_dst(%s) = %d, backend_id_src1(%s) = %d\n", node->name, *dst_id, node->src[1]->name, *src1_id);
+                //     //GGML_ASSERT(*dst_id == *src1_id);
+                // }
+            }
+        }
         // do not overwrite user assignments
         if (*node_backend_id == -1) {
             *node_backend_id = ggml_backend_sched_backend_id_from_cur(sched, node);
@@ -1652,6 +1698,8 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
         // check if we should start a new split based on the sources of the current node
         bool need_new_split = false;
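+        // assumption: REDUCE and FAKE_CPY join tensors that live on different backends,
+        // so they must always start a new split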
         if ((node->op == GGML_OP_ADD && node->op_params[0] == 0xff) ||
+            node->op == GGML_OP_REDUCE ||
+            node->op == GGML_OP_FAKE_CPY ||
             node->op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t) - 1] == 0xff) {
             need_new_split = true;
         }
@@ -1739,6 +1787,13 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                 if (src_backend_id != cur_backend_id && !ggml_backend_sched_buffer_supported(sched, src, cur_backend_id)) {
                     // create a copy of the input in the split's backend
                     if (tensor_id_copy(src_id, cur_backend_id, 0) == NULL) {
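+                        // assumption: a REDUCE input already resides on the target backend,
+                        // and a FAKE_CPY of a REDUCE can reuse the reduce's per-backend source;
+                        // in both cases the copy slot is aliased instead of allocating a real copy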
+                        if (node->op == GGML_OP_REDUCE) {
+                            // printf("setting tensor_id_copy(reduce, %zu, %d, %s) to %s\n", src_id, cur_backend_id, node->name, src->name);
+                            tensor_id_copy(src_id, cur_backend_id, 0) = src;
+                        } else if (node->op == GGML_OP_FAKE_CPY && src->op == GGML_OP_REDUCE) {
+                            // printf("setting tensor_id_copy(fake_cpy, %zu, %d, %s) to %s\n", src_id, cur_backend_id, node->name, src->src[j]->name);
+                            tensor_id_copy(src_id, cur_backend_id, 0) = src->src[j];
+                        } else {
                         ggml_backend_t backend = sched->backends[cur_backend_id];
                         for (int c = 0; c < sched->n_copies; c++) {
                             struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
@@ -1753,6 +1808,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
                         int n_inputs = split->n_inputs++;
                         GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
                         split->inputs[n_inputs] = src;
+                        }
                     }
                     node->src[j] = tensor_id_copy(src_id, cur_backend_id, sched->cur_copy);
                 }
@@ -2027,80 +2083,8 @@ static void ggml_backend_sched_copy_inputs(ggml_backend_sched_t sched, ggml_back
     }
 }
 
-static ggml_status ggml_backend_sched_compute_splits_sm_graph(ggml_backend_sched_t sched) {
-    std::vector<int32_t> ids;
-    std::vector<uint32_t> unique_ids;
-    ggml_tensor * last_ids_tensor = nullptr;
-
-    std::array<bool, GGML_SCHED_MAX_BACKENDS> needs_sync{{true}};
-
-    auto splits = sched->splits;
-
-    std::vector<ggml_backend_sched_split *> this_split;
-    for (int i = 0; i < sched->n_splits; ++i) {
-        auto split_i = &splits[i];
-        this_split.clear();
-        this_split.push_back(split_i);
-        for (int j = i+1; j < sched->n_splits; ++j) {
-            auto split_j = &splits[j];
-            if (split_i->backend_id == split_j->backend_id) {
-                break;
-            }
-            int n_nodes = std::min(split_i->graph.n_nodes, split_j->graph.n_nodes);
-            bool same = true;
-            for (int k = 0; k < n_nodes; ++k) {
-                if (split_i->graph.nodes[k]->op != split_j->graph.nodes[k]->op) {
-                    same = false; break;
-                }
-            }
-            if (!same) {
-                break;
-            }
-            this_split.push_back(split_j);
-        }
-        if (false) {
-            auto split = this_split.front();
-            if (this_split.size() == 1) {
-                printf(" === Split %d with %d inputs on backend %d\n", i, split->n_inputs, split->backend_id);
-            } else {
-                printf(" === Split %d with %d inputs on backends", i, split->n_inputs);
-                for (int j = 0; j < (int)this_split.size(); ++j) printf(" %d", this_split[j]->backend_id);
-                printf("\n");
-            }
-            for (int j = 0; j < split->graph.n_nodes; ++j) {
-                printf(" %d %s(%s)\n", j, ggml_op_name(split->graph.nodes[j]->op), split->graph.nodes[j]->name);
-            }
-        }
-        for (auto split : this_split) {
-            ggml_backend_sched_copy_inputs(sched, split, needs_sync, ids, unique_ids, last_ids_tensor);
-        }
-        for (auto split : this_split) {
-            auto split_backend_id = split->backend_id;
-            if (split->n_inputs > 0) {
-                needs_sync[split_backend_id] = true;
-            }
-            auto split_backend = sched->backends[split_backend_id];
-            auto ec = ggml_backend_graph_compute_async(split_backend, &split->graph);
-            if (ec != GGML_STATUS_SUCCESS) {
-                return ec;
-            }
-            if (split->n_inputs > 0) {
-                if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
-                    ggml_backend_event_record(sched->events[split_backend_id][sched->cur_copy]);
-                }
-            }
-        }
-        i += this_split.size() - 1;
-    }
-    return GGML_STATUS_SUCCESS;
-}
-
 static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
 
-    if (false && sched->split_mode_graph) {
-        return ggml_backend_sched_compute_splits_sm_graph(sched);
-    }
-
     std::array<bool, GGML_SCHED_MAX_BACKENDS> needs_sync{{true}};
     std::array<bool, GGML_SCHED_MAX_BACKENDS> own_cpy{{false}};
 