@@ -1639,7 +1639,8 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
 
         // check if we should start a new split based on the sources of the current node
         bool need_new_split = false;
-        if (node->op == GGML_OP_ADD && node->op_params[0] == 0xff) {
+        if ((node->op == GGML_OP_ADD && node->op_params[0] == 0xff) ||
+            node->op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t) - 1] == 0xff) {
            need_new_split = true;
        }
        else if (node_backend_id == cur_backend_id && split->n_inputs > 0) {
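The added condition reads the last int32_t slot of op_params as a generic "force a new split after this node" sentinel, independent of the op type. As a rough sketch of how a graph-building pass might set that sentinel (a hypothetical helper under assumed usage, not something this diff adds):

#include "ggml.h"

// op_params is a GGML_MAX_OP_PARAMS-byte scratch area exposed as int32_t
// words; writing 0xff into the final word marks the node so that
// ggml_backend_sched_split_graph() starts a new split at this point.
static void mark_split_boundary(struct ggml_tensor * node) {
    node->op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t) - 1] = 0xff;
}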
@@ -1882,6 +1883,7 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
 static void ggml_backend_sched_copy_inputs(ggml_backend_sched_t sched, ggml_backend_sched_split * split, std::array<bool, GGML_SCHED_MAX_BACKENDS> & needs_sync,
                                            std::vector<int32_t> & ids, std::vector<uint32_t> & unique_ids, ggml_tensor * last_ids_tensor) {
     if (split->n_inputs < 1) return;
+    constexpr bool k_set_sync = false;
     int split_backend_id = split->backend_id;
     ggml_backend_t split_backend = sched->backends[split_backend_id];
     ggml_backend_t last_input_backend = nullptr;
@@ -1892,13 +1894,10 @@ static void ggml_backend_sched_copy_inputs(ggml_backend_sched_t sched, ggml_back
 
         if (input->flags & GGML_TENSOR_FLAG_INPUT) {
             // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done
-            if (needs_sync[split_backend_id]) {
-                if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
-                    ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
-                } else {
-                    ggml_backend_synchronize(split_backend);
-                }
-                needs_sync[split_backend_id] = false;
+            if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
+                ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
+            } else {
+                ggml_backend_synchronize(split_backend);
             }
             ggml_backend_tensor_copy(input, input_cpy);
         } else {
@@ -1909,7 +1908,7 @@ static void ggml_backend_sched_copy_inputs(ggml_backend_sched_t sched, ggml_back
             } else {
                 ggml_backend_synchronize(split_backend);
             }
-            needs_sync[split_backend_id] = false;
+            needs_sync[split_backend_id] = k_set_sync;
         }
 
         ggml_tensor * node = split->graph.nodes[0];
@@ -1923,7 +1922,6 @@ static void ggml_backend_sched_copy_inputs(ggml_backend_sched_t sched, ggml_back
             last_input_backend = input_backend;
         }
 
-        // printf("node: %s have %d inputs, processing input %d\n", node->name, split->n_inputs, j);
         ggml_tensor * ids_tensor = node->op == GGML_OP_MUL_MAT_ID ? node->src[2] : node->src[3];
         auto ids_backend = split_backend;
 
@@ -1945,7 +1943,7 @@ static void ggml_backend_sched_copy_inputs(ggml_backend_sched_t sched, ggml_back
             ggml_backend_tensor_get_async(ids_backend, ids_tensor, ids.data(), 0, ggml_nbytes(ids_tensor));
 
             ggml_backend_synchronize(ids_backend);
-            needs_sync[tensor_backend_id(ids_tensor)] = false;
+            needs_sync[tensor_backend_id(ids_tensor)] = k_set_sync;
 
             unique_ids.resize((n_expert + 31)/32);
             std::memset(unique_ids.data(), 0, unique_ids.size()*sizeof(uint32_t));
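The (n_expert + 31)/32 sizing and memset imply that unique_ids is used as a packed bitset over expert ids, 32 flags per uint32_t word, so duplicate ids read back from ids_tensor can be skipped. A minimal sketch of the bit operations such a bitset needs (hypothetical helpers, not part of this diff):

#include <cstdint>
#include <vector>

// Record that expert `id` has been seen.
static inline void expert_mark(std::vector<uint32_t> & bits, int id) {
    bits[id / 32] |= 1u << (id % 32);
}

// Return true if expert `id` was seen before.
static inline bool expert_seen(const std::vector<uint32_t> & bits, int id) {
    return (bits[id / 32] >> (id % 32)) & 1u;
}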
@@ -2005,15 +2003,15 @@ static void ggml_backend_sched_copy_inputs(ggml_backend_sched_t sched, ggml_back
             int input_backend_id = tensor_backend_id(input);
             if (needs_sync[input_backend_id]) {
                 ggml_backend_synchronize(input_backend);
-                needs_sync[input_backend_id] = false;
+                needs_sync[input_backend_id] = k_set_sync;
             }
             if (needs_sync[split_backend_id]) {
                 if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
                     ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
                 } else {
                     ggml_backend_synchronize(split_backend);
                 }
-                needs_sync[split_backend_id] = false;
+                needs_sync[split_backend_id] = k_set_sync;
             }
             ggml_backend_tensor_copy(input, input_cpy);
         }
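Both branches follow the same sync-once discipline: needs_sync[...] gates the expensive synchronization (an event wait when one is available, otherwise a full backend sync), and assigning k_set_sync (defined as false at the top of this function) records that the backend is already caught up. A condensed sketch of that pattern with the backend details abstracted away (assumed semantics, not the scheduler's actual API):

#include <array>
#include <cstddef>

constexpr bool k_set_sync = false;

// Synchronize backend `id` at most once per copy pass: skip if the flag is
// already cleared, otherwise run the caller-supplied sync and clear it.
template <std::size_t N, typename SyncFn>
static void sync_once(std::array<bool, N> & needs_sync, int id, SyncFn && do_sync) {
    if (needs_sync[id]) {
        do_sync(id);                 // event wait if available, else full backend sync
        needs_sync[id] = k_set_sync; // mark as synchronized for this pass
    }
}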
@@ -2034,7 +2032,6 @@ static ggml_status ggml_backend_sched_compute_splits_sm_graph(ggml_backend_sched
     for (int i = 0; i < sched->n_splits; ++i) {
         auto split_i = &splits[i];
         this_split.clear();
-        // auto& this_split = all_splits.emplace_back();
         this_split.push_back(split_i);
         for (int j = i+1; j < sched->n_splits; ++j) {
             auto split_j = &splits[j];
@@ -2092,7 +2089,7 @@ static ggml_status ggml_backend_sched_compute_splits_sm_graph(ggml_backend_sched
 
 static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
 
-    if (sched->split_mode_graph) {
+    if (false && sched->split_mode_graph) {
         return ggml_backend_sched_compute_splits_sm_graph(sched);
     }
 