File tree: Expand file tree / Collapse file tree
Original file line number | Diff line number | Diff line change
@@ -341,7 +341,7 @@ llama_context::llama_context(
341341 // enabling pipeline parallelism in the scheduler increases memory usage, so it is only done when necessary
342342 bool pipeline_parallel =
343343 model.n_devices () > 1 &&
344- model.n_gpu_layers () > model.hparams .n_layer () &&
344+ model.n_gpu_layers () > model.hparams .n_layer_all &&
345345 model.split_mode () == LLAMA_SPLIT_MODE_LAYER &&
346346 cparams.offload_kqv &&
347347 !model.has_tensor_overrides ();
@@ -2351,7 +2351,7 @@ llm_graph_cb llama_context::graph_get_cb() const {
23512351
23522352 // norm may be automatically assigned to the backend of the previous layer, increasing data transfer between backends
23532353 // FIXME: fix in ggml_backend_sched
2354- const bool full_offload = model.n_gpu_layers () > model.hparams .n_layer () ;
2354+ const bool full_offload = model.n_gpu_layers () > model.hparams .n_layer_all ;
23552355 if (ubatch.n_tokens < 32 || full_offload) {
23562356 if (il != -1 && strcmp (name, " norm" ) == 0 ) {
23572357 const auto & dev_layer = model.dev_layer (il);
You can’t perform that action at this time.
0 commit comments