@@ -602,6 +602,17 @@ export class DesktopLlamaAdapter {
602602 private modelPtr : Pointer | null = null ;
603603 /** Pool of `llama_context` instances. Index 0 is allocated by `loadModel()`. */
604604 private ctxPool : Pointer [ ] = [ ] ;
605+ /**
606+ * Serializes concurrent resizeParallel() callers. The C-side
607+ * llama_init_from_model is itself thread-safe (Metal registry uses
608+ * static std::mutex; CUDA/Vulkan ctx ctors are independent) and
609+ * bun:ffi calls block the JS thread, so within one call the for-loop
610+ * inside resizeParallel is already safe. This lock exists so future
611+ * `await`s added inside resizeParallel cannot let two callers
612+ * interleave pool mutations (push/pop on ctxPool, hasDecodedFlags,
613+ * drafterAttached).
614+ */
615+ private growLock : Promise < unknown > = Promise . resolve ( ) ;
605616 /** Per-ctx KV-decoded flag (drives the `memory_clear` guard between sessions). */
606617 private hasDecodedFlags : boolean [ ] = [ ] ;
607618 /** Per-ctx attached drafter — `null` when no drafter on that ctx. */
@@ -880,66 +891,76 @@ export class DesktopLlamaAdapter {
880891 * Returns true when the pool size actually changed, false on no-op.
881892 */
async resizeParallel(target: number): Promise<boolean> {
  // Contract notes:
  //  - Rejects when no model is loaded, when `target` is below 1 or is not an
  //    integer, when a shrink would drop a slot that a session still points
  //    at, or when a context allocation fails while growing.
  //  - Calls are serialized through `growLock`: each call waits for the
  //    promise left by the previous call and installs its own for the next.
  //  - If an allocation fails part-way through a grow, the contexts that were
  //    already pushed stay in the pool; only the failing one is missing.

  // Take a ticket: remember the previous holder and publish our own promise
  // *before* the first await so a concurrent caller queues behind this call.
  const previousHolder = this.growLock;
  let release!: () => void;
  this.growLock = new Promise<void>((resolve) => {
    release = resolve;
  });

  try {
    await previousHolder;

    const model = this.modelPtr;
    const opts = this.loadOpts;
    if (!model || !opts) {
      throw new Error("[desktop-llama] resizeParallel before model load");
    }
    if (target < 1) {
      throw new Error(
        `[desktop-llama] resizeParallel target must be >= 1, got ${target}`,
      );
    }
    // NaN and fractional values pass the `< 1` comparison above. NaN would
    // fall through every branch and report `true` without resizing, and a
    // fraction such as 2.5 would grow the pool to 3, so reject them here.
    if (!Number.isInteger(target)) {
      throw new Error(
        `[desktop-llama] resizeParallel target must be an integer, got ${target}`,
      );
    }

    const current = this.ctxPool.length;
    if (target === current) return false;

    if (target < current) {
      // Refuse to shrink while sessions are still pinned to outgoing slots.
      for (const sess of this.sessions.values()) {
        if (sess.ctxIdx >= target) {
          throw new Error(
            `[desktop-llama] cannot shrink pool to ${target}: session pinned to ctxIdx=${sess.ctxIdx}`,
          );
        }
      }
      // Release from the tail so the three per-ctx arrays shrink in lockstep.
      for (let i = current - 1; i >= target; i--) {
        const ctx = this.ctxPool[i];
        if (ctx !== undefined) this.llama.llama_free(ctx);
        this.ctxPool.pop();
        this.hasDecodedFlags.pop();
        this.drafterAttached.pop();
      }
      return true;
    }

    // Grow: allocate (target - current) additional ctxs.
    for (let i = current; i < target; i++) {
      const cp = this.shim.eliza_llama_context_params_default();
      let nextCtx: Pointer;
      try {
        const ctxSize = opts.contextSize ?? 4096;
        const nBatch = opts.nBatch ?? 256;
        const threads = opts.threads ?? defaultThreads();
        this.shim.eliza_llama_context_params_set_n_ctx(cp, ctxSize);
        this.shim.eliza_llama_context_params_set_n_batch(cp, nBatch);
        this.shim.eliza_llama_context_params_set_n_ubatch(
          cp,
          opts.nUBatch ?? nBatch,
        );
        this.shim.eliza_llama_context_params_set_n_threads(cp, threads);
        this.shim.eliza_llama_context_params_set_n_threads_batch(cp, threads);
        this.shim.eliza_llama_context_params_set_embeddings(cp, false);
        this.shim.eliza_llama_context_params_set_offload_kqv(cp, true);
        nextCtx = this.shim.eliza_llama_init_from_model(model, cp);
      } finally {
        // The params object is freed whether or not context creation worked.
        this.shim.eliza_llama_context_params_free(cp);
      }
      if (!nextCtx) {
        throw new Error(
          `[desktop-llama] llama_init_from_model failed when growing pool to ${target}`,
        );
      }
      this.ctxPool.push(nextCtx);
      this.hasDecodedFlags.push(false);
      this.drafterAttached.push(false);
    }
    return true;
  } finally {
    // Always hand the lock on, including when this call throws, so queued
    // callers are never left waiting.
    release();
  }
}
944965
945966 /**
0 commit comments