@@ -91,6 +91,62 @@ describe('Deployment Routes', () => {
9191 expect ( spec . model . storage . volumes [ 0 ] . name ) . toBe ( 'model-cache' ) ;
9292 expect ( spec . model . storage . volumes [ 0 ] . size ) . toBe ( '100Gi' ) ;
9393 } ) ;
94+
/**
 * POSTs a KAITO + Hugging Face GGUF ("direct" run mode) request to the preview
 * endpoint and checks the first returned manifest: engine type, the resolved
 * GGUF download URL, the container image, and the provider name.
 */
test('normalizes KAITO GGUF deployments to llamacpp in preview manifests', async () => {
  // Stub the default namespace lookup; the returned handle is queued on
  // `restores` (presumably undone by a shared teardown hook — confirm in the suite setup).
  const restoreDefaultNamespace = mockServiceMethod(
    configService,
    'getDefaultNamespace',
    async () => 'kaito-workspace',
  );
  restores.push(restoreDefaultNamespace);

  // Start from the suite's shared valid body and override only the KAITO/GGUF
  // fields. No `engine` override is supplied here.
  const previewBody = {
    ...validDeploymentBody,
    namespace: 'kaito-workspace',
    provider: 'kaito',
    modelId: 'nvidia/Nemotron-3-Nano-4B-gguf',
    modelSource: 'huggingface',
    ggufFile: 'nvidia-nemotron-3-nano-4b.Q4_K_M.gguf',
    ggufRunMode: 'direct',
  };

  const response = await app.request('/api/deployments/preview', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(previewBody),
  });

  expect(response.status).toBe(200);

  const payload = await response.json();
  const { spec } = payload.resources[0].manifest;

  expect(spec.engine.type).toBe('llamacpp');
  // The expected URL is `<modelId>/resolve/main/<ggufFile>` on huggingface.co.
  expect(spec.engine.args.ggufUrl).toBe(
    'https://huggingface.co/nvidia/Nemotron-3-Nano-4B-gguf/resolve/main/nvidia-nemotron-3-nano-4b.Q4_K_M.gguf',
  );
  expect(spec.image).toBe('ghcr.io/kaito-project/aikit/runners/llama-cpp-cuda:latest');
  expect(spec.provider.name).toBe('kaito');
});
124+
/**
 * POSTs a KAITO request with a premade model to the preview endpoint and
 * checks the first returned manifest: engine type, the premade container
 * image, and the provider name.
 */
test('normalizes KAITO premade deployments to llamacpp in preview manifests', async () => {
  // Stub the default namespace lookup; the returned handle is queued on
  // `restores` (presumably undone by a shared teardown hook — confirm in the suite setup).
  const restoreDefaultNamespace = mockServiceMethod(
    configService,
    'getDefaultNamespace',
    async () => 'kaito-workspace',
  );
  restores.push(restoreDefaultNamespace);

  // Start from the suite's shared valid body and override only the
  // KAITO/premade fields. No `engine` override is supplied here.
  const previewBody = {
    ...validDeploymentBody,
    namespace: 'kaito-workspace',
    provider: 'kaito',
    modelId: 'llama3.2:3b',
    modelSource: 'premade',
    premadeModel: 'llama3.2:3b',
  };

  const response = await app.request('/api/deployments/preview', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(previewBody),
  });

  expect(response.status).toBe(200);

  const payload = await response.json();
  const { spec } = payload.resources[0].manifest;

  expect(spec.engine.type).toBe('llamacpp');
  expect(spec.image).toBe('ghcr.io/kaito-project/aikit/llama3.2:3b');
  expect(spec.provider.name).toBe('kaito');
});
94150 } ) ;
95151
96152 describe ( 'POST /api/deployments - storage validation' , ( ) => {
@@ -577,6 +633,91 @@ describe('Deployment Routes', () => {
577633 } ) ;
578634
579635 describe ( 'POST /api/deployments' , ( ) => {
/**
 * POSTs a KAITO + Hugging Face GGUF ("direct" run mode) deployment and
 * inspects the config handed to `kubernetesService.createDeployment`: it must
 * carry the llama.cpp runner image and the resolved GGUF download URL.
 */
test('resolves direct KAITO GGUF deployments to the runner image', async () => {
  // Filled in by the createDeployment stub below; stays undefined if the
  // route never reaches the Kubernetes service.
  let capturedConfig: any;

  restores.push(
    mockServiceMethod(kubernetesService, 'createDeployment', async (config) => {
      capturedConfig = config;
      return undefined;
    }),
  );
  // Report a cluster with 8 GPUs, none allocated.
  restores.push(
    mockServiceMethod(kubernetesService, 'getClusterGpuCapacity', async () => ({
      totalGpus: 8,
      allocatedGpus: 0,
      availableGpus: 8,
      maxContiguousAvailable: 8,
      nodes: [],
    })),
  );
  restores.push(
    mockServiceMethod(configService, 'getDefaultNamespace', async () => 'kaito-workspace'),
  );

  const res = await app.request('/api/deployments', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      name: 'nemotron-direct',
      namespace: 'kaito-workspace',
      provider: 'kaito',
      modelId: 'unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF',
      engine: 'llamacpp',
      modelSource: 'huggingface',
      ggufFile: 'NVIDIA-Nemotron-3-Nano-4B-Q4_K_M.gguf',
      ggufRunMode: 'direct',
      resources: { gpu: 1 },
    }),
  });

  expect(res.status).toBe(201);
  // Fail with a clear assertion (not a TypeError on property access) when
  // createDeployment was never invoked.
  expect(capturedConfig).toBeDefined();
  expect(capturedConfig.imageRef).toBe('ghcr.io/kaito-project/aikit/runners/llama-cpp-cuda:latest');
  // The expected URL is `<modelId>/resolve/main/<ggufFile>` on huggingface.co.
  expect(capturedConfig.engineArgs?.ggufUrl).toBe(
    'https://huggingface.co/unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF/resolve/main/NVIDIA-Nemotron-3-Nano-4B-Q4_K_M.gguf',
  );
});
680+
/**
 * POSTs a KAITO deployment that uses a premade model and inspects the config
 * handed to `kubernetesService.createDeployment`: its image reference must be
 * the premade model image.
 */
test('resolves premade KAITO deployments to the premade image', async () => {
  // Filled in by the createDeployment stub below; stays undefined if the
  // route never reaches the Kubernetes service.
  let capturedConfig: any;

  restores.push(
    mockServiceMethod(kubernetesService, 'createDeployment', async (config) => {
      capturedConfig = config;
      return undefined;
    }),
  );
  // Report a cluster with 8 GPUs, none allocated.
  restores.push(
    mockServiceMethod(kubernetesService, 'getClusterGpuCapacity', async () => ({
      totalGpus: 8,
      allocatedGpus: 0,
      availableGpus: 8,
      maxContiguousAvailable: 8,
      nodes: [],
    })),
  );
  restores.push(
    mockServiceMethod(configService, 'getDefaultNamespace', async () => 'kaito-workspace'),
  );

  const res = await app.request('/api/deployments', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      name: 'llama-premade',
      namespace: 'kaito-workspace',
      provider: 'kaito',
      modelId: 'llama3.2:3b',
      engine: 'llamacpp',
      modelSource: 'premade',
      premadeModel: 'llama3.2:3b',
    }),
  });

  expect(res.status).toBe(201);
  // Fail with a clear assertion (not a TypeError on property access) when
  // createDeployment was never invoked.
  expect(capturedConfig).toBeDefined();
  expect(capturedConfig.imageRef).toBe('ghcr.io/kaito-project/aikit/llama3.2:3b');
});
720+
580721 test ( 'accepts deployment with providerOverrides' , async ( ) => {
581722 restores . push (
582723 mockServiceMethod ( kubernetesService , 'createDeployment' , async ( ) => undefined ) ,
0 commit comments