@@ -2,7 +2,6 @@ package inference
22
33import (
44 "fmt"
5- "net/url"
65 "slices"
76 "strings"
87
@@ -90,39 +89,37 @@ func getBaseImage(c *config.InferenceConfig, platform *specs.Platform) llb.State
9089
9190// writeConfig writes the /config.yaml file to the image when c.Config is set.
//
// When c.Config is non-empty, its bytes are written to /config.yaml with mode
// 0o644 as a file operation on s; when it is empty, no file operation is added.
// platform is only used to label the build step with its OS and architecture.
// The two returned states are whatever applyAndMerge(s, base, ...) returns.
//
// NOTE(review): applyAndMerge is defined outside this view. The removed lines
// below computed llb.Diff(savedState, s) and merged that diff onto base, so the
// helper presumably does the same; confirm against its definition.
9291func writeConfig (c * config.InferenceConfig , base llb.State , s llb.State , platform specs.Platform ) (llb.State , llb.State ) {
93- savedState := s
94- if c .Config != "" {
95- s = s .File (
96- llb .Mkfile ("/config.yaml" , 0o644 , []byte (c .Config )),
97- llb .WithCustomName (fmt .Sprintf ("Creating config for platform %s/%s" , platform .OS , platform .Architecture )),
98- )
99- }
100- diff := llb .Diff (savedState , s )
101- merge := llb .Merge ([]llb.State {base , diff })
102- return s , merge
92+ return applyAndMerge (s , base , func (s llb.State ) llb.State {
93+ if c .Config != "" { // an empty config adds no file operation
94+ s = s .File (
95+ llb .Mkfile ("/config.yaml" , 0o644 , []byte (c .Config )),
96+ llb .WithCustomName (fmt .Sprintf ("Creating config for platform %s/%s" , platform .OS , platform .Architecture )),
97+ )
98+ }
99+ return s
100+ })
103101}
104102
105103// copyModels copies models to the image and writes the config.
106104func copyModels (c * config.InferenceConfig , base llb.State , s llb.State , platform specs.Platform ) (llb.State , llb.State , error ) {
107105 savedState := s
108106 for _ , model := range c .Models {
109- // Check if the model source is a URL
110- if _ , err := url . ParseRequestURI ( model . Source ); err == nil {
111- switch {
112- case strings . HasPrefix ( model . Source , "oci://" ):
113- s = handleOCI ( model . Source , s , platform )
114- case strings . HasPrefix ( model . Source , "http://" ), strings . HasPrefix ( model . Source , "https://" ):
115- s = handleHTTP (model .Source , model . Name , model . SHA256 , s )
116- case strings . HasPrefix (model .Source , "huggingface://" ):
117- s , err = handleHuggingFace (model .Source , s )
118- if err != nil {
119- return llb. State {}, llb. State {}, err
120- }
121- default :
122- return llb.State {}, llb.State {}, fmt . Errorf ( "unsupported URL scheme: %s" , model . Source )
107+ // Dispatch on the source's URI scheme. Anything without a recognized
108+ // scheme (including absolute local paths like /models/foo.gguf) is treated
109+ // as a local file. The previous url.ParseRequestURI guard incorrectly
110+ // rejected absolute local paths, which parse as URIs with an empty scheme.
111+ var err error
112+ switch {
113+ case strings . HasPrefix (model .Source , "oci://" ):
114+ s = handleOCI (model .Source , s , platform )
115+ case strings . HasPrefix ( model . Source , "http://" ), strings . HasPrefix (model .Source , "https://" ):
116+ s = handleHTTP ( model . Source , model . Name , model . SHA256 , s )
117+ case strings . HasPrefix ( model . Source , "huggingface://" ):
118+ s , err = handleHuggingFace ( model . Source , s )
119+ if err != nil {
120+ return llb.State {}, llb.State {}, err
123121 }
124- } else {
125- // Handle local paths
122+ default :
126123 s = handleLocal (model .Source , s )
127124 }
128125
@@ -155,50 +152,47 @@ func installCuda(c *config.InferenceConfig, s llb.State, merge llb.State) (llb.S
155152 )
156153 s = s .Run (utils .Sh ("dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb" )).Root ()
157154
158- savedState := s
159- // running apt-get update twice due to nvidia repo
160- s = s .Run (utils .Sh ("apt-get update && apt-get install --no-install-recommends -y ca-certificates && apt-get update" ), llb .IgnoreCache ).Root ()
161-
162- // install cuda libraries for llama-cpp (default) and vllm backends
163- if len (c .Backends ) == 0 || slices .Contains (c .Backends , utils .BackendLlamaCpp ) || slices .Contains (c .Backends , utils .BackendVLLM ) {
164- // install cuda libraries and pciutils for gpu detection
165- s = s .Run (utils .Shf ("apt-get install -y --no-install-recommends pciutils libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean" , cudaVersion )).Root ()
166- // TODO: clean up /var/lib/dpkg/status
167- }
155+ return applyAndMerge (s , merge , func (s llb.State ) llb.State {
156+ // running apt-get update twice due to nvidia repo
157+ s = s .Run (utils .Sh ("apt-get update && apt-get install --no-install-recommends -y ca-certificates && apt-get update" ), llb .IgnoreCache ).Root ()
168158
169- diff := llb .Diff (savedState , s )
170- return s , llb .Merge ([]llb.State {merge , diff })
159+ // install cuda libraries for llama-cpp (default) and vllm backends
160+ if len (c .Backends ) == 0 || slices .Contains (c .Backends , utils .BackendLlamaCpp ) || slices .Contains (c .Backends , utils .BackendVLLM ) {
161+ // install cuda libraries and pciutils for gpu detection
162+ s = s .Run (utils .Shf ("apt-get install -y --no-install-recommends pciutils libcublas-%[1]s cuda-cudart-%[1]s && apt-get clean" , cudaVersion )).Root ()
163+ // TODO: clean up /var/lib/dpkg/status
164+ }
165+ return s
166+ })
171167}
172168
// installRocm adds the AMD ROCm apt repositories to s and installs ROCm packages.
//
// Steps issued, in order: install ca-certificates, curl and gnupg; store the
// dearmored ROCm GPG key at /etc/apt/trusted.gpg.d/rocm.gpg; append the rocm and
// graphics repositories for rocmVersion to /etc/apt/sources.list.d/rocm.list;
// write an apt preference pinning o=repo.radeon.com at priority 600; run
// apt-get update; install pciutils and rocm when c.Backends is empty or contains
// utils.BackendLlamaCpp; finally, in /opt/rocm/lib, symlink libhipblaslt.so.0 to
// libhipblaslt.so.1 when libhipblaslt.so.0 does not already exist.
//
// It returns the two states produced by applyAndMerge(s, merge, ...).
//
// NOTE(review): applyAndMerge is defined outside this view. The removed lines
// below computed llb.Diff(savedState, s) and merged that diff onto merge, so the
// helper presumably does the same; confirm against its definition.
// NOTE(review): the repository entries hard-code arch=amd64 and the noble suite.
// NOTE(review): the hipblaslt step runs under set -e even when the rocm package
// was not installed above; if /opt/rocm/lib is absent in that case the cd fails
// the build step. Confirm whether that configuration can occur.
173169func installRocm (c * config.InferenceConfig , s llb.State , merge llb.State ) (llb.State , llb.State ) {
174- savedState := s
175-
176- // Set up ROCm repository
177- s = s .Run (utils .Sh ("apt-get update && apt-get install --no-install-recommends -y ca-certificates curl gnupg" ), llb .IgnoreCache ).Root ()
178-
179- // Add ROCm GPG key and repository
180- s = s .Run (utils .Sh ("curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm.gpg" )).Root ()
181- s = s .Run (utils .Shf ("echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm.gpg] https://repo.radeon.com/rocm/apt/%s/ noble main' >> /etc/apt/sources.list.d/rocm.list" , rocmVersion )).Root ()
182- s = s .Run (utils .Shf ("echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm.gpg] https://repo.radeon.com/graphics/%s/ubuntu noble main' >> /etc/apt/sources.list.d/rocm.list" , rocmVersion )).Root ()
183- rocmPinning := `
170+ return applyAndMerge (s , merge , func (s llb.State ) llb.State {
171+ // Set up ROCm repository
172+ s = s .Run (utils .Sh ("apt-get update && apt-get install --no-install-recommends -y ca-certificates curl gnupg" ), llb .IgnoreCache ).Root ()
173+
174+ // Add ROCm GPG key and repository
175+ s = s .Run (utils .Sh ("curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm.gpg" )).Root ()
176+ s = s .Run (utils .Shf ("echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm.gpg] https://repo.radeon.com/rocm/apt/%s/ noble main' >> /etc/apt/sources.list.d/rocm.list" , rocmVersion )).Root ()
177+ s = s .Run (utils .Shf ("echo 'deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm.gpg] https://repo.radeon.com/graphics/%s/ubuntu noble main' >> /etc/apt/sources.list.d/rocm.list" , rocmVersion )).Root ()
178+ rocmPinning := `
184179Package: *
185180Pin: release o=repo.radeon.com
186181Pin-Priority: 600
187182`
188- s = s .Run (utils .Shf ("echo '%s' > /etc/apt/preferences.d/repo-radeon-pin-600" , rocmPinning )).Root ()
189- s = s .Run (utils .Sh ("apt-get update" ), llb .IgnoreCache ).Root ()
190-
191- // install rocm libraries and pciutils for gpu detection when using the default
192- // llama-cpp backend or when it is configured explicitly
193- if len (c .Backends ) == 0 || slices .Contains (c .Backends , utils .BackendLlamaCpp ) {
194- s = s .Run (utils .Sh ("apt-get install -y pciutils rocm && apt-get clean" )).Root ()
195- }
183+ s = s .Run (utils .Shf ("echo '%s' > /etc/apt/preferences.d/repo-radeon-pin-600" , rocmPinning )).Root ()
184+ s = s .Run (utils .Sh ("apt-get update" ), llb .IgnoreCache ).Root ()
196185
197- // hipblaslt soname compatibility: backend may be linked against .so.0 while ROCm 7.2 ships .so.1
198- s = s .Run (utils .Sh ("set -e; cd /opt/rocm/lib; [ -e libhipblaslt.so.0 ] || ln -sf libhipblaslt.so.1 libhipblaslt.so.0" )).Root ()
186+ // install rocm libraries and pciutils for gpu detection when using the default
187+ // llama-cpp backend or when it is configured explicitly
188+ if len (c .Backends ) == 0 || slices .Contains (c .Backends , utils .BackendLlamaCpp ) {
189+ s = s .Run (utils .Sh ("apt-get install -y pciutils rocm && apt-get clean" )).Root ()
190+ }
199191
200- diff := llb .Diff (savedState , s )
201- return s , llb .Merge ([]llb.State {merge , diff })
192+ // hipblaslt soname compatibility: backend may be linked against .so.0 while ROCm 7.2 ships .so.1
193+ s = s .Run (utils .Sh ("set -e; cd /opt/rocm/lib; [ -e libhipblaslt.so.0 ] || ln -sf libhipblaslt.so.1 libhipblaslt.so.0" )).Root ()
194+ return s
195+ })
202196}
203197
204198// addLocalAI adds the LocalAI binary to the image.
@@ -218,20 +212,18 @@ func addLocalAI(c *config.InferenceConfig, s llb.State, merge llb.State, platfor
218212 return s , merge , fmt .Errorf ("unsupported architecture %s" , platform .Architecture )
219213 }
220214
221- savedState := s
222-
223215 // Use the oras CLI image to pull the artifact containing the LocalAI binary
224216 tooling := llb .Image (orasImage , llb .Platform (platform )).Run (
225217 utils .Shf ("set -e\n oras pull %[1]s\n chmod +x local-ai\n chmod 755 local-ai" , art .Ref ),
226218 llb .WithCustomName ("Pulling LocalAI from OCI artifact " + art .Ref ),
227219 ).Root ()
228220
229221 // Copy the prepared binary into /usr/bin/local-ai
230- s = s . File (
231- llb . Copy ( tooling , "local-ai" , "/usr/bin/local-ai" ),
232- llb .WithCustomName ( "Copying local-ai from OCI artifact to /usr/bin" ),
233- )
234-
235- diff := llb . Diff ( savedState , s )
236- return s , llb . Merge ([]llb. State { merge , diff }) , nil
222+ s , merge = applyAndMerge ( s , merge , func ( s llb. State ) llb. State {
223+ return s . File (
224+ llb .Copy ( tooling , "local-ai" , "/usr/bin/local-ai" ),
225+ llb . WithCustomName ( "Copying local-ai from OCI artifact to /usr/bin" ),
226+ )
227+ } )
228+ return s , merge , nil
237229}
0 commit comments