Skip to content

Commit 5a5ada0

Browse files
committed
Add Docker Compose generator
This commit introduces the `--generate=compose` option to the `ramalama serve` command, enabling users to generate a `docker-compose.yaml` file for a given model.
1 parent 980a101 commit 5a5ada0

File tree

13 files changed

+679
-0
lines changed

13 files changed

+679
-0
lines changed

ramalama/compose.py

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
# ramalama/compose.py
2+
3+
import os
4+
import shlex
5+
from typing import Optional, Tuple
6+
7+
from ramalama.common import RAG_DIR, get_accel_env_vars
8+
from ramalama.file import PlainFile
9+
from ramalama.version import version
10+
11+
12+
class Compose:
13+
"""
14+
Generates a docker-compose.yaml file from a ramalama serve configuration,
15+
structurally mirroring the logic in the Quadlet and Kube classes.
16+
"""
17+
18+
def __init__(
19+
self,
20+
model_name: str,
21+
model_paths: Tuple[str, str],
22+
chat_template_paths: Optional[Tuple[str, str]],
23+
mmproj_paths: Optional[Tuple[str, str]],
24+
args,
25+
exec_args,
26+
):
27+
self.src_model_path, self.dest_model_path = model_paths
28+
self.src_chat_template_path, self.dest_chat_template_path = (
29+
chat_template_paths if chat_template_paths is not None else ("", "")
30+
)
31+
self.src_mmproj_path, self.dest_mmproj_path = mmproj_paths if mmproj_paths is not None else ("", "")
32+
self.src_model_path = self.src_model_path.removeprefix("oci://")
33+
34+
self.model_name = model_name
35+
custom_name = getattr(args, "name", None)
36+
self.name = custom_name if custom_name else f"ramalama-{model_name}"
37+
self.args = args
38+
self.exec_args = exec_args
39+
self.image = args.image
40+
41+
def _gen_volumes(self) -> str:
42+
"""
43+
Generates the complete 'volumes' block for the Compose file.
44+
"""
45+
volumes = " volumes:"
46+
47+
# Model Volume
48+
volumes += self._gen_model_volume()
49+
50+
# RAG Volume
51+
if getattr(self.args, "rag", None):
52+
volumes += self._gen_rag_volume()
53+
54+
# Chat Template Volume
55+
if self.src_chat_template_path and os.path.exists(self.src_chat_template_path):
56+
volumes += self._gen_chat_template_volume()
57+
58+
# MMProj Volume
59+
if self.src_mmproj_path and os.path.exists(self.src_mmproj_path):
60+
volumes += self._gen_mmproj_volume()
61+
62+
return volumes
63+
64+
def _gen_model_volume(self) -> str:
65+
"""Generates the volume mount for the main AI model from a host path."""
66+
return f'\n - "{self.src_model_path}:{self.dest_model_path}:ro"'
67+
68+
def _gen_rag_volume(self) -> str:
69+
"""
70+
Generates the RAG volume mount. It now supports OCI images directly
71+
for both Docker Compose and Podman Compose.
72+
"""
73+
rag_source = self.args.rag
74+
volume_str = ""
75+
76+
if rag_source.startswith("oci:"):
77+
oci_image = rag_source.removeprefix("oci:")
78+
# This is the standard long-form syntax for image volumes, now supported by Docker.
79+
volume_str = f"""
80+
- type: image
81+
source: {oci_image}
82+
target: {RAG_DIR}
83+
image:
84+
readonly: true"""
85+
86+
elif os.path.exists(rag_source):
87+
# Standard host path mount
88+
volume_str = f'\n - "{rag_source}:{RAG_DIR}:ro"'
89+
90+
return volume_str
91+
92+
def _gen_chat_template_volume(self) -> str:
93+
"""Generates the volume mount for a chat template file."""
94+
return f'\n - "{self.src_chat_template_path}:{self.dest_chat_template_path}:ro"'
95+
96+
def _gen_mmproj_volume(self) -> str:
97+
"""Generates the volume mount for a multimodal projection file."""
98+
return f'\n - "{self.src_mmproj_path}:{self.dest_mmproj_path}:ro"'
99+
100+
def _gen_devices(self) -> str:
101+
"""Generates the 'devices' block for AMD/Intel GPUs."""
102+
device_list = []
103+
for dev_path in ["/dev/dri", "/dev/kfd", "/dev/accel"]:
104+
if os.path.exists(dev_path):
105+
device_list.append(dev_path)
106+
107+
if not device_list:
108+
return ""
109+
110+
devices_str = " devices:"
111+
for dev in device_list:
112+
devices_str += f'\n - "{dev}:{dev}"'
113+
return devices_str
114+
115+
def _gen_ports(self) -> str:
116+
"""Generates the 'ports' block."""
117+
port_arg = getattr(self.args, "port", None)
118+
if not port_arg:
119+
# Default to 8080 if no port is specified
120+
return ' ports:\n - "8080:8080"'
121+
122+
p = port_arg.split(":", 2)
123+
host_port = p[1] if len(p) > 1 else p[0]
124+
container_port = p[0]
125+
return f' ports:\n - "{host_port}:{container_port}"'
126+
127+
def _gen_environment(self) -> str:
128+
"""Generates the 'environment' block."""
129+
env_vars = get_accel_env_vars()
130+
# Allow user to override with --env
131+
if getattr(self.args, "env", None):
132+
for e in self.args.env:
133+
key, val = e.split("=", 1)
134+
env_vars[key] = val
135+
136+
if not env_vars:
137+
return ""
138+
139+
env_spec = " environment:"
140+
for k, v in env_vars.items():
141+
env_spec += f'\n - {k}={v}'
142+
return env_spec
143+
144+
def _gen_gpu_deployment(self) -> str:
145+
"""
146+
Generates the 'deploy' block for NVIDIA GPU access, the modern
147+
standard for Docker Compose.
148+
"""
149+
# Heuristic: if 'cuda' is in the image name, assume NVIDIA GPU needed.
150+
if "cuda" not in self.image:
151+
return ""
152+
153+
return """\
154+
deploy:
155+
resources:
156+
reservations:
157+
devices:
158+
- driver: nvidia
159+
count: all
160+
capabilities: [gpu]"""
161+
162+
def _gen_command(self) -> str:
163+
"""Generates the 'command' field from the execution arguments."""
164+
if not self.exec_args:
165+
return ""
166+
# shlex.join is perfect for creating a command string from a list
167+
cmd = shlex.join(self.exec_args)
168+
return f" command: {cmd}"
169+
170+
def generate(self) -> PlainFile:
171+
"""
172+
Assembles and returns the full docker-compose.yaml file content.
173+
"""
174+
_version = version()
175+
176+
# Generate all the dynamic sections of the YAML file
177+
volumes_string = self._gen_volumes()
178+
ports_string = self._gen_ports()
179+
environment_string = self._gen_environment()
180+
devices_string = self._gen_devices()
181+
gpu_deploy_string = self._gen_gpu_deployment()
182+
command_string = self._gen_command()
183+
184+
# Assemble the final file content
185+
content = f"""\
186+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
187+
#
188+
# Created with ramalama-{_version}
189+
190+
services:
191+
{self.model_name}:
192+
container_name: {self.name}
193+
image: {self.image}
194+
{volumes_string}
195+
{ports_string}
196+
{environment_string}
197+
{devices_string}
198+
{gpu_deploy_string}
199+
{command_string}
200+
restart: unless-stopped
201+
"""
202+
# Clean up any empty lines that might result from empty sections
203+
content = "\n".join(line for line in content.splitlines() if line.strip())
204+
205+
return genfile(self.name, content)
206+
207+
208+
def genfile(name: str, content: str) -> PlainFile:
    """Wrap rendered compose content in a PlainFile named docker-compose.yaml."""
    file_name = "docker-compose.yaml"
    print(f"Generating Docker Compose file: {file_name}")

    compose_file = PlainFile(file_name)
    compose_file.content = content
    return compose_file

ramalama/model.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
perror,
2222
set_accel_env_vars,
2323
)
24+
from ramalama.compose import Compose
2425
from ramalama.config import CONFIG, DEFAULT_PORT, DEFAULT_PORT_RANGE
2526
from ramalama.console import should_colorize
2627
from ramalama.engine import Engine, dry_run
@@ -748,6 +749,15 @@ def generate_container_config(self, args, exec_args):
748749
exec_args,
749750
args.generate.output_dir,
750751
)
752+
elif args.generate.gen_type == "compose":
753+
self.compose(
754+
(model_src_path, model_dest_path),
755+
(chat_template_src_path, chat_template_dest_path),
756+
(mmproj_src_path, mmproj_dest_path),
757+
args,
758+
exec_args,
759+
args.generate.output_dir,
760+
)
751761

752762
def execute_command(self, exec_args, args):
753763
try:
@@ -802,6 +812,10 @@ def kube(self, model_paths, chat_template_paths, mmproj_paths, args, exec_args,
802812
kube = Kube(self.model_name, model_paths, chat_template_paths, mmproj_paths, args, exec_args)
803813
kube.generate().write(output_dir)
804814

815+
def compose(self, model_paths, chat_template_paths, mmproj_paths, args, exec_args, output_dir):
816+
compose = Compose(self.model_name, model_paths, chat_template_paths, mmproj_paths, args, exec_args)
817+
compose.generate().write(output_dir)
818+
805819
def inspect(self, args) -> None:
806820
self.ensure_model_exists(args)
807821

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
command: llama-server --model /mnt/models/tinyllama.gguf
19+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
- "/templates/chat.json:/mnt/templates/chat.json:ro"
11+
ports:
12+
- "8080:8080"
13+
environment:
14+
- ACCEL_ENV=true
15+
devices:
16+
- "/dev/dri:/dev/dri"
17+
- "/dev/kfd:/dev/kfd"
18+
- "/dev/accel:/dev/accel"
19+
restart: unless-stopped
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: my-custom-api-name
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
restart: unless-stopped
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
- LOG_LEVEL=debug
15+
- THREADS=8
16+
devices:
17+
- "/dev/dri:/dev/dri"
18+
- "/dev/kfd:/dev/kfd"
19+
- "/dev/accel:/dev/accel"
20+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
llava:
6+
container_name: ramalama-llava
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/llava.gguf:/mnt/models/llava.gguf:ro"
10+
- "/models/llava.mmproj:/mnt/models/llava.mmproj:ro"
11+
ports:
12+
- "8080:8080"
13+
environment:
14+
- ACCEL_ENV=true
15+
devices:
16+
- "/dev/dri:/dev/dri"
17+
- "/dev/kfd:/dev/kfd"
18+
- "/dev/accel:/dev/accel"
19+
restart: unless-stopped
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
gemma-cuda:
6+
container_name: ramalama-gemma-cuda
7+
image: test-image/cuda:latest
8+
volumes:
9+
- "/models/gemma.gguf:/mnt/models/gemma.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
deploy:
19+
resources:
20+
reservations:
21+
devices:
22+
- driver: nvidia
23+
count: all
24+
capabilities: [gpu]
25+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "9090:9090"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
command: llama-server
19+
restart: unless-stopped

0 commit comments

Comments
 (0)