Skip to content

Commit 7c5385d

Browse files
author
Abhishek Bongale
committed
Add Docker Compose generator
This commit introduces the `--generate=compose` option to the `ramalama serve` command, enabling users to generate a `docker-compose.yaml` file for a given model. This new generator aligns with the project's goal of simplifying AI model deployment by providing a declarative artifact for deploying the model as a container service.
1 parent 6da1e9c commit 7c5385d

File tree

13 files changed

+692
-0
lines changed

13 files changed

+692
-0
lines changed

ramalama/compose.py

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
# ramalama/compose.py
2+
3+
import os
4+
import shlex
5+
from typing import Optional, Tuple
6+
7+
from ramalama.common import MNT_DIR, RAG_DIR, genname, get_accel_env_vars
8+
from ramalama.file import PlainFile
9+
from ramalama.version import version
10+
11+
12+
class Compose:
    """
    Generates a docker-compose.yaml file from a ramalama serve configuration,
    structurally mirroring the logic in the Quadlet and Kube classes.

    Each ``_gen_*`` helper returns one pre-indented YAML fragment (or an
    empty string when the section does not apply); :meth:`generate` stitches
    the fragments together and strips the blank lines empty sections leave
    behind.
    """

    def __init__(
        self,
        model_name: str,
        model_paths: Tuple[str, str],
        chat_template_paths: Optional[Tuple[str, str]],
        mmproj_paths: Optional[Tuple[str, str]],
        args,
        exec_args,
    ):
        # Each *_paths tuple is (host_source_path, container_dest_path).
        self.src_model_path, self.dest_model_path = model_paths
        self.src_chat_template_path, self.dest_chat_template_path = (
            chat_template_paths if chat_template_paths is not None else ("", "")
        )
        self.src_mmproj_path, self.dest_mmproj_path = (
            mmproj_paths if mmproj_paths is not None else ("", "")
        )
        # OCI-sourced models carry an "oci://" scheme prefix that must not
        # appear in the compose volume source.
        self.src_model_path = self.src_model_path.removeprefix("oci://")

        self.model_name = model_name
        # --name (if given) overrides the derived container name.
        custom_name = getattr(args, "name", None)
        self.name = custom_name if custom_name else f"ramalama-{model_name}"
        self.args = args
        self.exec_args = exec_args
        self.image = args.image

    def _gen_volumes(self) -> str:
        """
        Generates the complete 'volumes' block for the Compose file.
        """
        volumes = "    volumes:"

        # The model volume is always present.
        volumes += self._gen_model_volume()

        # RAG volume (OCI image or host path), only when --rag was given.
        if getattr(self.args, "rag", None):
            volumes += self._gen_rag_volume()

        # Optional file mounts are emitted only when the host file exists.
        if self.src_chat_template_path and os.path.exists(self.src_chat_template_path):
            volumes += self._gen_chat_template_volume()

        if self.src_mmproj_path and os.path.exists(self.src_mmproj_path):
            volumes += self._gen_mmproj_volume()

        return volumes

    def _gen_model_volume(self) -> str:
        """Generates the read-only volume mount for the main AI model."""
        return f'\n      - "{self.src_model_path}:{self.dest_model_path}:ro"'

    def _gen_rag_volume(self) -> str:
        """
        Generates the RAG volume mount.  Supports OCI images directly
        (long-form ``type: image`` volumes) for both Docker Compose and
        Podman Compose, as well as plain host-path mounts.
        """
        rag_source = self.args.rag
        volume_str = ""

        if rag_source.startswith("oci:"):
            oci_image = rag_source.removeprefix("oci:")
            # Standard long-form syntax for image volumes, now supported by
            # Docker as well as Podman.
            volume_str = f"""
      - type: image
        source: {oci_image}
        target: {RAG_DIR}
        image:
          readonly: true"""
        elif os.path.exists(rag_source):
            # Standard host-path mount.
            volume_str = f'\n      - "{rag_source}:{RAG_DIR}:ro"'

        return volume_str

    def _gen_chat_template_volume(self) -> str:
        """Generates the volume mount for a chat template file."""
        return f'\n      - "{self.src_chat_template_path}:{self.dest_chat_template_path}:ro"'

    def _gen_mmproj_volume(self) -> str:
        """Generates the volume mount for a multimodal projection file."""
        return f'\n      - "{self.src_mmproj_path}:{self.dest_mmproj_path}:ro"'

    def _gen_devices(self) -> str:
        """Generates the 'devices' block for AMD/Intel accelerator nodes."""
        device_list = [
            dev_path
            for dev_path in ("/dev/dri", "/dev/kfd", "/dev/accel")
            if os.path.exists(dev_path)
        ]

        if not device_list:
            return ""

        devices_str = "    devices:"
        for dev in device_list:
            devices_str += f'\n      - "{dev}:{dev}"'
        return devices_str

    def _gen_ports(self) -> str:
        """
        Generates the 'ports' block.

        ``--port`` follows ``docker run -p`` ordering: "HOST[:CONTAINER]".
        A bare port is published on the same port inside the container.
        """
        port_arg = getattr(self.args, "port", None)
        if not port_arg:
            # Default to 8080 if no port is specified.
            return '    ports:\n      - "8080:8080"'

        p = port_arg.split(":", 2)
        # BUGFIX: host and container were previously swapped, so a spec
        # like "9090:8080" published the wrong side on the host.
        host_port = p[0]
        container_port = p[1] if len(p) > 1 else p[0]
        return f'    ports:\n      - "{host_port}:{container_port}"'

    def _gen_environment(self) -> str:
        """Generates the 'environment' block (accelerator vars + --env overrides)."""
        env_vars = get_accel_env_vars()
        # Allow the user to override or extend with --env KEY=VALUE.
        if getattr(self.args, "env", None):
            for e in self.args.env:
                key, val = e.split("=", 1)
                env_vars[key] = val

        if not env_vars:
            return ""

        env_spec = "    environment:"
        for k, v in env_vars.items():
            # NOTE(review): values are emitted unquoted — confirm callers
            # never pass YAML-significant characters in env values.
            env_spec += f'\n      - {k}={v}'
        return env_spec

    def _gen_gpu_deployment(self) -> str:
        """
        Generates the 'deploy' block for NVIDIA GPU access, the modern
        standard for Docker Compose.

        Heuristic: an image name containing "cuda" is assumed to require
        an NVIDIA GPU reservation.
        """
        if "cuda" not in self.image:
            return ""

        return """\
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]"""

    def _gen_command(self) -> str:
        """Generates the 'command' field from the execution arguments."""
        if not self.exec_args:
            return ""
        # shlex.join produces a safely quoted command string from the list.
        cmd = shlex.join(self.exec_args)
        return f"    command: {cmd}"

    def generate(self) -> "PlainFile":
        """
        Assembles and returns the full docker-compose.yaml file content.
        """
        _version = version()

        # Generate all the dynamic sections of the YAML file.
        volumes_string = self._gen_volumes()
        ports_string = self._gen_ports()
        environment_string = self._gen_environment()
        devices_string = self._gen_devices()
        gpu_deploy_string = self._gen_gpu_deployment()
        command_string = self._gen_command()

        # Assemble the final file content.
        content = f"""\
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
#
# Created with ramalama-{_version}

services:
  {self.model_name}:
    container_name: {self.name}
    image: {self.image}
{volumes_string}
{ports_string}
{environment_string}
{devices_string}
{gpu_deploy_string}
{command_string}
    restart: unless-stopped
"""
        # Empty optional sections leave blank lines behind; drop them so
        # the resulting YAML stays tidy.
        content = "\n".join(line for line in content.splitlines() if line.strip())

        return genfile(self.name, content)
208+
209+
210+
def genfile(name: str, content: str) -> "PlainFile":
    """Wrap generated compose content in a PlainFile named docker-compose.yaml.

    The ``name`` argument is accepted for interface parity with the other
    generators; the output filename is always ``docker-compose.yaml``.
    """
    output_name = "docker-compose.yaml"
    print(f"Generating Docker Compose file: {output_name}")

    plain_file = PlainFile(output_name)
    plain_file.content = content
    return plain_file

ramalama/model.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
perror,
2121
set_accel_env_vars,
2222
)
23+
from ramalama.compose import Compose
2324
from ramalama.config import CONFIG, DEFAULT_PORT, DEFAULT_PORT_RANGE
2425
from ramalama.console import should_colorize
2526
from ramalama.engine import Engine, dry_run
@@ -720,6 +721,15 @@ def generate_container_config(self, args, exec_args):
720721
exec_args,
721722
args.generate.output_dir,
722723
)
724+
elif args.generate.gen_type == "compose":
725+
self.compose(
726+
(model_src_path, model_dest_path),
727+
(chat_template_src_path, chat_template_dest_path),
728+
(mmproj_src_path, mmproj_dest_path),
729+
args,
730+
exec_args,
731+
args.generate.output_dir,
732+
)
723733

724734
def execute_command(self, exec_args, args):
725735
try:
@@ -775,6 +785,10 @@ def kube(self, model_paths, chat_template_paths, args, exec_args, output_dir):
775785
kube = Kube(self.model_name, model_paths, chat_template_paths, args, exec_args)
776786
kube.generate().write(output_dir)
777787

788+
def compose(self, model_paths, chat_template_paths, mmproj_paths, args, exec_args, output_dir):
    """Generate a docker-compose.yaml for this model and write it to output_dir."""
    generator = Compose(self.model_name, model_paths, chat_template_paths, mmproj_paths, args, exec_args)
    generated = generator.generate()
    generated.write(output_dir)
791+
778792
def inspect(self, args):
779793
self.ensure_model_exists(args)
780794

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
command: llama-server --model /mnt/models/tinyllama.gguf
19+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
- "/templates/chat.json:/mnt/templates/chat.json:ro"
11+
ports:
12+
- "8080:8080"
13+
environment:
14+
- ACCEL_ENV=true
15+
devices:
16+
- "/dev/dri:/dev/dri"
17+
- "/dev/kfd:/dev/kfd"
18+
- "/dev/accel:/dev/accel"
19+
restart: unless-stopped
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: my-custom-api-name
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
restart: unless-stopped
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
- LOG_LEVEL=debug
15+
- THREADS=8
16+
devices:
17+
- "/dev/dri:/dev/dri"
18+
- "/dev/kfd:/dev/kfd"
19+
- "/dev/accel:/dev/accel"
20+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
llava:
6+
container_name: ramalama-llava
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/llava.gguf:/mnt/models/llava.gguf:ro"
10+
- "/models/llava.mmproj:/mnt/models/llava.mmproj:ro"
11+
ports:
12+
- "8080:8080"
13+
environment:
14+
- ACCEL_ENV=true
15+
devices:
16+
- "/dev/dri:/dev/dri"
17+
- "/dev/kfd:/dev/kfd"
18+
- "/dev/accel:/dev/accel"
19+
restart: unless-stopped
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
gemma-cuda:
6+
container_name: ramalama-gemma-cuda
7+
image: test-image/cuda:latest
8+
volumes:
9+
- "/models/gemma.gguf:/mnt/models/gemma.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
deploy:
19+
resources:
20+
reservations:
21+
devices:
22+
- driver: nvidia
23+
count: all
24+
capabilities: [gpu]
25+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "9090:9090"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
command: llama-server
19+
restart: unless-stopped

0 commit comments

Comments
 (0)