Skip to content

Commit 11a2cce

Browse files
committed
Add Docker Compose generator
This commit introduces the `--generate=compose` option to the `ramalama serve` command, enabling users to generate a `docker-compose.yaml` file for a given model.
1 parent 980a101 commit 11a2cce

File tree

13 files changed

+651
-0
lines changed

13 files changed

+651
-0
lines changed

ramalama/compose.py

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
# ramalama/compose.py
2+
3+
import os
4+
import shlex
5+
from typing import Optional, Tuple
6+
7+
from ramalama.common import RAG_DIR, get_accel_env_vars
8+
from ramalama.file import PlainFile
9+
from ramalama.version import version
10+
11+
12+
class Compose:
    """Builds the content of a docker-compose.yaml file for serving a model.

    Each ``_gen_*`` helper returns a pre-indented YAML fragment, or "" when
    the corresponding section does not apply.  ``generate()`` stitches the
    fragments into the final file content and strips the blank lines that
    empty fragments leave behind.
    """

    def __init__(
        self,
        model_name: str,
        model_paths: Tuple[str, str],
        chat_template_paths: Optional[Tuple[str, str]],
        mmproj_paths: Optional[Tuple[str, str]],
        args,
        exec_args,
    ):
        # Each *_paths tuple is (host source path, container destination
        # path).  Optional tuples collapse to ("", "") so the volume helpers
        # can test them with plain truthiness.
        self.src_model_path, self.dest_model_path = model_paths
        self.src_chat_template_path, self.dest_chat_template_path = (
            chat_template_paths if chat_template_paths is not None else ("", "")
        )
        self.src_mmproj_path, self.dest_mmproj_path = mmproj_paths if mmproj_paths is not None else ("", "")
        # OCI-sourced models carry an "oci://" scheme prefix that must not
        # leak into the bind-mount source path.
        self.src_model_path = self.src_model_path.removeprefix("oci://")

        self.model_name = model_name
        # --name, when given, overrides the default "ramalama-<model>" name.
        custom_name = getattr(args, "name", None)
        self.name = custom_name if custom_name else f"ramalama-{model_name}"
        self.args = args
        self.exec_args = exec_args
        self.image = args.image

    def _gen_volumes(self) -> str:
        """Return the service's volumes: section (model mount always present)."""
        volumes = "    volumes:"

        # Model Volume
        volumes += self._gen_model_volume()

        # RAG Volume
        if getattr(self.args, "rag", None):
            volumes += self._gen_rag_volume()

        # Chat Template Volume
        if self.src_chat_template_path and os.path.exists(self.src_chat_template_path):
            volumes += self._gen_chat_template_volume()

        # MMProj Volume
        if self.src_mmproj_path and os.path.exists(self.src_mmproj_path):
            volumes += self._gen_mmproj_volume()

        return volumes

    def _gen_model_volume(self) -> str:
        """Read-only bind mount for the model file itself."""
        return f'\n      - "{self.src_model_path}:{self.dest_model_path}:ro"'

    def _gen_rag_volume(self) -> str:
        """Return a volume entry for the RAG data source.

        Returns "" when the source is neither an oci: image reference nor an
        existing host path.
        """
        rag_source = self.args.rag
        volume_str = ""

        if rag_source.startswith("oci:"):
            oci_image = rag_source.removeprefix("oci:")
            # This is the standard long-form syntax for image volumes, now supported by Docker.
            volume_str = f"""
      - type: image
        source: {oci_image}
        target: {RAG_DIR}
        image:
          readonly: true"""

        elif os.path.exists(rag_source):
            # Standard host path mount
            volume_str = f'\n      - "{rag_source}:{RAG_DIR}:ro"'

        return volume_str

    def _gen_chat_template_volume(self) -> str:
        """Read-only bind mount for the chat template file."""
        return f'\n      - "{self.src_chat_template_path}:{self.dest_chat_template_path}:ro"'

    def _gen_mmproj_volume(self) -> str:
        """Read-only bind mount for the multimodal projector file."""
        return f'\n      - "{self.src_mmproj_path}:{self.dest_mmproj_path}:ro"'

    def _gen_devices(self) -> str:
        """Pass through whichever accelerator device nodes exist on this host."""
        device_list = []
        for dev_path in ["/dev/dri", "/dev/kfd", "/dev/accel"]:
            if os.path.exists(dev_path):
                device_list.append(dev_path)

        if not device_list:
            return ""

        devices_str = "    devices:"
        for dev in device_list:
            devices_str += f'\n      - "{dev}:{dev}"'
        return devices_str

    def _gen_ports(self) -> str:
        """Return the ports: section, defaulting to 8080:8080 when --port is unset."""
        port_arg = getattr(self.args, "port", None)
        if not port_arg:
            # Default to 8080 if no port is specified
            return '    ports:\n      - "8080:8080"'

        p = port_arg.split(":", 2)
        # NOTE(review): a two-part --port value is emitted swapped (the
        # second element becomes the host port) — this assumes the CLI
        # convention is "container:host"; confirm against the serve
        # command's port handling.
        host_port = p[1] if len(p) > 1 else p[0]
        container_port = p[0]
        return f'    ports:\n      - "{host_port}:{container_port}"'

    def _gen_environment(self) -> str:
        """Accelerator env vars plus any --env overrides; "" when none apply."""
        # NOTE(review): the returned dict is mutated below — assumes
        # get_accel_env_vars() returns a fresh dict on each call; confirm.
        env_vars = get_accel_env_vars()
        # Allow user to override with --env
        if getattr(self.args, "env", None):
            for e in self.args.env:
                key, val = e.split("=", 1)
                env_vars[key] = val

        if not env_vars:
            return ""

        env_spec = "    environment:"
        for k, v in env_vars.items():
            env_spec += f'\n      - {k}={v}'
        return env_spec

    def _gen_gpu_deployment(self) -> str:
        """NVIDIA GPU reservation block, emitted only for CUDA images."""
        # The image name substring is the only CUDA signal available here.
        if "cuda" not in self.image:
            return ""

        return """\
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]"""

    def _gen_command(self) -> str:
        """Serving command line, shell-quoted from the exec args list."""
        if not self.exec_args:
            return ""
        # shlex.join safely quotes each element of the argument list
        cmd = shlex.join(self.exec_args)
        return f"    command: {cmd}"

    def generate(self) -> PlainFile:
        """Assemble all sections and return the compose file as a PlainFile."""
        _version = version()

        # Generate all the dynamic sections of the YAML file
        volumes_string = self._gen_volumes()
        ports_string = self._gen_ports()
        environment_string = self._gen_environment()
        devices_string = self._gen_devices()
        gpu_deploy_string = self._gen_gpu_deployment()
        command_string = self._gen_command()

        # Assemble the final file content
        content = f"""\
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
#
# Created with ramalama-{_version}

services:
  {self.model_name}:
    container_name: {self.name}
    image: {self.image}
{volumes_string}
{ports_string}
{environment_string}
{devices_string}
{gpu_deploy_string}
{command_string}
    restart: unless-stopped
"""
        # Clean up any empty lines that might result from empty sections
        # (this also drops the blank line before "services:" and the
        # trailing newline).
        content = "\n".join(line for line in content.splitlines() if line.strip())

        return genfile(self.name, content)
179+
180+
181+
def genfile(name: str, content: str) -> PlainFile:
    """Package rendered compose content as a 'docker-compose.yaml' PlainFile.

    The *name* argument is currently unused; it is kept for signature
    symmetry with the other generators.
    """
    out_name = "docker-compose.yaml"
    print(f"Generating Docker Compose file: {out_name}")

    generated = PlainFile(out_name)
    generated.content = content
    return generated

ramalama/model.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
perror,
2222
set_accel_env_vars,
2323
)
24+
from ramalama.compose import Compose
2425
from ramalama.config import CONFIG, DEFAULT_PORT, DEFAULT_PORT_RANGE
2526
from ramalama.console import should_colorize
2627
from ramalama.engine import Engine, dry_run
@@ -748,6 +749,15 @@ def generate_container_config(self, args, exec_args):
748749
exec_args,
749750
args.generate.output_dir,
750751
)
752+
elif args.generate.gen_type == "compose":
753+
self.compose(
754+
(model_src_path, model_dest_path),
755+
(chat_template_src_path, chat_template_dest_path),
756+
(mmproj_src_path, mmproj_dest_path),
757+
args,
758+
exec_args,
759+
args.generate.output_dir,
760+
)
751761

752762
def execute_command(self, exec_args, args):
753763
try:
@@ -802,6 +812,10 @@ def kube(self, model_paths, chat_template_paths, mmproj_paths, args, exec_args,
802812
kube = Kube(self.model_name, model_paths, chat_template_paths, mmproj_paths, args, exec_args)
803813
kube.generate().write(output_dir)
804814

815+
def compose(self, model_paths, chat_template_paths, mmproj_paths, args, exec_args, output_dir):
816+
compose = Compose(self.model_name, model_paths, chat_template_paths, mmproj_paths, args, exec_args)
817+
compose.generate().write(output_dir)
818+
805819
def inspect(self, args) -> None:
806820
self.ensure_model_exists(args)
807821

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
command: llama-server --model /mnt/models/tinyllama.gguf
19+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
- "/templates/chat.json:/mnt/templates/chat.json:ro"
11+
ports:
12+
- "8080:8080"
13+
environment:
14+
- ACCEL_ENV=true
15+
devices:
16+
- "/dev/dri:/dev/dri"
17+
- "/dev/kfd:/dev/kfd"
18+
- "/dev/accel:/dev/accel"
19+
restart: unless-stopped
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: my-custom-api-name
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
restart: unless-stopped
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
- LOG_LEVEL=debug
15+
- THREADS=8
16+
devices:
17+
- "/dev/dri:/dev/dri"
18+
- "/dev/kfd:/dev/kfd"
19+
- "/dev/accel:/dev/accel"
20+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
llava:
6+
container_name: ramalama-llava
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/llava.gguf:/mnt/models/llava.gguf:ro"
10+
- "/models/llava.mmproj:/mnt/models/llava.mmproj:ro"
11+
ports:
12+
- "8080:8080"
13+
environment:
14+
- ACCEL_ENV=true
15+
devices:
16+
- "/dev/dri:/dev/dri"
17+
- "/dev/kfd:/dev/kfd"
18+
- "/dev/accel:/dev/accel"
19+
restart: unless-stopped
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
gemma-cuda:
6+
container_name: ramalama-gemma-cuda
7+
image: test-image/cuda:latest
8+
volumes:
9+
- "/models/gemma.gguf:/mnt/models/gemma.gguf:ro"
10+
ports:
11+
- "8080:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
deploy:
19+
resources:
20+
reservations:
21+
devices:
22+
- driver: nvidia
23+
count: all
24+
capabilities: [gpu]
25+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "9090:9090"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
command: llama-server
19+
restart: unless-stopped
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Save this output to a 'docker-compose.yaml' file and run 'docker compose up'.
2+
#
3+
# Created with ramalama-0.1.0-test
4+
services:
5+
tinyllama:
6+
container_name: ramalama-tinyllama
7+
image: test-image/ramalama:latest
8+
volumes:
9+
- "/models/tinyllama.gguf:/mnt/models/tinyllama.gguf:ro"
10+
ports:
11+
- "9090:8080"
12+
environment:
13+
- ACCEL_ENV=true
14+
devices:
15+
- "/dev/dri:/dev/dri"
16+
- "/dev/kfd:/dev/kfd"
17+
- "/dev/accel:/dev/accel"
18+
command: llama-server
19+
restart: unless-stopped

0 commit comments

Comments
 (0)