Skip to content

Commit 3ae86a9

Browse files
committed
Merge branch 'main' into improve-pull-output
# Conflicts:
#   pkg/distribution/distribution/client.go
2 parents 258bdb4 + 3fe0e1d commit 3ae86a9

File tree

21 files changed

+1063
-1118
lines changed

21 files changed

+1063
-1118
lines changed

README.md

Lines changed: 1 addition & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -402,21 +402,10 @@ in the form of [a Helm chart and static YAML](charts/docker-model-runner/README.
402402
If you are interested in a specific Kubernetes use-case, please start a
403403
discussion on the issue tracker.
404404
405-
<<<<<<< Updated upstream
406-
=======
407405
## dmrlet: Container Orchestrator for AI Inference
408406
409407
dmrlet is a purpose-built container orchestrator for AI inference workloads. Unlike Kubernetes, it focuses exclusively on running stateless inference containers with zero configuration overhead. Multi-GPU mapping "just works" without YAML, device plugins, or node selectors.
410408
411-
### Key Features
412-
413-
| Feature | Kubernetes | dmrlet |
414-
|---------|------------|--------|
415-
| Multi-GPU setup | Device plugins + node selectors + resource limits YAML | `dmrlet serve llama3 --gpus all` |
416-
| Config overhead | 50+ lines of YAML minimum | Zero YAML, CLI-only |
417-
| Time to first inference | Minutes (pod scheduling, image pull) | Seconds (model already local) |
418-
| Model management | External (mount PVCs, manage yourself) | Integrated with Docker Model Runner store |
419-
420409
### Building dmrlet
421410
422411
```bash
@@ -429,91 +418,12 @@ go build -o dmrlet ./cmd/dmrlet
429418
430419
### Usage
431420
432-
**Start the daemon:**
433-
```bash
434-
# Start in foreground
435-
dmrlet daemon
436-
437-
# With custom socket path
438-
dmrlet daemon --socket /tmp/dmrlet.sock
439-
```
440-
441421
**Serve a model:**
442422
```bash
443423
# Auto-detect backend and GPUs
444-
dmrlet serve llama3.2
445-
446-
# Specify backend
447-
dmrlet serve llama3.2 --backend vllm
448-
449-
# Specify GPU allocation
450-
dmrlet serve llama3.2 --gpus 0,1
451-
dmrlet serve llama3.2 --gpus all
452-
453-
# Multiple replicas
454-
dmrlet serve llama3.2 --replicas 2
455-
456-
# Backend-specific options
457-
dmrlet serve llama3.2 --ctx-size 4096 # llama.cpp context size
458-
dmrlet serve llama3.2 --gpu-memory 0.8 # vLLM GPU memory utilization
459-
```
460-
461-
**List running models:**
462-
```bash
463-
dmrlet ps
464-
# MODEL BACKEND REPLICAS GPUS ENDPOINTS STATUS
465-
# llama3.2 llama.cpp 1 [0,1,2,3] localhost:30000 healthy
466-
```
467-
468-
**View logs:**
469-
```bash
470-
dmrlet logs llama3.2 # Last 100 lines
471-
dmrlet logs llama3.2 -f # Follow logs
472-
```
473-
474-
**Scale replicas:**
475-
```bash
476-
dmrlet scale llama3.2 4 # Scale to 4 replicas
477-
```
478-
479-
**Stop a model:**
480-
```bash
481-
dmrlet stop llama3.2
482-
dmrlet stop --all # Stop all models
483-
```
484-
485-
**Check status:**
486-
```bash
487-
dmrlet status
488-
# DAEMON: running
489-
# SOCKET: /var/run/dmrlet.sock
490-
#
491-
# GPUs:
492-
# GPU 0: NVIDIA A100 80GB 81920MB (in use: llama3.2)
493-
# GPU 1: NVIDIA A100 80GB 81920MB (available)
494-
#
495-
# MODELS: 1 running
496-
```
497-
498-
### Supported Backends
499-
500-
- **llama.cpp** - Default backend for GGUF models
501-
- **vLLM** - High-throughput serving for safetensors models
502-
- **SGLang** - Fast serving with RadixAttention
503-
504-
### Architecture
505-
506-
```
507-
dmrlet daemon
508-
├── GPU Manager - Auto-detect and allocate GPUs
509-
├── Container Manager - Docker-based container lifecycle
510-
├── Service Registry - Endpoint discovery with load balancing
511-
├── Health Monitor - Auto-restart unhealthy containers
512-
├── Auto-scaler - Scale based on QPS/latency/GPU utilization
513-
└── Log Aggregator - Centralized log collection
424+
dmrlet serve gemma3
514425
```
515426
516-
>>>>>>> Stashed changes
517427
## Community
518428
519429
For general questions and discussion, please use [Docker Model Runner's Slack channel](https://dockercommunity.slack.com/archives/C09H9P5E57B).

pkg/distribution/builder/builder_test.go

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ import (
1010
"time"
1111

1212
"github.com/docker/model-runner/pkg/distribution/builder"
13-
"github.com/docker/model-runner/pkg/distribution/oci"
13+
"github.com/docker/model-runner/pkg/distribution/internal/testutil"
1414
"github.com/docker/model-runner/pkg/distribution/types"
1515
)
1616

@@ -398,8 +398,7 @@ func TestFromModelWithAdditionalLayers(t *testing.T) {
398398

399399
// TestFromModelErrorHandling tests that FromModel properly handles and surfaces errors from mdl.Layers()
400400
func TestFromModelErrorHandling(t *testing.T) {
401-
// Create a mock model that fails when Layers() is called
402-
mockModel := &mockFailingModel{}
401+
mockModel := testutil.WithLayersError(testutil.NewGGUFArtifact(t, filepath.Join("..", "assets", "dummy.gguf")), fmt.Errorf("simulated layers error"))
403402

404403
// Attempt to create a builder from the failing model
405404
_, err := builder.FromModel(mockModel)
@@ -424,12 +423,3 @@ func (ft *fakeTarget) Write(ctx context.Context, artifact types.ModelArtifact, w
424423
ft.artifact = artifact
425424
return nil
426425
}
427-
428-
// mockFailingModel is a mock that fails when Layers() is called
429-
type mockFailingModel struct {
430-
types.ModelArtifact
431-
}
432-
433-
func (m *mockFailingModel) Layers() ([]oci.Layer, error) {
434-
return nil, fmt.Errorf("simulated layers error")
435-
}

pkg/distribution/distribution/bundle_test.go

Lines changed: 18 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,7 @@ import (
66
"path/filepath"
77
"testing"
88

9-
"github.com/docker/model-runner/pkg/distribution/builder"
10-
"github.com/docker/model-runner/pkg/distribution/internal/mutate"
11-
"github.com/docker/model-runner/pkg/distribution/internal/partial"
9+
"github.com/docker/model-runner/pkg/distribution/internal/testutil"
1210
"github.com/docker/model-runner/pkg/distribution/types"
1311
)
1412

@@ -22,12 +20,7 @@ func TestBundle(t *testing.T) {
2220
t.Fatalf("Failed to create client: %v", err)
2321
}
2422

25-
// Load dummy model from assets directory
26-
b, err := builder.FromPath(filepath.Join("..", "assets", "dummy.gguf"))
27-
if err != nil {
28-
t.Fatalf("Failed to create model: %v", err)
29-
}
30-
mdl := b.Model()
23+
mdl := testutil.NewGGUFArtifact(t, filepath.Join("..", "assets", "dummy.gguf"))
3124
singleGGUFID, err := mdl.ID()
3225
if err != nil {
3326
t.Fatalf("Failed to get model ID: %v", err)
@@ -36,12 +29,11 @@ func TestBundle(t *testing.T) {
3629
t.Fatalf("Failed to write model to store: %v", err)
3730
}
3831

39-
// Load model with multi-modal projector file
40-
mmprojLayer, err := partial.NewLayer(filepath.Join("..", "assets", "dummy.mmproj"), types.MediaTypeMultimodalProjector)
41-
if err != nil {
42-
t.Fatalf("Failed to create mmproj layer: %v", err)
43-
}
44-
mmprojMdl := mutate.AppendLayers(mdl, mmprojLayer)
32+
mmprojMdl := testutil.NewGGUFArtifact(
33+
t,
34+
filepath.Join("..", "assets", "dummy.gguf"),
35+
testutil.Layer(filepath.Join("..", "assets", "dummy.mmproj"), types.MediaTypeMultimodalProjector),
36+
)
4537
mmprojMdlID, err := mmprojMdl.ID()
4638
if err != nil {
4739
t.Fatalf("Failed to get model ID: %v", err)
@@ -50,12 +42,11 @@ func TestBundle(t *testing.T) {
5042
t.Fatalf("Failed to write model to store: %v", err)
5143
}
5244

53-
// Load model with template file
54-
templateLayer, err := partial.NewLayer(filepath.Join("..", "assets", "template.jinja"), types.MediaTypeChatTemplate)
55-
if err != nil {
56-
t.Fatalf("Failed to create chat template layer: %v", err)
57-
}
58-
templateMdl := mutate.AppendLayers(mdl, templateLayer)
45+
templateMdl := testutil.NewGGUFArtifact(
46+
t,
47+
filepath.Join("..", "assets", "dummy.gguf"),
48+
testutil.Layer(filepath.Join("..", "assets", "template.jinja"), types.MediaTypeChatTemplate),
49+
)
5950
templateMdlID, err := templateMdl.ID()
6051
if err != nil {
6152
t.Fatalf("Failed to get model ID: %v", err)
@@ -64,12 +55,12 @@ func TestBundle(t *testing.T) {
6455
t.Fatalf("Failed to write model to store: %v", err)
6556
}
6657

67-
// Load sharded dummy model from asset directory
68-
shardedB, err := builder.FromPath(filepath.Join("..", "assets", "dummy-00001-of-00002.gguf"))
69-
if err != nil {
70-
t.Fatalf("Failed to create model: %v", err)
71-
}
72-
shardedMdl := shardedB.Model()
58+
shardedMdl := testutil.NewDockerArtifact(
59+
t,
60+
types.Config{Format: types.FormatGGUF},
61+
testutil.Layer(filepath.Join("..", "assets", "dummy-00001-of-00002.gguf"), types.MediaTypeGGUF),
62+
testutil.Layer(filepath.Join("..", "assets", "dummy-00002-of-00002.gguf"), types.MediaTypeGGUF),
63+
)
7364
shardedGGUFID, err := shardedMdl.ID()
7465
if err != nil {
7566
t.Fatalf("Failed to get model ID: %v", err)

pkg/distribution/distribution/client.go

Lines changed: 5 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -8,21 +8,20 @@ import (
88
"log/slog"
99
"os"
1010
"path/filepath"
11-
"slices"
1211
"strings"
1312

1413
"github.com/docker/model-runner/pkg/distribution/huggingface"
1514
"github.com/docker/model-runner/pkg/distribution/internal/bundle"
1615
"github.com/docker/model-runner/pkg/distribution/internal/mutate"
1716
"github.com/docker/model-runner/pkg/distribution/internal/progress"
1817
"github.com/docker/model-runner/pkg/distribution/internal/store"
18+
"github.com/docker/model-runner/pkg/distribution/modelpack"
1919
"github.com/docker/model-runner/pkg/distribution/oci"
2020
"github.com/docker/model-runner/pkg/distribution/oci/authn"
2121
"github.com/docker/model-runner/pkg/distribution/oci/remote"
2222
"github.com/docker/model-runner/pkg/distribution/registry"
2323
"github.com/docker/model-runner/pkg/distribution/tarball"
2424
"github.com/docker/model-runner/pkg/distribution/types"
25-
"github.com/docker/model-runner/pkg/inference/platform"
2625
"github.com/docker/model-runner/pkg/internal/utils"
2726
)
2827

@@ -774,19 +773,14 @@ func (c *Client) GetBundle(ref string) (types.ModelBundle, error) {
774773
return c.store.BundleForModel(normalizedRef)
775774
}
776775

777-
func GetSupportedFormats() []types.Format {
778-
if platform.SupportsVLLM() {
779-
return []types.Format{types.FormatGGUF, types.FormatSafetensors, types.FormatDiffusers}
780-
}
781-
return []types.Format{types.FormatGGUF, types.FormatDiffusers}
782-
}
783-
784776
func checkCompat(image types.ModelArtifact, log *slog.Logger, reference string, progressWriter io.Writer) error {
785777
manifest, err := image.Manifest()
786778
if err != nil {
787779
return err
788780
}
789-
if manifest.Config.MediaType != types.MediaTypeModelConfigV01 && manifest.Config.MediaType != types.MediaTypeModelConfigV02 {
781+
if manifest.Config.MediaType != types.MediaTypeModelConfigV01 &&
782+
manifest.Config.MediaType != types.MediaTypeModelConfigV02 &&
783+
manifest.Config.MediaType != modelpack.MediaTypeModelConfigV1 {
790784
return fmt.Errorf(
791785
"config type %q is not supported (supported: %q, %q)"+
792786
" - try upgrading: %w",
@@ -804,14 +798,7 @@ func checkCompat(image types.ModelArtifact, log *slog.Logger, reference string,
804798
}
805799

806800
if config.GetFormat() == "" {
807-
log.Warn("Model format field is empty for , unable to verify format compatibility", "model", utils.SanitizeForLog(reference))
808-
} else if !slices.Contains(GetSupportedFormats(), config.GetFormat()) {
809-
// Write warning but continue with pull
810-
log.Warn(warnUnsupportedFormat)
811-
if err := progress.WriteWarning(progressWriter, warnUnsupportedFormat, oci.ModePull); err != nil {
812-
log.Warn("Failed to write warning message", "error", err)
813-
}
814-
// Don't return an error - allow the pull to continue
801+
log.Warn("Model format field is empty; unable to verify format compatibility", "model", utils.SanitizeForLog(reference))
815802
}
816803

817804
return nil

0 commit comments

Comments (0)