Skip to content

Commit 3b98757

Browse files
committed
2 parents 239b6e8 + 33e96a4 commit 3b98757

14 files changed

Lines changed: 19 additions & 23 deletions

File tree

infrastructure/controllers/argocd/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ resources:
1212
helmCharts:
1313
- name: argo-cd
1414
repo: https://argoproj.github.io/argo-helm
15-
version: "9.4.1" # It's good practice to pin the chart version
15+
version: "9.4.4" # It's good practice to pin the chart version
1616
releaseName: argocd
1717
namespace: argocd
1818
valuesFile: values.yaml

infrastructure/controllers/reloader/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ resources:
66
helmCharts:
77
- name: reloader
88
repo: https://stakater.github.io/stakater-charts
9-
version: 2.2.7
9+
version: 2.2.8
1010
releaseName: reloader
1111
valuesFile: values.yaml

infrastructure/storage/snapshot-controller/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ replicaCount: 2
55

66
image:
77
repository: registry.k8s.io/sig-storage/snapshot-controller
8-
tag: v8.4.0
8+
tag: v8.5.0
99

1010
affinity:
1111
podAntiAffinity:

monitoring/prometheus-stack/kustomization.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ patchesStrategicMerge:
2121
helmCharts:
2222
- name: kube-prometheus-stack
2323
repo: https://prometheus-community.github.io/helm-charts
24-
version: 81.6.2
24+
version: 81.6.9
2525
releaseName: kube-prometheus-stack
2626
namespace: prometheus-stack
2727
valuesFile: values.yaml

my-apps/ai/comfyui/deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ spec:
113113
- name: comfyui
114114
# Image from https://github.com/YanWenKun/ComfyUI-Docker/tree/main/cu128-megapak-pt29
115115
# renovate: datasource=docker depName=yanwk/comfyui-boot
116-
image: yanwk/comfyui-boot:cu128-megapak-pt29@sha256:52c8939b54000970fa852973856a63e41db87aaecfd8006bb2d7326794330597
116+
image: yanwk/comfyui-boot:cu128-megapak-pt29@sha256:778d8bd9d8e5ccf7ae4b8a6841f2ec484616d51c0c55ff473dffcbe817da3aee
117117
imagePullPolicy: IfNotPresent
118118
workingDir: /root
119119
ports:

my-apps/ai/llama-cpp/configmap.yaml

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ data:
1818
ctx-size = 65536
1919
n-gpu-layers = 99
2020
tensor-split = 1,1
21-
cache-type-k = q8_0
22-
cache-type-v = q4_0
2321
temp = 1.0
2422
top-p = 0.95
2523
top-k = 40
@@ -39,8 +37,6 @@ data:
3937
ctx-size = 32768
4038
n-gpu-layers = 99
4139
tensor-split = 1,1
42-
cache-type-k = q8_0
43-
cache-type-v = q4_0
4440
temp = 0.7
4541
top-p = 0.95
4642
top-k = 20
@@ -54,18 +50,16 @@ data:
5450
# 397B total / 17B active (MoE) - Unsloth Dynamic Q4_K_XL
5551
# WARNING: ~5-15 tok/s due to offloading MoE experts to CPU. Quality over speed.
5652
# Natively multimodal (vision + language), 256K context native
57-
# cpu-moe keeps attention on GPU, experts on CPU - MUCH faster than
53+
# override-tensor keeps attention on GPU, experts on CPU - MUCH faster than
5854
# unified memory swapping (targeted offload vs indiscriminate CUDA paging)
5955
model = /models/UD-Q4_K_XL/Qwen3.5-397B-A17B-UD-Q4_K_XL-00001-of-00006.gguf
6056
alias = qwen3.5, qwen 3.5, general, experimental slow
6157
ctx-size = 32768
6258
n-gpu-layers = 99
6359
tensor-split = 1,1
64-
cache-type-k = q8_0
65-
cache-type-v = q4_0
66-
cpu-moe = 1
67-
temp = 0.6
60+
override-tensor = exps=CPU
61+
temp = 0.7
6862
top-p = 0.95
69-
top-k = 20
63+
top-k = 40
7064
min-p = 0.0
7165
jinja = 1

my-apps/ai/llama-cpp/deployment.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ spec:
4848
- "-fa"
4949
- "on" # Explicitly set to 'on' so --jinja is read correctly
5050
- "--jinja"
51+
- "--fit" # Auto-fit dense layers to available VRAM
52+
- "on"
5153
- "--no-mmap" # Prevent page fault stalls - we have 400GB RAM to spare
5254
- "-b"
5355
- "4096" # Larger logical batch for faster prompt processing
@@ -67,7 +69,7 @@ spec:
6769
- name: NVIDIA_DRIVER_CAPABILITIES
6870
value: "compute,utility"
6971
- name: GGML_CUDA_ENABLE_UNIFIED_MEMORY
70-
value: "1" # Vital for Kimi-K2 1T model to bridge VRAM and 400GB RAM
72+
value: "1" # Bridges VRAM and 400GB RAM for Qwen3.5-397B MoE expert offloading
7173
- name: GGML_CUDA_PEER_MAX_BATCH_SIZE
7274
value: "128"
7375
- name: CUDA_SCALE_LAUNCH_QUEUES

my-apps/ai/ollama/deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ spec:
3434
effect: "NoSchedule"
3535
containers:
3636
- name: ollama
37-
image: ollama/ollama:0.15.6
37+
image: ollama/ollama:0.17.0
3838
imagePullPolicy: IfNotPresent
3939
ports:
4040
- containerPort: 11434

my-apps/ai/open-webui/deployment.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ spec:
2828
containers:
2929
- name: open-webui
3030
# renovate: datasource=docker depName=ghcr.io/open-webui/open-webui
31-
image: ghcr.io/open-webui/open-webui:cuda@sha256:268d2b1b1f66f2d3f9a4ac3e0f06e0f545e9d6aaf71027115861874120e7edde
31+
image: ghcr.io/open-webui/open-webui:cuda@sha256:b5e6c753c8c43cdcb987910fbe257c31abef134d44bc84683775e6a1e4f3fafe
3232
imagePullPolicy: IfNotPresent
3333
ports:
3434
- containerPort: 8080

my-apps/ai/open-webui/function-loader-job.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ spec:
2121
effect: "NoSchedule"
2222
containers:
2323
- name: loader
24-
image: ghcr.io/open-webui/open-webui:cuda@sha256:268d2b1b1f66f2d3f9a4ac3e0f06e0f545e9d6aaf71027115861874120e7edde
24+
image: ghcr.io/open-webui/open-webui:cuda@sha256:b5e6c753c8c43cdcb987910fbe257c31abef134d44bc84683775e6a1e4f3fafe
2525
command:
2626
- python3
2727
- -c

0 commit comments

Comments
 (0)