-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathviiwork.yaml.example
More file actions
107 lines (99 loc) · 2.76 KB
/
viiwork.yaml.example
File metadata and controls
107 lines (99 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
---
# viiwork example configuration.
# Copy to viiwork.yaml and adjust for your deployment.

server:
  host: "0.0.0.0"   # listen on all interfaces
  port: 8080

model:
  path: /models/gpt-oss-20b-Q4_K_M.gguf
  context_size: 13337
  n_gpu_layers: -1  # -1 = offload all layers to GPU

gpus:
  # Use 'devices' to specify exact GPU IDs (recommended for multi-instance):
  # devices: [0, 1, 2]
  # Or use 'count' for simple single-instance setups (uses GPUs 0..count-1):
  count: 10
  base_port: 9001
  # power_limit_watts: 180  # optional: limit per-GPU power via rocm-smi

backend:
  binary: llama-server
  extra_args: ["--reasoning-format", "deepseek"]  # enables <think> tag parsing for thinking models

health:
  interval: 5s
  timeout: 3s
  max_failures: 3

balancer:
  latency_window: 30s
  high_load_threshold: 7
  max_in_flight_per_gpu: 4

# Mesh: uncomment and list peer viiwork hosts to enable cluster mode.
# peers:
#   hosts:
#     - 192.168.1.10:8080
#     - 192.168.1.11:8080
#   poll_interval: 10s
#   timeout: 3s

# Electricity cost tracking: uncomment and set ENTSOE_API_KEY in .env
# cost:
#   bidding_zone: 10YFI-1--------U  # Finland
#   timezone: Europe/Helsinki
#   transfer:
#     winter:
#       peak_cents_kwh: 4.28
#       offpeak_cents_kwh: 2.49
#     summer:
#       flat_cents_kwh: 2.49
#   electricity_tax_cents_kwh: 2.253
#   vat_percent: 25.5

# Localization pipelines: virtual models that chain multiple real models.
# Consumer calls e.g. model "localize-fi" and gets culturally adapted text.
# pipelines:
#   localize:
#     locale_aliases:
#       fi: fi-FI
#       se: sv-SE
#       pt: pt-BR
#       es: es-MX
#     locales:
#       pt-BR:
#         language: Portuguese
#         audience: "Brazilian general audience"
#         formality: informal
#         glossary: glossaries/pt-br.yaml
#       fi-FI:
#         language: Finnish
#         audience: "Finnish general audience"
#         formality: formal
#         glossary: glossaries/fi.yaml
#     steps:
#       - name: translate
#         model: alma-r-13b
#         prompt: prompts/translate.tmpl
#         temperature: 0.1
#       - name: localize
#         model: qwen2.5-14b
#         prompt: prompts/localize.tmpl
#         temperature: 0.4
#       - name: qc
#         model: qwen2.5-7b
#         prompt: prompts/qc.tmpl
#         temperature: 0.1
#         json_output: true
#
#   # Text improvement pipeline: generates text then rewrites to remove AI patterns.
#   # Consumer calls model "improve-default" (or "improve-en").
#   improve:
#     locale_aliases:
#       en: default
#     locales:
#       default:
#         language: English
#         audience: "general reader"
#         formality: neutral
#     steps:
#       - name: generate
#         model: qwen2.5-14b
#         prompt: prompts/generate.tmpl
#         temperature: 0.7
#       - name: improve
#         model: qwen2.5-14b
#         prompt: prompts/improve.tmpl
#         temperature: 0.3