Skip to content

Commit 490aa0f

Browse files
fix: add markdown rendering to chat UI and update CUJ2 documentation (#159)
Signed-off-by: Yuan Chen <'"$EMAIL"'> Co-authored-by: Mark Chmarny <mchmarny@users.noreply.github.com>
1 parent f9f1ec0 commit 490aa0f

File tree

3 files changed

+204
-59
lines changed

3 files changed

+204
-59
lines changed

demos/cuj2.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,31 @@ vllm-agg-frontend-0 1/1 Running 0 2m
182182
vllm-agg-vllmdecodeworker-0 1/1 Running 0 2m
183183
```
184184

185+
### Architecture
186+
187+
```
188+
┌─────────┐ HTTP ┌────────────────┐ NATS ┌────────────────────┐
189+
│ Client │─────────▶│ Frontend │────────▶│ VllmDecodeWorker │
190+
│ (OpenAI │ :8000 │ │ :4222 │ │
191+
│ API) │◀─────────│ vllm-runtime │◀────────│ dynamo.vllm │
192+
└─────────┘ │ Qwen3-0.6B │ │ Qwen3-0.6B │
193+
│ │ │ 1x H100 GPU │
194+
│ CPU node │ │ GPU node │
195+
└────────────────┘ └────────────────────┘
196+
ip-100-64-83-166 ip-100-64-171-120
197+
svc: :8000 svc: :9090
198+
199+
Services:
200+
Frontend 1/1 Ready componentType: frontend
201+
VllmDecodeWorker 1/1 Ready componentType: worker gpu: 1
202+
203+
Flow:
204+
1. Client sends OpenAI request (/v1/chat/completions) → Frontend :8000
205+
2. Frontend dispatches inference work via NATS :4222
206+
3. VllmDecodeWorker runs Qwen/Qwen3-0.6B on H100, returns result
207+
4. Response streams back: Worker → NATS → Frontend → Client
208+
```
209+
185210
### Test the endpoint
186211

187212
#### Option 1: Chat UI (browser)

demos/workloads/inference/chat-server.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,17 @@ python3 -c "
4040
import http.server, urllib.request, io
4141
4242
API = 'http://127.0.0.1:${API_PORT}'
43-
HTML = open('$(dirname "$0")/chat.html', 'rb').read() if __import__('os').path.exists('$(dirname "$0")/chat.html') else b''
43+
HTML_PATH = '$(dirname "$0")/chat.html'
4444
4545
class H(http.server.BaseHTTPRequestHandler):
4646
def do_GET(self):
4747
if self.path == '/' or self.path == '/chat.html':
48+
html = open(HTML_PATH, 'rb').read() if __import__('os').path.exists(HTML_PATH) else b''
4849
self.send_response(200)
4950
self.send_header('Content-Type', 'text/html')
50-
self.send_header('Content-Length', len(HTML))
51+
self.send_header('Content-Length', len(html))
5152
self.end_headers()
52-
self.wfile.write(HTML)
53+
self.wfile.write(html)
5354
elif self.path.startswith('/v1/'):
5455
self._proxy()
5556
else:

demos/workloads/inference/chat.html

Lines changed: 175 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,20 @@
1616
.msg .avatar { width: 32px; height: 32px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 14px; flex-shrink: 0; }
1717
.msg.user .avatar { background: #533483; }
1818
.msg.assistant .avatar { background: #0f3460; }
19-
.msg .bubble { padding: 12px 16px; border-radius: 12px; line-height: 1.6; white-space: pre-wrap; word-break: break-word; }
19+
.msg .bubble { padding: 12px 16px; border-radius: 12px; line-height: 1.6; word-break: break-word; }
2020
.msg.user .bubble { background: #533483; border-bottom-right-radius: 4px; }
2121
.msg.assistant .bubble { background: #16213e; border: 1px solid #0f3460; border-bottom-left-radius: 4px; }
22-
.msg.assistant .bubble .thinking { color: #666; font-style: italic; font-size: 13px; }
22+
.msg.assistant .bubble .thinking { color: #888; font-style: italic; font-size: 13px; margin-bottom: 8px; padding: 8px; background: rgba(255,255,255,0.03); border-radius: 6px; }
23+
.bubble h1, .bubble h2, .bubble h3 { margin: 12px 0 6px; color: #fff; }
24+
.bubble h1 { font-size: 1.3em; } .bubble h2 { font-size: 1.15em; } .bubble h3 { font-size: 1.05em; }
25+
.bubble p { margin: 6px 0; }
26+
.bubble ul, .bubble ol { margin: 6px 0 6px 20px; }
27+
.bubble li { margin: 2px 0; }
28+
.bubble code { background: #0d1b2a; padding: 2px 6px; border-radius: 4px; font-size: 0.9em; }
29+
.bubble pre { background: #0d1b2a; padding: 12px; border-radius: 8px; overflow-x: auto; margin: 8px 0; }
30+
.bubble pre code { background: none; padding: 0; }
31+
.bubble strong { color: #fff; }
32+
.bubble hr { border: none; border-top: 1px solid #0f3460; margin: 12px 0; }
2333
#input-area { padding: 16px 24px; background: #16213e; border-top: 1px solid #0f3460; }
2434
#input-row { max-width: 720px; margin: 0 auto; display: flex; gap: 8px; }
2535
#input { flex: 1; padding: 12px 16px; border-radius: 12px; border: 1px solid #0f3460; background: #1a1a2e; color: #e0e0e0; font-size: 15px; outline: none; resize: none; font-family: inherit; }
@@ -41,96 +51,205 @@ <h1>Qwen3-0.6B Chat</h1>
4151
<div id="input-area">
4252
<div id="input-row">
4353
<textarea id="input" rows="1" placeholder="Type a message... (Shift+Enter for newline)" autofocus></textarea>
44-
<button id="send" onclick="send()">Send</button>
54+
<button id="send" onclick="sendMsg()">Send</button>
4555
</div>
4656
</div>
4757
<script>
48-
const API = '/v1/chat/completions';
49-
const MODEL = 'Qwen/Qwen3-0.6B';
50-
let messages = [];
51-
let sending = false;
58+
var API = '/v1/chat/completions';
59+
var MODEL = 'Qwen/Qwen3-0.6B';
60+
var messages = [];
61+
var sending = false;
5262

53-
const chat = document.getElementById('chat');
54-
const input = document.getElementById('input');
55-
const btn = document.getElementById('send');
63+
var chatEl = document.getElementById('chat');
64+
var inputEl = document.getElementById('input');
65+
var btnEl = document.getElementById('send');
5666

57-
input.addEventListener('keydown', e => {
58-
if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); }
67+
inputEl.addEventListener('keydown', function(e) {
68+
if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); sendMsg(); }
5969
});
6070

61-
input.addEventListener('input', () => {
62-
input.style.height = 'auto';
63-
input.style.height = Math.min(input.scrollHeight, 120) + 'px';
71+
inputEl.addEventListener('input', function() {
72+
inputEl.style.height = 'auto';
73+
inputEl.style.height = Math.min(inputEl.scrollHeight, 120) + 'px';
6474
});
6575

76+
/**
 * Escape the characters that are significant in HTML text and attribute
 * values so that untrusted text (model output, user input) can be assigned
 * to innerHTML safely.
 *
 * @param {*} s - Value to escape; null/undefined become an empty string,
 *   anything else is coerced with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(s) {
  var str = s == null ? '' : String(s);
  return str
    .replace(/&/g, '&amp;') // must run first so later entities are not double-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Render a small Markdown subset to an HTML string.
 *
 * Supported: fenced code blocks, #/##/### headings, `---` rules, unordered
 * (`-`/`*`) and ordered (`1.`) lists, blank lines (emitted as <br>) and
 * paragraphs. Inline formatting is delegated to inlineFormat(). All text is
 * HTML-escaped before it reaches the output.
 *
 * @param {string} src - Markdown source (typically the model's reply).
 * @returns {string} HTML fragments joined with newlines.
 */
function md(src) {
  // Normalise line endings, and drop NUL bytes so the input cannot forge the
  // \x00-delimited placeholders used below.
  var text = (src == null ? '' : String(src))
    .replace(/\r\n?/g, '\n')
    .replace(/\x00/g, '');

  // Extract code blocks first to protect them from line-level parsing.
  // A fence that is never closed (e.g. the reply was cut off by max_tokens)
  // runs to the end of the input instead of being rendered as plain text.
  var codeBlocks = [];
  text = text.replace(/```(\w*)\n([\s\S]*?)(?:```|$)/g, function(m, lang, code, offset, whole) {
    codeBlocks.push('<pre><code>' + escapeHtml(code) + '</code></pre>');
    // Keep the placeholder on a line of its own, otherwise the line loop
    // below would not recognise it and the code block would be lost.
    var end = offset + m.length;
    var before = offset > 0 && whole.charAt(offset - 1) !== '\n' ? '\n' : '';
    var after = end < whole.length && whole.charAt(end) !== '\n' ? '\n' : '';
    return before + '\x00CB' + (codeBlocks.length - 1) + '\x00' + after;
  });

  var lines = text.split('\n');
  var html = [];
  var listTag = ''; // '' when outside a list, otherwise 'ul' or 'ol'

  // Close the currently open list, if any.
  function closeList() {
    if (listTag) {
      html.push('</' + listTag + '>');
      listTag = '';
    }
  }

  // Make sure a list of the requested kind is open (closing a different one).
  function openList(tag) {
    if (listTag !== tag) {
      closeList();
      html.push('<' + tag + '>');
      listTag = tag;
    }
  }

  for (var i = 0; i < lines.length; i++) {
    var line = lines[i];

    // Code block placeholder
    var cbMatch = line.match(/^\x00CB(\d+)\x00$/);
    if (cbMatch) {
      closeList();
      html.push(codeBlocks[parseInt(cbMatch[1], 10)]);
      continue;
    }

    // Headings
    var hMatch = line.match(/^(#{1,3})\s+(.+)$/);
    if (hMatch) {
      closeList();
      var level = hMatch[1].length;
      html.push('<h' + level + '>' + inlineFormat(hMatch[2]) + '</h' + level + '>');
      continue;
    }

    // Horizontal rule
    if (/^---+$/.test(line)) {
      closeList();
      html.push('<hr>');
      continue;
    }

    // Unordered list
    var liMatch = line.match(/^[-*]\s+(.+)$/);
    if (liMatch) {
      openList('ul');
      html.push('<li>' + inlineFormat(liMatch[1]) + '</li>');
      continue;
    }

    // Ordered list: rendered as <ol> so the items keep their numbering
    var olMatch = line.match(/^\d+\.\s+(.+)$/);
    if (olMatch) {
      openList('ol');
      html.push('<li>' + inlineFormat(olMatch[1]) + '</li>');
      continue;
    }

    // Any non-list line ends the current list
    closeList();

    // Empty line = paragraph break
    if (line.trim() === '') {
      html.push('<br>');
      continue;
    }

    // Regular text
    html.push('<p>' + inlineFormat(line) + '</p>');
  }

  closeList();
  return html.join('\n');
}

/**
 * Apply inline Markdown formatting to a single line: `code` spans,
 * ***bold italic***, **bold** and *italic*. Everything outside the generated
 * tags is HTML-escaped.
 *
 * @param {string} text - One line of Markdown source.
 * @returns {string} The line as an HTML string.
 */
function inlineFormat(text) {
  // Inline code (protect first so its contents are not treated as emphasis)
  var codes = [];
  text = text.replace(/`([^`]+)`/g, function(m, code) {
    codes.push('<code>' + escapeHtml(code) + '</code>');
    return '\x00IC' + (codes.length - 1) + '\x00';
  });

  // Escape HTML in remaining text
  text = escapeHtml(text);

  // Bold + italic; the longest delimiter must be handled first
  text = text.replace(/\*\*\*(.+?)\*\*\*/g, '<strong><em>$1</em></strong>');
  text = text.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>');
  text = text.replace(/\*(.+?)\*/g, '<em>$1</em>');

  // Restore inline code
  text = text.replace(/\x00IC(\d+)\x00/g, function(m, idx) {
    return codes[parseInt(idx, 10)];
  });

  return text;
}
176+
66177
function addMsg(role, content) {
67-
const status = chat.querySelector('.status');
178+
var status = chatEl.querySelector('.status');
68179
if (status) status.remove();
69180

70-
const div = document.createElement('div');
71-
div.className = `msg ${role}`;
72-
const avatar = role === 'user' ? 'You' : 'AI';
73-
div.innerHTML = `<div class="avatar">${avatar}</div><div class="bubble"></div>`;
74-
chat.appendChild(div);
75-
chat.scrollTop = chat.scrollHeight;
181+
var div = document.createElement('div');
182+
div.className = 'msg ' + role;
183+
var avatar = role === 'user' ? 'You' : 'AI';
184+
div.innerHTML = '<div class="avatar">' + avatar + '</div><div class="bubble"></div>';
185+
chatEl.appendChild(div);
186+
chatEl.scrollTop = chatEl.scrollHeight;
76187

77-
const bubble = div.querySelector('.bubble');
188+
var bubble = div.querySelector('.bubble');
78189
if (content) setContent(bubble, content);
79190
return bubble;
80191
}
81192

82193
function setContent(bubble, text) {
83-
// Separate thinking from response
84-
const thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/);
85-
let display = text.replace(/<think>[\s\S]*?<\/think>\s*/, '');
194+
// Strip <think>...</think> blocks (model reasoning)
195+
var thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/);
196+
var display = text.replace(/<think>[\s\S]*?<\/think>\s*/, '');
197+
86198
// Handle unclosed think tags (streaming)
87-
if (!display && text.includes('<think>')) {
88-
const after = text.split('<think>').pop();
89-
bubble.innerHTML = `<div class="thinking">${after.replace('</think>', '').trim() || 'Thinking...'}</div>`;
199+
if (!display && text.indexOf('<think>') !== -1) {
200+
var parts = text.split('<think>');
201+
var after = parts[parts.length - 1];
202+
bubble.innerHTML = '<div class="thinking">' + escapeHtml(after.replace('</think>', '').trim() || 'Thinking...') + '</div>';
90203
return;
91204
}
205+
206+
var rendered = md(display.trim() || text);
207+
92208
if (thinkMatch && thinkMatch[1].trim()) {
93-
bubble.innerHTML = `<div class="thinking">${thinkMatch[1].trim()}</div><br>${display.trim()}`;
209+
bubble.innerHTML = '<div class="thinking">' + escapeHtml(thinkMatch[1].trim()) + '</div>' + rendered;
94210
} else {
95-
bubble.textContent = display.trim() || text;
211+
bubble.innerHTML = rendered;
96212
}
97213
}
98214

99-
async function send() {
100-
const text = input.value.trim();
215+
function sendMsg() {
216+
var text = inputEl.value.trim();
101217
if (!text || sending) return;
102218

103219
sending = true;
104-
btn.disabled = true;
105-
input.value = '';
106-
input.style.height = 'auto';
220+
btnEl.disabled = true;
221+
inputEl.value = '';
222+
inputEl.style.height = 'auto';
107223

108224
addMsg('user', text);
109225
messages.push({ role: 'user', content: text });
110226

111-
const bubble = addMsg('assistant', '');
227+
var bubble = addMsg('assistant', '');
112228
bubble.textContent = 'Thinking...';
113229

114-
try {
115-
const res = await fetch(API, {
116-
method: 'POST',
117-
headers: { 'Content-Type': 'application/json' },
118-
body: JSON.stringify({ model: MODEL, messages, max_tokens: 512 }),
119-
});
120-
121-
if (!res.ok) throw new Error(`HTTP ${res.status}`);
122-
const data = await res.json();
123-
const content = data.choices[0].message.content;
124-
messages.push({ role: 'assistant', content });
230+
fetch(API, {
231+
method: 'POST',
232+
headers: { 'Content-Type': 'application/json' },
233+
body: JSON.stringify({ model: MODEL, messages: messages, max_tokens: 512 }),
234+
})
235+
.then(function(res) {
236+
if (!res.ok) throw new Error('HTTP ' + res.status);
237+
return res.json();
238+
})
239+
.then(function(data) {
240+
var content = data.choices[0].message.content;
241+
messages.push({ role: 'assistant', content: content });
125242
setContent(bubble, content);
126-
} catch (err) {
127-
bubble.textContent = `Error: ${err.message}. Make sure port-forward is running.`;
128-
}
129-
130-
sending = false;
131-
btn.disabled = false;
132-
chat.scrollTop = chat.scrollHeight;
133-
input.focus();
243+
})
244+
.catch(function(err) {
245+
bubble.textContent = 'Error: ' + err.message + '. Make sure port-forward is running.';
246+
})
247+
.finally(function() {
248+
sending = false;
249+
btnEl.disabled = false;
250+
chatEl.scrollTop = chatEl.scrollHeight;
251+
inputEl.focus();
252+
});
134253
}
135254
</script>
136255
</body>

0 commit comments

Comments
 (0)