-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: llm-chat.js
More file actions
127 lines (104 loc) · 3.24 KB
/
llm-chat.js
File metadata and controls
127 lines (104 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// ─── Browser LLM Chat (WebLLM Integration) ───
// Provides optional LLM-powered responses via WebGPU.
// Falls back gracefully to the rule-based chatbot when unavailable.
const LLMChat = (() => {
  const MODEL_ID = 'SmolLM2-135M-Instruct-q0f32-MLC';
  const MAX_HISTORY = 6; // max retained turns (user + assistant messages combined)
  const CDN_URL = 'https://esm.run/@mlc-ai/web-llm';

  let engine = null;            // MLCEngine instance once loading succeeds
  let ready = false;            // true only after the engine finished loading
  let loading = false;          // true while a model load is in flight
  let initPromise = null;       // shared in-flight load, so concurrent initEngine calls await one load
  let webllm = null;            // cached dynamic import of the web-llm module
  let conversationHistory = [];

  /**
   * Whether the current environment exposes the WebGPU API.
   * Guarded with `typeof` so it is safe outside a browser
   * (plain `navigator.gpu` would throw a ReferenceError there).
   * @returns {boolean}
   */
  function checkWebGPUSupport() {
    return typeof navigator !== 'undefined' && !!navigator.gpu;
  }

  /**
   * Builds the system prompt from the page's knowledge base.
   * Kept short on purpose — small models follow brief context better.
   * Falls back to a generic prompt when `window.knowledgeBase` is absent.
   * @returns {string}
   */
  function buildSystemPrompt() {
    const kb = window.knowledgeBase;
    if (!kb) return 'You are a helpful portfolio assistant.';
    const a = kb.about;
    return `You are ${a.name}'s sassy, witty portfolio chatbot. Be funny and slightly sarcastic like a tech bro who loves dad jokes.
${a.name} is a ${a.role} with ${a.experience} experience in Bangalore, India.
Companies: Red Hat (current), Beem, Tata Digital, Founding Engineer at Gnosis Lab.
Skills: Python, Go, Snowflake, Databricks, Kubernetes, LangChain, LLMs, MCP.
Contact: ${a.email}, GitHub ${a.github}, LinkedIn ${a.linkedin}.
Answer in 1-3 short sentences with humor. Only use facts above. If unsure, make a joke about not knowing.`;
  }

  /**
   * Dynamically imports the WebLLM library from the CDN (cached).
   * @returns {Promise<object>} the imported module namespace
   */
  async function loadWebLLM() {
    if (webllm) return webllm;
    webllm = await import(CDN_URL);
    return webllm;
  }

  /**
   * Loads the model and creates the engine. Safe to call repeatedly:
   * resolves immediately once ready, and concurrent callers while a
   * load is in flight all await the SAME promise (previously a second
   * caller got `undefined` back and had no way to await readiness).
   * @param {(progress: object) => void} [onProgress] - load progress callback
   * @throws {Error} when WebGPU is unsupported or the model fails to load
   */
  async function initEngine(onProgress) {
    if (ready) return;
    if (initPromise) return initPromise;
    if (!checkWebGPUSupport()) {
      throw new Error('WebGPU not supported');
    }
    loading = true;
    initPromise = (async () => {
      try {
        const lib = await loadWebLLM();
        engine = await lib.CreateMLCEngine(MODEL_ID, {
          initProgressCallback: (progress) => {
            onProgress?.(progress);
          },
        });
        ready = true;
      } catch (err) {
        // Leave the module in a clean "not ready" state so a retry is possible.
        engine = null;
        ready = false;
        throw err;
      } finally {
        loading = false;
        initPromise = null; // allow retry after failure; `ready` guards after success
      }
    })();
    return initPromise;
  }

  /** @returns {boolean} true when the engine is loaded and usable */
  function isReady() {
    return ready && engine !== null;
  }

  /** @returns {boolean} true while a model load is in flight */
  function isLoading() {
    return loading;
  }

  /** Alias used by the public API (see returned object below). */
  function hasWebGPU() {
    return checkWebGPUSupport();
  }

  // Drops the oldest messages so the context stays within MAX_HISTORY.
  function trimHistory() {
    if (conversationHistory.length > MAX_HISTORY) {
      conversationHistory = conversationHistory.slice(-MAX_HISTORY);
    }
  }

  /**
   * Streams a chat completion for the user's message.
   * On failure the just-pushed user turn is rolled back, so a retry
   * does not resend a duplicated message (previously the failed turn
   * stayed in history with no assistant reply).
   * @param {string} userMessage
   * @param {(token: string, fullResponse: string) => void} [onToken] - streaming callback
   * @returns {Promise<string>} the full assistant response
   * @throws {Error} when the engine is not ready or the request fails
   */
  async function generate(userMessage, onToken) {
    if (!isReady()) throw new Error('LLM engine not ready');
    conversationHistory.push({ role: 'user', content: userMessage });
    trimHistory();
    const messages = [
      { role: 'system', content: buildSystemPrompt() },
      ...conversationHistory,
    ];
    let fullResponse = '';
    try {
      const chunks = await engine.chat.completions.create({
        messages,
        temperature: 0.7,
        max_tokens: 256,
        stream: true,
      });
      for await (const chunk of chunks) {
        const token = chunk.choices[0]?.delta?.content || '';
        fullResponse += token;
        onToken?.(token, fullResponse);
      }
    } catch (err) {
      // Roll back the user turn so the history stays consistent for a retry.
      conversationHistory.pop();
      throw err;
    }
    conversationHistory.push({ role: 'assistant', content: fullResponse });
    trimHistory();
    return fullResponse;
  }

  /** Clears the conversation history (e.g. when the chat is reset). */
  function resetHistory() {
    conversationHistory = [];
  }

  return {
    checkWebGPUSupport: hasWebGPU,
    initEngine,
    generate,
    isReady,
    isLoading,
    resetHistory,
  };
})();