-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: llm-chat.js
More file actions
127 lines (104 loc) · 3.24 KB
/
llm-chat.js
File metadata and controls
127 lines (104 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// ─── Browser LLM Chat (WebLLM Integration) ───
// Provides optional LLM-powered responses via WebGPU.
// Falls back gracefully to the rule-based chatbot when unavailable.
const LLMChat = (() => {
  const MODEL_ID = 'SmolLM2-135M-Instruct-q0f32-MLC';
  const MAX_HISTORY = 6; // max retained turns (user + assistant messages combined)
  const CDN_URL = 'https://esm.run/@mlc-ai/web-llm';

  let engine = null;            // MLCEngine instance once loading succeeds
  let ready = false;            // true only after the engine finished loading
  let loading = false;          // true while a model load is in flight
  let initPromise = null;       // shared in-flight load, so concurrent initEngine calls await one load
  let webllm = null;            // cached dynamic import of the web-llm module
  let conversationHistory = [];

  /**
   * Whether the current environment exposes the WebGPU API.
   * Guarded with `typeof` so it is safe outside a browser
   * (plain `navigator.gpu` would throw a ReferenceError there).
   * @returns {boolean}
   */
  function checkWebGPUSupport() {
    return typeof navigator !== 'undefined' && !!navigator.gpu;
  }

  /**
   * Builds the system prompt from the page's knowledge base.
   * Kept short on purpose — small models follow brief context better.
   * Falls back to a generic prompt when `window.knowledgeBase` is absent.
   * @returns {string}
   */
  function buildSystemPrompt() {
    const kb = window.knowledgeBase;
    if (!kb) return 'You are a helpful portfolio assistant.';
    const a = kb.about;
    return `You are ${a.name}'s sassy, witty portfolio chatbot. Be funny and slightly sarcastic like a tech bro who loves dad jokes.
${a.name} is a ${a.role} with ${a.experience} experience in Bangalore, India.
Companies: Red Hat (current), Beem, Tata Digital, Founding Engineer at Gnosis Lab.
Skills: Python, Go, Snowflake, Databricks, Kubernetes, LangChain, LLMs, MCP.
Contact: ${a.email}, GitHub ${a.github}, LinkedIn ${a.linkedin}.
Answer in 1-3 short sentences with humor. Only use facts above. If unsure, make a joke about not knowing.`;
  }

  /**
   * Dynamically imports the WebLLM library from the CDN (cached).
   * @returns {Promise<object>} the imported module namespace
   */
  async function loadWebLLM() {
    if (webllm) return webllm;
    webllm = await import(CDN_URL);
    return webllm;
  }

  /**
   * Loads the model and creates the engine. Safe to call repeatedly:
   * resolves immediately once ready, and concurrent callers while a
   * load is in flight all await the SAME promise (previously a second
   * caller got `undefined` back and had no way to await readiness).
   * @param {(progress: object) => void} [onProgress] - load progress callback
   * @throws {Error} when WebGPU is unsupported or the model fails to load
   */
  async function initEngine(onProgress) {
    if (ready) return;
    if (initPromise) return initPromise;
    if (!checkWebGPUSupport()) {
      throw new Error('WebGPU not supported');
    }
    loading = true;
    initPromise = (async () => {
      try {
        const lib = await loadWebLLM();
        engine = await lib.CreateMLCEngine(MODEL_ID, {
          initProgressCallback: (progress) => {
            onProgress?.(progress);
          },
        });
        ready = true;
      } catch (err) {
        // Leave the module in a clean "not ready" state so a retry is possible.
        engine = null;
        ready = false;
        throw err;
      } finally {
        loading = false;
        initPromise = null; // allow retry after failure; `ready` guards after success
      }
    })();
    return initPromise;
  }

  /** @returns {boolean} true when the engine is loaded and usable */
  function isReady() {
    return ready && engine !== null;
  }

  /** @returns {boolean} true while a model load is in flight */
  function isLoading() {
    return loading;
  }

  /** Alias used by the public API (see returned object below). */
  function hasWebGPU() {
    return checkWebGPUSupport();
  }

  // Drops the oldest messages so the context stays within MAX_HISTORY.
  function trimHistory() {
    if (conversationHistory.length > MAX_HISTORY) {
      conversationHistory = conversationHistory.slice(-MAX_HISTORY);
    }
  }

  /**
   * Streams a chat completion for the user's message.
   * On failure the just-pushed user turn is rolled back, so a retry
   * does not resend a duplicated message (previously the failed turn
   * stayed in history with no assistant reply).
   * @param {string} userMessage
   * @param {(token: string, fullResponse: string) => void} [onToken] - streaming callback
   * @returns {Promise<string>} the full assistant response
   * @throws {Error} when the engine is not ready or the request fails
   */
  async function generate(userMessage, onToken) {
    if (!isReady()) throw new Error('LLM engine not ready');
    conversationHistory.push({ role: 'user', content: userMessage });
    trimHistory();
    const messages = [
      { role: 'system', content: buildSystemPrompt() },
      ...conversationHistory,
    ];
    let fullResponse = '';
    try {
      const chunks = await engine.chat.completions.create({
        messages,
        temperature: 0.7,
        max_tokens: 256,
        stream: true,
      });
      for await (const chunk of chunks) {
        const token = chunk.choices[0]?.delta?.content || '';
        fullResponse += token;
        onToken?.(token, fullResponse);
      }
    } catch (err) {
      // Roll back the user turn so the history stays consistent for a retry.
      conversationHistory.pop();
      throw err;
    }
    conversationHistory.push({ role: 'assistant', content: fullResponse });
    trimHistory();
    return fullResponse;
  }

  /** Clears the conversation history (e.g. when the chat is reset). */
  function resetHistory() {
    conversationHistory = [];
  }

  return {
    checkWebGPUSupport: hasWebGPU,
    initEngine,
    generate,
    isReady,
    isLoading,
    resetHistory,
  };
})();