
This is a real-time conversational system for two-way speech communication with AI models. It uses a continuous streaming architecture for fluid conversations, immediate responses, and natural interruption handling. All components of the system run locally (on CPU, in my test system).


MisterJody/Realtimeconversation


const README_MD = `# On-Device Speech-to-Speech Conversational AI — Premium UI

This repo contains a single-page app that can run entirely client-side for demo, and optionally use a local backend proxy to talk to Ollama.

## Getting Started

- Demo Mode: open index.html or enable GitHub Pages (main branch, root folder).
- Backend Mode (local Ollama):
  1. Install Node 18+ and Ollama.
  2. Pull a model, e.g.: ollama pull qwen2.5:0.5b-instruct-q8_0
  3. Start Ollama: ollama serve
  4. From this repo root, install and run the proxy: npm install && npm run dev
  5. Visit http://localhost:3000 and click "Backend: Off" to toggle it ON.

## Notes

- The proxy avoids CORS and provides a stable streaming contract for the UI.
- You can deploy the proxy behind the same origin as index.html for production.

## License

- MIT`;

    const SERVER_JS = `// Minimal local proxy for Ollama with streaming NDJSON
// Run: npm run dev (starts on http://localhost:3000)
// Endpoints:
// - GET  /api/health
// - POST /api/generate  body: { model, prompt, options }
//   Streams NDJSON lines: {"type":"token","content":"..."} and a final {"type":"end"}
import express from 'express';
import fetch from 'node-fetch';
import path from 'path';
import { fileURLToPath } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

const app = express();
app.use(express.json());
app.use(express.static(__dirname)); // serve index.html if placed here

const OLLAMA = process.env.OLLAMA_URL || 'http://127.0.0.1:11434';

app.get('/api/health', (req, res) => res.json({ ok: true, ts: Date.now() }));

app.post('/api/generate', async (req, res) => {
  const { model = 'qwen2.5:0.5b-instruct-q8_0', prompt = '', options = {} } = req.body || {};
  const body = { model, prompt, stream: true, options };
  try {
    const r = await fetch(`${OLLAMA}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body)
    });
    if (!r.ok || !r.body) {
      res.status(502).json({ error: 'Bad response from Ollama' });
      return;
    }
    res.setHeader('Content-Type', 'application/x-ndjson; charset=utf-8');
    const reader = r.body.getReader();
    const decoder = new TextDecoder();
    let buf = '';
    while (true) {
      const { value, done } = await reader.read();
      if (done) break;
      buf += decoder.decode(value, { stream: true });
      let idx;
      while ((idx = buf.indexOf('\n')) >= 0) {
        const line = buf.slice(0, idx).trim();
        buf = buf.slice(idx + 1);
        if (!line) continue;
        try {
          const obj = JSON.parse(line);
          if (typeof obj.response === 'string' && obj.response.length) {
            res.write(JSON.stringify({ type: 'token', content: obj.response }) + '\n');
          }
          if (obj.done) {
            res.write(JSON.stringify({ type: 'end' }) + '\n');
          }
        } catch (e) { /* ignore malformed line */ }
      }
    }
    res.end();
  } catch (e) {
    res.status(500).json({ error: String(e) });
  }
});

const port = process.env.PORT || 3000;
app.listen(port, () => { console.log('[proxy] listening on http://localhost:' + port); });`;
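
// A minimal sketch of how a client could consume the proxy's NDJSON contract
// described in SERVER_JS ({"type":"token"} lines followed by {"type":"end"}).
// The function and callback names below are illustrative only and are not part
// of this repo's existing API.
async function consumeGenerateStream(prompt, onToken, onEnd) {
  const resp = await fetch("/api/generate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ prompt })
  });
  const reader = resp.body.getReader();
  const decoder = new TextDecoder();
  let buf = "";
  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    buf += decoder.decode(value, { stream: true });
    let idx;
    while ((idx = buf.indexOf("\n")) >= 0) {
      const line = buf.slice(0, idx).trim();
      buf = buf.slice(idx + 1);
      if (!line) continue;
      const msg = JSON.parse(line);              // one NDJSON object per line
      if (msg.type === "token") onToken(msg.content);
      if (msg.type === "end" && onEnd) onEnd();
    }
  }
}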

const PACKAGE_JSON = `{
  "name": "s2s-ollama-proxy",
  "version": "0.1.0",
  "type": "module",
  "private": true,
  "scripts": { "dev": "node server.js" },
  "dependencies": { "express": "^4.19.2", "node-fetch": "^3.3.2" }
}`;

function exportRepoScaffolding() {
  exportTextFile("README.md", README_MD);
  exportTextFile("server.js", SERVER_JS);
  exportTextFile("package.json", PACKAGE_JSON);
  toast("Scaffold files downloaded: README.md, server.js, package.json");
  log("ok","Repo scaffolding exported");
  addAudit("local","SCAFFOLD","Exported README, server, package.json");
}
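
// exportTextFile() is presumably defined earlier in the full script. For context,
// a rough sketch of the usual Blob-download approach such a helper takes; the
// name below is hypothetical to avoid clashing with the real implementation.
function exportTextFileSketch(filename, text) {
  const blob = new Blob([text], { type: "text/plain;charset=utf-8" }); // wrap text in a Blob
  const url = URL.createObjectURL(blob);
  const a = document.createElement("a");
  a.href = url;
  a.download = filename;      // suggested filename for the download
  document.body.appendChild(a);
  a.click();                  // trigger the browser download
  a.remove();
  URL.revokeObjectURL(url);   // release the object URL
}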

// Celebrate
function celebrate() {
  const c = el.celebrate;
  const ctx = c.getContext("2d");
  c.width = innerWidth; c.height = innerHeight;
  const N = 80, parts = [];
  for (let i=0;i<N;i++){
    parts.push({
      x: Math.random()*c.width,
      y: -10 - Math.random()*80,
      r: 4+Math.random()*8,
      vx: (Math.random()-.5)*1.2,
      vy: 1+Math.random()*2,
      hue: 160 + Math.random()*160,
      a: .9
    });
  }
  c.classList.add("show");
  let t = 0;
  const anim = ()=>{
    ctx.clearRect(0,0,c.width,c.height);
    parts.forEach(p=>{
      p.vy += .02; p.x += p.vx; p.y += p.vy; p.a -= .006;
      ctx.fillStyle = `hsla(${p.hue},80%,70%,${Math.max(0,p.a)})`;
      ctx.beginPath(); ctx.arc(p.x,p.y,p.r,0,Math.PI*2); ctx.fill();
    });
    t++;
    if (t<220) requestAnimationFrame(anim);
    else c.classList.remove("show");
  };
  anim();
}

// Backend toggle helpers
function updateBackendBadge(){
  const label = document.getElementById("backendState");
  label.textContent = state.backendOn ? (state.serverHealthy ? "On" : "On (pending)") : "Off";
  btnToggleBackend.textContent = "Backend: " + (state.backendOn ? "On" : "Off");
}
async function checkHealth(){
  try{
    const r = await fetch(`${state.backendBase}/api/health`, { cache: "no-store" });
    state.serverHealthy = r.ok;
  } catch { state.serverHealthy = false; }
  updateBackendBadge();
}

// UI bindings
el.btnStart.addEventListener("click", async ()=>{
  if (state.running) return;
  setRunning(true);
  toast("Voice Chat Bot Ready");
  log("ok","Voice Chat Bot Ready");
  await runTurn("How does your pipeline reduce latency?");
});
el.btnStop.addEventListener("click", ()=>{
  if (!state.running) return;
  setRunning(false);
  stopSpeak();
  toast("Session stopped");
  log("warn","Session stopped by user");
});
el.btnDocs.addEventListener("click", ()=>{
  document.getElementById("docs").scrollIntoView({behavior:"smooth", block:"start"});
  toast("Opened Quickstart");
});

btnToggleBackend.addEventListener("click", async ()=>{
  state.backendOn = !state.backendOn;
  if (state.backendOn) {
    await checkHealth();
    if (!state.serverHealthy) {
      toast("Proxy not detected at /api/* — run npm run dev");
      log("warn","Proxy not detected at /api/* — starting in fallback simulator mode");
    } else {
      toast("Backend ON — streaming via local proxy");
      log("ok","Backend enabled: using Ollama proxy");
    }
  } else {
    toast("Backend OFF — using simulators");
    log("info","Backend disabled; simulators engaged");
  }
  updateBackendBadge();
});

btnScaffold.addEventListener("click", exportRepoScaffolding);

// Controls
el.switchVAD.addEventListener("click", ()=>{
  state.vad = !state.vad;
  el.switchVAD.dataset.on = state.vad;
  el.switchVAD.setAttribute("aria-checked", String(state.vad));
  log("info","VAD "+(state.vad?"enabled":"disabled"));
  updateStats();
});
el.switchFiller.addEventListener("click", ()=>{
  state.filler = !state.filler;
  el.switchFiller.dataset.on = state.filler;
  el.switchFiller.setAttribute("aria-checked", String(state.filler));
  log("info","Filler prompting "+(state.filler?"enabled":"disabled"));
});
el.chunkWords.addEventListener("input", e=>{
  state.chunkWords = parseInt(e.target.value,10);
  el.meterChunk.style.width = (100*(state.chunkWords-4)/(22-4))+"%";
});
el.wpm.addEventListener("input", e=>{
  state.wps = parseInt(e.target.value,10);
  el.meterWpm.style.width = (100*(state.wps-4)/(10-4))+"%";
});
el.queueCap.addEventListener("change", e=>{
  state.queueCap = parseInt(e.target.value,10);
  updateStats();
});
el.voicePick.addEventListener("change", e=>{
  state.voice = e.target.value;
  toast("Voice set to "+state.voice);
});

el.btnExport.addEventListener("click", exportLogs);
el.btnPrint.addEventListener("click", ()=> window.print());
el.searchLogs.addEventListener("input", applyLogFilter);
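// Keyboard shortcuts: "/" focuses log search, "?" toggles the help modal,
// holding Space is push-to-talk, "s"/"x" start/stop the session,
// Cmd/Ctrl+K clears the logs, Cmd/Ctrl+E exports them.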
document.addEventListener("keydown", (e)=>{
  if (e.key==="/"){ e.preventDefault(); el.searchLogs.focus(); }
  if (e.key==="?" || (e.shiftKey && e.key==="/")) {
    e.preventDefault(); el.modal.classList.toggle("open");
  }
  if (e.key===" "){
    el.shortcutHint.hidden = false;
  }
  if (e.key.toLowerCase()==="s"){ setRunning(true); toast("Session started"); }
  if (e.key.toLowerCase()==="x"){ setRunning(false); stopSpeak(); toast("Session stopped"); }
  if ((e.metaKey||e.ctrlKey) && e.key.toLowerCase()==="k"){ e.preventDefault(); state.logs=[]; applyLogFilter(); toast("Logs cleared"); }
  if ((e.metaKey||e.ctrlKey) && e.key.toLowerCase()==="e"){ e.preventDefault(); exportLogs(); }
});
document.addEventListener("keyup", (e)=>{
  if (e.key===" "){
    el.shortcutHint.hidden = true;
    if (state.running){
      const phrases = [
        "Can you summarize the architecture?",
        "What makes the latency low?",
        "How do interruptions work?",
        "Explain the chunking strategy.",
        "What are the model components?"
      ];
      const pick = phrases[Math.floor(Math.random()*phrases.length)];
      runTurn(pick);
    } else {
      toast("Start session to talk");
    }
  }
});
el.modal.addEventListener("click", (e)=>{ if (e.target === el.modal) el.modal.classList.remove("open"); });

// periodic stats + mic meter to animate meters
setInterval(()=>{
  if (state.running) {
    const lvl = fakeMicLevel();
    el.meterWpm.style.width = Math.round(100*lvl)+"%";
    el.meterChunk.style.width = Math.round(100*Math.min(1, state.chunkWords/22))+"%";
    updateStats();
  }
}, 1000);
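
// fakeMicLevel() is not shown in this excerpt; a plausible stand-in (illustrative
// only) would return a jittered value in [0,1] to animate the meters, e.g.:
// function fakeMicLevel() { return 0.3 + Math.random() * 0.6; }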

// Initial content
addAudit("local","INIT","UI loaded");
log("info","Welcome. Push this file to GitHub and enable Pages for a live demo.");
applyLogFilter();
updateStats();
</script>
