Merge pull request #108 from ajcwebdev/docker

ajcwebdev · web-flow · commit 333e63f22689 · 2025-01-19T16:38:06.000-06:00
Improve Whisper Transcript Handling
diff --git a/.github/docker-entrypoint.sh b/.github/docker-entrypoint.sh
@@ -17,6 +17,9 @@ log_error() {
     exit 1
 }
 
+echo "Debug: (docker-entrypoint.sh) Checking /root/.ollama before starting Ollama..."
+ls -lR /root/.ollama || true
+
 # Start Ollama server in the background
 echo "Starting Ollama server..."
 ollama serve &
diff --git a/package.json b/package.json
@@ -42,7 +42,7 @@
     "docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
     "docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
     "docker-debug": "docker run --rm -it --entrypoint sh autoshow -c 'ls -lh /usr/src/app && ls -lh /usr/src/app/whisper.cpp/models && ls -lh /usr/src/app/whisper.cpp/build/bin'",
-    "ollama-debug": "docker run --rm -it --entrypoint sh autoshow -c 'ls -l /usr/local/bin/ollama && ls -lh /root/.ollama'",
+    "ollama-debug": "docker run --rm -it --entrypoint sh autoshow -c 'whoami && ls -l /root && ls -lR /root/.ollama && ls -l /usr/local/bin/ollama'",
     "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
     "bun": "bun --env-file=.env --no-warnings src/commander.ts",
     "deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/commander.ts"
diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts
@@ -13,7 +13,6 @@ import type { OllamaModelType, OllamaResponse } from '../utils/types/llms'
  *
  * @param {string} prompt - The prompt or instructions to process.
  * @param {string} transcript - The transcript text.
- * @param {string} tempPath - (unused) The temporary file path (no longer used).
  * @param {string | OllamaModelType} [model='QWEN_2_5_0B'] - The Ollama model to use.
  * @returns {Promise<string>} A Promise resolving with the generated text.
  */
@@ -26,22 +25,27 @@ export const callOllama = async (
   l.wait(`    - model: ${model}`)
 
   try {
+    // Determine the final modelKey from the argument
     const modelKey = typeof model === 'string' ? model : 'QWEN_2_5_0B'
     const modelConfig = OLLAMA_MODELS[modelKey as OllamaModelType] || OLLAMA_MODELS.QWEN_2_5_0B
     const ollamaModelName = modelConfig.modelId
 
     l.wait(`    - modelName: ${modelKey}\n    - ollamaModelName: ${ollamaModelName}`)
 
+    // Determine host/port from environment or fallback
     const ollamaHost = env['OLLAMA_HOST'] || 'localhost'
     const ollamaPort = env['OLLAMA_PORT'] || '11434'
-    l.wait(`\n  Using Ollama host: ${ollamaHost}, port: ${ollamaPort}`)
+    l.info(`\n  [callOllama] OLLAMA_HOST=${ollamaHost}, OLLAMA_PORT=${ollamaPort}`)
 
+    // Combine prompt + transcript
     const combinedPrompt = `${prompt}\n${transcript}`
 
+    // Ensure Ollama server is running and that the model is pulled
     await checkOllamaServerAndModel(ollamaHost, ollamaPort, ollamaModelName)
 
-    l.wait(`    - Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`)
+    l.wait(`\n  Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`)
 
+    // Make the actual request to Ollama
     const response = await fetch(`http://${ollamaHost}:${ollamaPort}/api/chat`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
@@ -56,9 +60,11 @@ export const callOllama = async (
       throw new Error(`HTTP error! status: ${response.status}`)
     }
 
-    const data = await response.json() as OllamaResponse
+    // Parse returned JSON
+    const data = (await response.json()) as OllamaResponse
     const fullContent = data?.message?.content || ''
 
+    // Log token usage if provided by the server
     const totalPromptTokens = data.prompt_eval_count ?? 0
     const totalCompletionTokens = data.eval_count ?? 0
 
@@ -68,7 +74,7 @@ export const callOllama = async (
       tokenUsage: {
         input: totalPromptTokens || undefined,
         output: totalCompletionTokens || undefined,
-        total: totalPromptTokens + totalCompletionTokens || undefined,
+        total: (totalPromptTokens + totalCompletionTokens) || undefined,
       },
     })
 
diff --git a/src/transcription/format-transcript.ts b/src/transcription/format-transcript.ts
@@ -103,54 +103,108 @@ export function formatAssemblyTranscript(transcript: AssemblyAIPollingResponse,
  * Converts LRC content (common lyrics file format) to plain text with timestamps.
  * - Strips out lines that contain certain metadata (like [by:whisper.cpp]).
  * - Converts original timestamps [MM:SS.xx] to a simplified [MM:SS] format.
- * - Collapses lines with single or few words into lines of up to 15 words, retaining only the first timestamp
- *   among collapsed lines and removing subsequent timestamps.
+ * - Extracts every timestamp found in each line, then groups the words that
+ *   follow into chunks of up to 15 words; each new timestamped text segment
+ *   closes the current chunk and starts the next one.
  *
  * @param lrcContent - The content of the LRC file as a string
  * @returns The converted text content with simple timestamps
  */
 export function formatWhisperTranscript(lrcContent: string): string {
   // Maximum number of words emitted on one output line.
   const maxWordsPerLine = 15
 
   // A run of words together with the timestamp that precedes it in the input.
   // `timestamp` is undefined for text that appears before any timestamp on its line.
   type Segment = {
     timestamp: string | undefined
     words: string[]
   }
 
   // 1) Drop the `[by:whisper.cpp]` attribution line and shorten "[MM:SS.xx]" to "[MM:SS]".
   const rawLines = lrcContent
     .split('\n')
     .filter(line => !line.startsWith('[by:whisper.cpp]'))
     .map(line =>
       line.replace(
         /\[(\d{1,3}):(\d{2})(\.\d+)?\]/g,
         (_, minutes, seconds) => `[${minutes}:${seconds}]`
       )
     )
 
   /**
    * Splits one line (which may contain several [MM:SS] tags) into segments:
    * each timestamp plus the words between it and the next timestamp.
    *
    * A timestamp with no words after it still produces a segment (with an
    * empty word list) so that the timestamp is not lost: text on following
    * un-timestamped lines must be attributed to it, not to an older timestamp.
    */
   function parseLineIntoSegments(line: string): Segment[] {
     const segments: Segment[] = []
     const pattern = /\[(\d{1,3}:\d{2})\]/g
 
     let lastIndex = 0
     let match: RegExpExecArray | null
     let currentTimestamp: string | undefined = undefined
 
     // Records the text that belongs to `currentTimestamp`.
     const pushSegment = (text: string): void => {
       const words = text.trim().split(/\s+/).filter(Boolean)
       // Skip only when there is neither text nor a timestamp to carry forward.
       if (words.length > 0 || currentTimestamp !== undefined) {
         segments.push({ timestamp: currentTimestamp, words })
       }
     }
 
     while ((match = pattern.exec(line)) !== null) {
       // Text before this tag belongs to the previous timestamp (if any).
       pushSegment(line.slice(lastIndex, match.index))
       currentTimestamp = match[1]
       lastIndex = pattern.lastIndex
     }
 
     // Text after the last tag; for a line without tags this is the whole
     // line, emitted as a single segment with `timestamp: undefined`.
     pushSegment(line.slice(lastIndex))
 
     return segments
   }
 
   // 2) Flatten all lines into one ordered list of segments.
   const allSegments: Segment[] = rawLines.flatMap(line => parseLineIntoSegments(line))
 
   // 3) Accumulate words into output lines of at most `maxWordsPerLine` words.
   //    A segment that carries a timestamp closes the current output line and
   //    starts a new one under that timestamp.
   const finalLines: string[] = []
   let currentTimestamp: string | undefined = undefined
   let currentWords: string[] = []
 
   // Emits the pending words (if any) as one "[MM:SS] words..." line.
   function finalizeChunk(): void {
     if (currentWords.length > 0) {
       // Text seen before any timestamp is labelled "00:00".
       const tsToUse = currentTimestamp ?? '00:00'
       finalLines.push(`[${tsToUse}] ${currentWords.join(' ')}`)
       currentWords = []
     }
   }
 
   for (const segment of allSegments) {
     if (segment.timestamp !== undefined) {
       finalizeChunk()
       currentTimestamp = segment.timestamp
     }
 
     for (const word of segment.words) {
       currentWords.push(word)
       if (currentWords.length === maxWordsPerLine) {
         finalizeChunk()
       }
     }
   }
 
   // 4) Flush whatever is left over.
   finalizeChunk()
 
   // 5) One output line per chunk.
   return finalLines.join('\n')
 }
diff --git a/src/utils/validate-option.ts b/src/utils/validate-option.ts
@@ -119,19 +119,20 @@ export async function checkWhisperDirAndModel(
 /**
  * checkOllamaServerAndModel()
  * ---------------------
- * Checks if the Ollama server is running, attempts to start it if not running,
- * and ensures that the specified model is available. If not, it will pull the model.
+ * Checks if the Ollama server is running, attempts to start it if not,
+ * and ensures the specified model is available (pulling if needed).
  *
  * @param {string} ollamaHost - The Ollama host
  * @param {string} ollamaPort - The Ollama port
- * @param {string} ollamaModelName - The Ollama model name
+ * @param {string} ollamaModelName - The Ollama model name (e.g. 'qwen2.5:0.5b')
  * @returns {Promise<void>}
  */
 export async function checkOllamaServerAndModel(
   ollamaHost: string,
   ollamaPort: string,
   ollamaModelName: string
 ): Promise<void> {
+  // Helper to check if the Ollama server responds
   async function checkServer(): Promise<boolean> {
     try {
       const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`)
@@ -141,23 +142,29 @@ export async function checkOllamaServerAndModel(
     }
   }
 
+  l.info(`[checkOllamaServerAndModel] Checking server: http://${ollamaHost}:${ollamaPort}`)
+
+  // 1) Confirm the server is running
   if (await checkServer()) {
     l.wait('\n  Ollama server is already running...')
   } else {
+    // If the Docker-based environment uses 'ollama' as hostname but it's not up, that's likely an error
     if (ollamaHost === 'ollama') {
       throw new Error('Ollama server is not running. Please ensure the Ollama server is running and accessible.')
     } else {
-      l.wait('\n  Ollama server is not running. Attempting to start...')
+      // Attempt to spawn an Ollama server locally
+      l.wait('\n  Ollama server is not running. Attempting to start it locally...')
       const ollamaProcess = spawn('ollama', ['serve'], {
         detached: true,
         stdio: 'ignore',
       })
       ollamaProcess.unref()
 
+      // Wait up to ~30 seconds for the server to respond
       let attempts = 0
       while (attempts < 30) {
         if (await checkServer()) {
-          l.wait('    - Ollama server is now ready.')
+          l.wait('    - Ollama server is now ready.\n')
           break
         }
         await new Promise((resolve) => setTimeout(resolve, 1000))
@@ -169,17 +176,20 @@ export async function checkOllamaServerAndModel(
     }
   }
 
-  l.wait(`\n  Checking if model is available: ${ollamaModelName}`)
+  // 2) Confirm the model is available; if not, pull it
+  l.wait(`  Checking if model is available: ${ollamaModelName}`)
   try {
     const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`)
     if (!tagsResponse.ok) {
       throw new Error(`HTTP error! status: ${tagsResponse.status}`)
     }
+
     const tagsData = (await tagsResponse.json()) as OllamaTagsResponse
     const isModelAvailable = tagsData.models.some((m) => m.name === ollamaModelName)
+    l.info(`[checkOllamaServerAndModel] isModelAvailable=${isModelAvailable}`)
 
     if (!isModelAvailable) {
-      l.wait(`\n  Model ${ollamaModelName} is not available, pulling...`)
+      l.wait(`\n  Model ${ollamaModelName} is NOT available; pulling now...`)
       const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
@@ -189,11 +199,13 @@ export async function checkOllamaServerAndModel(
         throw new Error(`Failed to initiate pull for model ${ollamaModelName}`)
       }
       if (!pullResponse.body) {
-        throw new Error('Response body is null')
+        throw new Error('Response body is null while pulling model.')
       }
 
       const reader = pullResponse.body.getReader()
       const decoder = new TextDecoder()
+
+      // Stream the JSON lines from the server
       while (true) {
         const { done, value } = await reader.read()
         if (done) break
@@ -202,6 +214,8 @@ export async function checkOllamaServerAndModel(
         const lines = chunk.split('\n')
         for (const line of lines) {
           if (line.trim() === '') continue
+
+          // Each line should be a JSON object from the Ollama server
           try {
             const parsedLine = JSON.parse(line)
             if (parsedLine.status === 'success') {

Original file line number	Diff line number	Diff line change
`@@ -17,6 +17,9 @@ log_error() {`
`17`	`17`	`exit 1`
`18`	`18`	`}`
`19`	`19`
	`20`	`+echo "Debug: (docker-entrypoint.sh) Checking /root/.ollama before starting Ollama..."`
	`21`	`+ls -lR /root/.ollama \|\| true`
	`22`	`+`
`20`	`23`	`# Start Ollama server in the background`
`21`	`24`	`echo "Starting Ollama server..."`
`22`	`25`	`ollama serve &`