
Commit 333e63f

Merge pull request #108 from ajcwebdev/docker
Improve Whisper Transcript Handling

Authored Jan 19, 2025 · 2 parents: 2ba9f2d + a69e806

File tree: 5 files changed (+126, -49 lines)
.github/docker-entrypoint.sh (+3)

@@ -17,6 +17,9 @@ log_error() {
   exit 1
 }
 
+echo "Debug: (docker-entrypoint.sh) Checking /root/.ollama before starting Ollama..."
+ls -lR /root/.ollama || true
+
 # Start Ollama server in the background
 echo "Starting Ollama server..."
 ollama serve &
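
Note: the '|| true' guard keeps the listing from aborting the entrypoint when /root/.ollama is missing or unreadable, which matters if the script runs under set -e (the script's options are not shown in this hunk, so that is an assumption).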

package.json (+1, -1)

@@ -42,7 +42,7 @@
     "docker-cli": "docker run --rm --env-file .env -v $PWD/content:/usr/src/app/content autoshow",
     "docker-serve": "docker run -d -p 3000:3000 -v $PWD/content:/usr/src/app/content autoshow serve",
     "docker-debug": "docker run --rm -it --entrypoint sh autoshow -c 'ls -lh /usr/src/app && ls -lh /usr/src/app/whisper.cpp/models && ls -lh /usr/src/app/whisper.cpp/build/bin'",
-    "ollama-debug": "docker run --rm -it --entrypoint sh autoshow -c 'ls -l /usr/local/bin/ollama && ls -lh /root/.ollama'",
+    "ollama-debug": "docker run --rm -it --entrypoint sh autoshow -c 'whoami && ls -l /root && ls -lR /root/.ollama && ls -l /usr/local/bin/ollama'",
     "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
     "bun": "bun --env-file=.env --no-warnings src/commander.ts",
     "deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/commander.ts"

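The reworked ollama-debug script now also reports the invoking user (whoami) and a recursive listing of /root/.ollama, making it easier to confirm whether the Ollama binary and any pre-pulled model blobs actually made it into the image.
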
src/llms/ollama.ts (+11, -5)

@@ -13,7 +13,6 @@ import type { OllamaModelType, OllamaResponse } from '../utils/types/llms'
  *
  * @param {string} prompt - The prompt or instructions to process.
  * @param {string} transcript - The transcript text.
- * @param {string} tempPath - (unused) The temporary file path (no longer used).
  * @param {string | OllamaModelType} [model='QWEN_2_5_0B'] - The Ollama model to use.
  * @returns {Promise<string>} A Promise resolving with the generated text.
  */
@@ -26,22 +25,27 @@ export const callOllama = async (
   l.wait(` - model: ${model}`)
 
   try {
+    // Determine the final modelKey from the argument
     const modelKey = typeof model === 'string' ? model : 'QWEN_2_5_0B'
     const modelConfig = OLLAMA_MODELS[modelKey as OllamaModelType] || OLLAMA_MODELS.QWEN_2_5_0B
     const ollamaModelName = modelConfig.modelId
 
     l.wait(` - modelName: ${modelKey}\n - ollamaModelName: ${ollamaModelName}`)
 
+    // Determine host/port from environment or fallback
     const ollamaHost = env['OLLAMA_HOST'] || 'localhost'
     const ollamaPort = env['OLLAMA_PORT'] || '11434'
-    l.wait(`\n Using Ollama host: ${ollamaHost}, port: ${ollamaPort}`)
+    l.info(`\n [callOllama] OLLAMA_HOST=${ollamaHost}, OLLAMA_PORT=${ollamaPort}`)
 
+    // Combine prompt + transcript
     const combinedPrompt = `${prompt}\n${transcript}`
 
+    // Ensure Ollama server is running and that the model is pulled
     await checkOllamaServerAndModel(ollamaHost, ollamaPort, ollamaModelName)
 
-    l.wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`)
+    l.wait(`\n Sending chat request to http://${ollamaHost}:${ollamaPort} using model '${ollamaModelName}'`)
 
+    // Make the actual request to Ollama
     const response = await fetch(`http://${ollamaHost}:${ollamaPort}/api/chat`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
@@ -56,9 +60,11 @@ export const callOllama = async (
       throw new Error(`HTTP error! status: ${response.status}`)
     }
 
-    const data = await response.json() as OllamaResponse
+    // Parse returned JSON
+    const data = (await response.json()) as OllamaResponse
     const fullContent = data?.message?.content || ''
 
+    // Log token usage if provided by the server
     const totalPromptTokens = data.prompt_eval_count ?? 0
     const totalCompletionTokens = data.eval_count ?? 0
 
@@ -68,7 +74,7 @@ export const callOllama = async (
       tokenUsage: {
         input: totalPromptTokens || undefined,
         output: totalCompletionTokens || undefined,
-        total: totalPromptTokens + totalCompletionTokens || undefined,
+        total: (totalPromptTokens + totalCompletionTokens) || undefined,
       },
     })

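For orientation, here is a minimal self-contained sketch of the non-streaming /api/chat call this file makes. The request body is truncated in the hunk above, so the payload fields here are an assumption based on the public Ollama chat API, and chatOnce is a hypothetical helper, not the project's code:

```ts
// Hedged sketch, not the project's actual code: field names follow the
// public Ollama /api/chat contract; callOllama's real body is elided above.
type ChatResponse = {
  message?: { content?: string }
  prompt_eval_count?: number
  eval_count?: number
}

async function chatOnce(
  host: string,
  port: string,
  model: string,
  prompt: string
): Promise<string> {
  const res = await fetch(`http://${host}:${port}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    // stream: false yields one JSON object instead of newline-delimited chunks
    body: JSON.stringify({
      model,
      messages: [{ role: 'user', content: prompt }],
      stream: false,
    }),
  })
  if (!res.ok) throw new Error(`HTTP error! status: ${res.status}`)
  const data = (await res.json()) as ChatResponse
  return data?.message?.content || ''
}
```

With stream: false the server returns a single JSON object, which is why the code above can await response.json() directly instead of reading a stream.
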
src/transcription/format-transcript.ts (+89, -35)

@@ -103,54 +103,108 @@ export function formatAssemblyTranscript(transcript: AssemblyAIPollingResponse,
  * Converts LRC content (common lyrics file format) to plain text with timestamps.
  * - Strips out lines that contain certain metadata (like [by:whisper.cpp]).
  * - Converts original timestamps [MM:SS.xx] to a simplified [MM:SS] format.
- * - Collapses lines with single or few words into lines of up to 15 words, retaining only the first timestamp
- *   among collapsed lines and removing subsequent timestamps.
+ * - Properly extracts all timestamps in each line, then merges them into
+ *   chunks of up to 15 words, adopting the newest timestamp as soon
+ *   as it appears.
  *
  * @param lrcContent - The content of the LRC file as a string
  * @returns The converted text content with simple timestamps
  */
 export function formatWhisperTranscript(lrcContent: string): string {
-  const lines = lrcContent.split('\n')
+  // 1) Remove lines like `[by:whisper.cpp]`, convert "[MM:SS.xx]" to "[MM:SS]"
+  const rawLines = lrcContent
+    .split('\n')
     .filter(line => !line.startsWith('[by:whisper.cpp]'))
-    .map(line => line.replace(/\[(\d{1,3}):(\d{2})(\.\d+)?\]/g, (_, p1, p2) => `[${p1}:${p2}]`))
+    .map(line =>
+      line.replace(
+        /\[(\d{1,3}):(\d{2})(\.\d+)?\]/g,
+        (_, minutes, seconds) => `[${minutes}:${seconds}]`
+      )
+    )
+
+  // We define a Segment with timestamp: string | undefined
+  type Segment = {
+    timestamp: string | undefined
+    words: string[]
+  }
+
+  /**
+   * Given a line (which may contain multiple [MM:SS] tags),
+   * extract those timestamps + the words in between.
+   */
+  function parseLineIntoSegments(line: string): Segment[] {
+    const segments: Segment[] = []
+    const pattern = /\[(\d{1,3}:\d{2})\]/g
+
+    let lastIndex = 0
+    let match: RegExpExecArray | null
+    let currentTimestamp: string | undefined = undefined
+
+    while ((match = pattern.exec(line)) !== null) {
+      // Text before this timestamp
+      const textBeforeThisTimestamp = line.slice(lastIndex, match.index).trim()
+      if (textBeforeThisTimestamp) {
+        segments.push({
+          timestamp: currentTimestamp,
+          words: textBeforeThisTimestamp.split(/\s+/).filter(Boolean),
+        })
+      }
+      // Update timestamp to the newly found one
+      currentTimestamp = match[1]
+      lastIndex = pattern.lastIndex
+    }
+
+    // After the last timestamp, grab any trailing text
+    const trailing = line.slice(lastIndex).trim()
+    if (trailing) {
+      segments.push({
+        timestamp: currentTimestamp,
+        words: trailing.split(/\s+/).filter(Boolean),
+      })
+    }
+
+    // If line had no timestamps, the entire line is one segment with `timestamp: undefined`.
+    return segments
+  }
+
+  // 2) Flatten all lines into an array of typed segments
+  const allSegments: Segment[] = rawLines.flatMap(line => parseLineIntoSegments(line))
 
+  // 3) Accumulate words into lines up to 15 words each.
+  //    Whenever we see a new timestamp, we finalize the previous chunk
+  //    and start a new chunk with that timestamp.
   const finalLines: string[] = []
-  let currentTimestamp = ''
+  let currentTimestamp: string | undefined = undefined
   let currentWords: string[] = []
 
-  lines.forEach(line => {
-    const match = line.match(/^\[(\d{1,3}:\d{2})\]\s*(.*)$/)
-    if (match) {
-      const timestamp = match[1] || ''
-      const text = match[2]
-      if (currentWords.length > 0) {
-        finalLines.push(`[${currentTimestamp}] ${currentWords.join(' ')}`)
-        currentWords = []
-      }
-      currentTimestamp = timestamp
-      const splitted = (text || '').split(/\s+/).filter(Boolean)
-      splitted.forEach(word => {
-        if (currentWords.length >= 15) {
-          finalLines.push(`[${currentTimestamp}] ${currentWords.join(' ')}`)
-          currentWords = []
-        }
-        currentWords.push(word)
-      })
-    } else {
-      const splitted = line.trim().split(/\s+/).filter(Boolean)
-      splitted.forEach(word => {
-        if (currentWords.length >= 15) {
-          finalLines.push(`[${currentTimestamp}] ${currentWords.join(' ')}`)
-          currentWords = []
-        }
-        currentWords.push(word)
-      })
+  function finalizeChunk() {
+    if (currentWords.length > 0) {
+      // If we have never encountered a timestamp, default to "00:00"
+      const tsToUse = currentTimestamp ?? '00:00'
+      finalLines.push(`[${tsToUse}] ${currentWords.join(' ')}`)
+      currentWords = []
     }
-  })
+  }
 
-  if (currentWords.length > 0) {
-    finalLines.push(`[${currentTimestamp}] ${currentWords.join(' ')}`)
+  for (const segment of allSegments) {
+    // If this segment has a new timestamp, finalize the old chunk and start new
+    if (segment.timestamp !== undefined) {
+      finalizeChunk()
+      currentTimestamp = segment.timestamp
+    }
+
+    // Accumulate words from this segment, chunking at 15
+    for (const word of segment.words) {
+      currentWords.push(word)
+      if (currentWords.length === 15) {
+        finalizeChunk()
+      }
+    }
   }
 
+  // 4) Finalize any leftover words
+  finalizeChunk()
+
+  // 5) Return as simple text
   return finalLines.join('\n')
 }

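To make the new chunking behavior concrete, here is a small usage sketch. The LRC sample is invented for illustration (whisper.cpp's LRC output looks broadly like this), and the import path assumes the file layout shown above:

```ts
import { formatWhisperTranscript } from './src/transcription/format-transcript'

// Hypothetical LRC input, invented for this example
const lrc = [
  '[by:whisper.cpp]',
  '[00:00.000] Hello world this is',
  '[00:03.250] a short transcript example',
].join('\n')

console.log(formatWhisperTranscript(lrc))
// Each timestamp starts a new chunk, and chunks are capped at 15 words:
// [00:00] Hello world this is
// [00:03] a short transcript example
```
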
src/utils/validate-option.ts (+22, -8)

@@ -119,19 +119,20 @@ export async function checkWhisperDirAndModel(
 /**
  * checkOllamaServerAndModel()
  * ---------------------
- * Checks if the Ollama server is running, attempts to start it if not running,
- * and ensures that the specified model is available. If not, it will pull the model.
+ * Checks if the Ollama server is running, attempts to start it if not,
+ * and ensures the specified model is available (pulling if needed).
  *
  * @param {string} ollamaHost - The Ollama host
  * @param {string} ollamaPort - The Ollama port
- * @param {string} ollamaModelName - The Ollama model name
+ * @param {string} ollamaModelName - The Ollama model name (e.g. 'qwen2.5:0.5b')
  * @returns {Promise<void>}
  */
 export async function checkOllamaServerAndModel(
   ollamaHost: string,
   ollamaPort: string,
   ollamaModelName: string
 ): Promise<void> {
+  // Helper to check if the Ollama server responds
   async function checkServer(): Promise<boolean> {
     try {
       const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`)
@@ -141,23 +142,29 @@ export async function checkOllamaServerAndModel(
     }
   }
 
+  l.info(`[checkOllamaServerAndModel] Checking server: http://${ollamaHost}:${ollamaPort}`)
+
+  // 1) Confirm the server is running
   if (await checkServer()) {
     l.wait('\n Ollama server is already running...')
   } else {
+    // If the Docker-based environment uses 'ollama' as hostname but it's not up, that's likely an error
     if (ollamaHost === 'ollama') {
       throw new Error('Ollama server is not running. Please ensure the Ollama server is running and accessible.')
     } else {
-      l.wait('\n Ollama server is not running. Attempting to start...')
+      // Attempt to spawn an Ollama server locally
+      l.wait('\n Ollama server is not running. Attempting to start it locally...')
       const ollamaProcess = spawn('ollama', ['serve'], {
         detached: true,
         stdio: 'ignore',
       })
       ollamaProcess.unref()
 
+      // Wait up to ~30 seconds for the server to respond
       let attempts = 0
       while (attempts < 30) {
         if (await checkServer()) {
-          l.wait(' - Ollama server is now ready.')
+          l.wait(' - Ollama server is now ready.\n')
           break
         }
         await new Promise((resolve) => setTimeout(resolve, 1000))
@@ -169,17 +176,20 @@ export async function checkOllamaServerAndModel(
     }
   }
 
-  l.wait(`\n Checking if model is available: ${ollamaModelName}`)
+  // 2) Confirm the model is available; if not, pull it
+  l.wait(` Checking if model is available: ${ollamaModelName}`)
   try {
     const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`)
     if (!tagsResponse.ok) {
      throw new Error(`HTTP error! status: ${tagsResponse.status}`)
     }
+
     const tagsData = (await tagsResponse.json()) as OllamaTagsResponse
     const isModelAvailable = tagsData.models.some((m) => m.name === ollamaModelName)
+    l.info(`[checkOllamaServerAndModel] isModelAvailable=${isModelAvailable}`)
 
     if (!isModelAvailable) {
-      l.wait(`\n Model ${ollamaModelName} is not available, pulling...`)
+      l.wait(`\n Model ${ollamaModelName} is NOT available; pulling now...`)
       const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
@@ -189,11 +199,13 @@ export async function checkOllamaServerAndModel(
         throw new Error(`Failed to initiate pull for model ${ollamaModelName}`)
       }
       if (!pullResponse.body) {
-        throw new Error('Response body is null')
+        throw new Error('Response body is null while pulling model.')
       }
 
       const reader = pullResponse.body.getReader()
       const decoder = new TextDecoder()
+
+      // Stream the JSON lines from the server
       while (true) {
         const { done, value } = await reader.read()
         if (done) break
@@ -202,6 +214,8 @@ export async function checkOllamaServerAndModel(
         const lines = chunk.split('\n')
         for (const line of lines) {
           if (line.trim() === '') continue
+
+          // Each line should be a JSON object from the Ollama server
           try {
             const parsedLine = JSON.parse(line)
             if (parsedLine.status === 'success') {

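For reference, a hedged sketch of how this guard is driven upstream: callOllama resolves host and port from the environment fallbacks shown in src/llms/ollama.ts, then calls checkOllamaServerAndModel before sending its chat request. The model id 'qwen2.5:0.5b' is taken from the updated JSDoc example:

```ts
import { env } from 'node:process'
import { checkOllamaServerAndModel } from './src/utils/validate-option'

// Same fallbacks as callOllama uses
const ollamaHost = env['OLLAMA_HOST'] || 'localhost'
const ollamaPort = env['OLLAMA_PORT'] || '11434'

// Ensures the server answers (spawning `ollama serve` locally if needed)
// and that the model is present, pulling it via /api/pull when missing.
await checkOllamaServerAndModel(ollamaHost, ollamaPort, 'qwen2.5:0.5b')
```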