Skip to content

Commit 35a2d17

Browse files
committed
feat: combine consecutive citations
1 parent e5806bb commit 35a2d17

8 files changed

Lines changed: 62 additions & 32 deletions

File tree

common/services/transcription_handler_service.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from common.services.exceptions import InteractionFailedError, TranscriptionFailedError
1313
from common.services.transcription_services.transcription_manager import TranscriptionServiceManager
1414
from common.settings import get_settings
15+
# combine_consecutive_citations is defined in common/templates/add_citations.py;
# common/templates/template_utils.py no longer exists, so importing from it fails at module load.
from common.templates.add_citations import combine_consecutive_citations
1516
from common.types import DialogueEntry, TranscriptionJobMessageData
1617

1718
settings = get_settings()
@@ -76,6 +77,7 @@ async def process_interactive_message(chat_id: UUID) -> None:
7677
)
7778

7879
chat_response = await chatbot.chat(messages=chat_history)
80+
chat_response = combine_consecutive_citations(chat_response)
7981
chat.assistant_content = chat_response
8082
chat.status = JobStatus.COMPLETED
8183

common/templates/add_citations.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import re
2+
3+
from common.database.postgres_models import DialogueEntry
4+
from common.llm.client import FastOrBestLLM, create_default_chatbot
5+
from common.prompts import get_citations_prompt
6+
7+
8+
async def add_citations_to_minute(
    transcript: list[DialogueEntry],
    initial_draft: str,
) -> str:
    """Ask the fast LLM to annotate a draft minute with transcript citations.

    Builds the citation prompt from ``initial_draft`` and ``transcript``, sends
    it to the default FAST chatbot, and collapses runs of adjacent ``[n]``
    markers in the reply via ``combine_consecutive_citations``.

    Args:
        transcript: Dialogue entries the citations refer to.
        initial_draft: The minute text to be annotated.

    Returns:
        The annotated minute, or ``""`` when the chatbot yields a falsy reply.
    """
    chatbot = create_default_chatbot(FastOrBestLLM.FAST)
    messages = get_citations_prompt(initial_draft, transcript)

    minute = await chatbot.chat(messages)

    # The original fallback (`minute or ""`) implies the reply may be falsy
    # (presumably None on an empty completion - confirm against the chatbot
    # client). Guard *before* post-processing so the regex pass never sees None.
    if not minute:
        return ""

    return combine_consecutive_citations(minute)
20+
21+
22+
def combine_consecutive_citations(minute: str, max_gap: int = 2) -> str:
    """Collapse runs of adjacent citation markers into ranges.

    A "cluster" is one or more ``[n]`` markers with nothing between them,
    e.g. ``[3][1][2]``. Within each cluster the numbers are de-duplicated and
    sorted, then split into groups whose neighbouring values differ by at most
    ``max_gap``. A group of one number renders as ``[n]``; a larger group
    renders as ``[first-last]``.

    Each cluster is rewritten in place exactly once (via ``re.sub`` with a
    callback), so rewriting one cluster can never alter or hide another
    cluster elsewhere in the text.

    Args:
        minute: Text containing ``[n]`` citation markers. A falsy value
            (``""`` or ``None``) is treated as empty text.
        max_gap: Largest difference between neighbouring sorted numbers that
            still merges them into one range. The default of 2 means
            ``[1][3]`` becomes ``[1-3]``.

    Returns:
        The text with every citation cluster collapsed.
    """
    if not minute:
        return ""

    def _collapse(match: re.Match) -> str:
        # De-duplicate so "[3][3]" renders as "[3]" rather than "[3-3]".
        numbers = sorted({int(n) for n in re.findall(r"\d+", match.group())})

        groups: list[list[int]] = []
        for number in numbers:
            # Numbers are sorted ascending, so the difference is never negative.
            if groups and number - groups[-1][-1] <= max_gap:
                groups[-1].append(number)
            else:
                groups.append([number])

        return "".join(
            f"[{group[0]}]" if len(group) == 1 else f"[{group[0]}-{group[-1]}]"
            for group in groups
        )

    return re.sub(r"(?:\[\d+\])+", _collapse, minute)

common/templates/template_utils.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

frontend/app/transcriptions/[transcriptionId]/MinuteTab/components/editor/tiptap-editor.tsx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { useCallback, useEffect } from 'react'
1414

1515
import { CitationPopoverWrapper } from '@/components/ui/citation-popover-wrapper'
1616
import { useCitationPopover } from '@/hooks/use-citation-popover'
17+
import { citationRegex, citationRegexWithSpace } from '@/lib/citationRegex'
1718
import { Transcription } from '@/lib/client'
1819
import { cn } from '@/lib/utils'
1920
import posthog from 'posthog-js'
@@ -58,7 +59,7 @@ function SimpleEditor({
5859
props: {
5960
decorations(state) {
6061
const decorations: Decoration[] = []
61-
const citationRegex = /(\s?)\[(\d+)\]/g
62+
const citationRegex = citationRegexWithSpace
6263

6364
state.doc.descendants((node, pos) => {
6465
if (node.isText) {
@@ -93,7 +94,7 @@ function SimpleEditor({
9394
const domNode = event.target as HTMLElement
9495

9596
if (domNode.classList.contains('citation-link')) {
96-
const match = domNode.textContent?.match(/\[(\d+)\]/)
97+
const match = domNode.textContent?.match(citationRegex)
9798
if (match) {
9899
const index = parseInt(match[1], 10)
99100
const rect = domNode.getBoundingClientRect()

frontend/app/transcriptions/[transcriptionId]/MinuteTab/minute-editor/minute-editor.tsx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { MinuteVersionSelect } from '@/app/transcriptions/[transcriptionId]/Minu
77
import { NewMinuteDialog } from '@/app/transcriptions/[transcriptionId]/MinuteTab/NewMinuteDialog'
88
import { Button } from '@/components/ui/button'
99
import CopyButton from '@/components/ui/copy-button'
10+
import { citationRegex, citationRegexWithSpace } from '@/lib/citationRegex'
1011
import {
1112
MinuteListItem,
1213
MinuteVersionResponse,
@@ -86,10 +87,10 @@ export function MinuteEditor({
8687
}, [form, minuteVersion])
8788
const htmlContent = form.watch('html')
8889
const contentToCopy = useMemo(() => {
89-
return htmlContent?.replaceAll(/\s?\[(\d+)\]/g, '') || ''
90+
return htmlContent?.replaceAll(citationRegexWithSpace, '') || ''
9091
}, [htmlContent])
9192
const hasCitations = useMemo(() => {
92-
return !!htmlContent?.match(/\[(\d+)\]/)
93+
return !!htmlContent?.match(citationRegex)
9394
}, [htmlContent])
9495
useEffect(() => {}, [htmlContent])
9596
const { mutate: saveEdit } = useMutation({

frontend/lib/citationRegex.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/**
 * Matches every citation marker in a string, together with at most one
 * whitespace character directly in front of it.
 *
 * Accepts both single markers (`[12]`) and ranges (`[12-15]`).
 * - Group 1: the optional leading whitespace character (empty string if none).
 * - Group 2: the first number of the marker.
 * The end of a range is matched but not captured.
 *
 * NOTE: this regex carries the `g` flag, so the shared object keeps
 * `lastIndex` state between `exec()` / `test()` calls.
 */
export const citationRegexWithSpace = /(\s?)\[(\d+)(?:-\d+)?\]/g
2+
/**
 * Matches the first citation marker in a string: a single marker (`[12]`)
 * or a range (`[12-15]`).
 *
 * - Group 1: the first number of the marker.
 * The end of a range is matched but not captured.
 *
 * Has no `g` flag, so it is stateless and only ever reports the first match.
 */
export const citationRegex = /\[(\d+)(?:-\d+)?\]/

frontend/lib/download-word-doc.ts

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { citationRegexWithSpace } from '@/lib/citationRegex'
12
import { DialogueEntry } from '@/lib/client'
23
import { saveAs } from 'file-saver'
34
import { asBlob } from 'html-docx-js-typescript'
@@ -110,19 +111,12 @@ async function convertHTMLToWordAndDownload(
110111
saveAs(blob, fileName)
111112
}
112113

113-
export function removeCitations(text: string): string {
114-
const citationRegex = /\s*\[\d+\](\s*\.)?/g
115-
return text.replace(citationRegex, (match, punctuation) => {
116-
return punctuation ? '.' : ''
117-
})
118-
}
119-
120114
async function convertAIMinutesToWordDoc(
121115
html: string,
122116
transcript: DialogueEntry[],
123117
fileName: string = 'document.docx'
124118
): Promise<void> {
125-
const cleanedHTML = removeCitations(html)
119+
const cleanedHTML = html.replace(citationRegexWithSpace, '')
126120
await convertHTMLToWordAndDownload(cleanedHTML, transcript, fileName)
127121
}
128122

frontend/utils/citation-renderer.tsx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
1+
import { citationRegex } from '@/lib/citationRegex'
2+
13
export function linkCitations(content: string): string {
24
const parts: string[] = []
3-
const regex = /\[(\d+)\]/g
45
let lastIndex = 0
56
let match: RegExpExecArray | null
6-
7+
const regex = new RegExp(citationRegex, 'g')
78
while ((match = regex.exec(content)) !== null) {
89
const idx = match.index
910
if (idx > lastIndex) {
1011
parts.push(content.slice(lastIndex, idx))
1112
}
1213
const citationNumber = parseInt(match[1], 10)
1314
const text = match[0]
14-
parts.push(`[[${citationNumber}]](${citationNumber})`)
15+
parts.push(`[${text}](${citationNumber})`)
1516
lastIndex = idx + text.length
1617
}
1718
if (lastIndex < content.length) {
@@ -32,7 +33,6 @@ export function CitationContent({
3233
const regex = /^\d+$/
3334
let match: RegExpExecArray | null
3435
match = regex.exec(href || '')
35-
3636
if (match !== null) {
3737
const idx = match.index
3838
const text = match[0]
@@ -46,7 +46,7 @@ export function CitationContent({
4646
onCitationClick(citationNumber, rect)
4747
}}
4848
>
49-
[{text}]
49+
{linkChildren}
5050
</span>
5151
)
5252
}

0 commit comments

Comments
 (0)