Skip to content

Commit 07e5336

Browse files
krusche and claude authored
Auto-fill thesis abstract from uploaded PDFs (#1127)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent a2fcb75 commit 07e5336

21 files changed

Lines changed: 1903 additions & 2 deletions

File tree

client/e2e/helpers.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,3 +235,42 @@ export function createTestPdfBuffer(): Buffer {
235235
'trailer\n<< /Size 4 /Root 1 0 R >>\nstartxref\n206\n%%EOF',
236236
)
237237
}
238+
239+
/**
 * Create a valid single-page PDF buffer with a real "Abstract" section so the server's
 * abstract extractor can confidently extract it.
 *
 * The page text contains a line-end hyphenation ("com-" / "prehensive") that must be
 * rejoined into "comprehensive", and a "1 Introduction" heading that bounds the abstract.
 *
 * The stream `/Length`, the xref object offsets and the `startxref` value are all byte
 * counts, so they are measured in the same encoding the buffer is serialized with.
 * Measuring them in UTF-8 while writing latin1 only agrees for pure-ASCII text.
 *
 * @returns The PDF file contents as a latin1-encoded buffer.
 */
export function createAbstractTestPdfBuffer(): Buffer {
  // Single source of truth for both measuring and serializing the document.
  const encoding = 'latin1' as const
  const byteLength = (text: string): number => Buffer.byteLength(text, encoding)

  // Page content stream: heading, two hyphenated abstract lines, next heading, body text.
  const content =
    'BT /F1 14 Tf 72 720 Td (Abstract) Tj ET\n' +
    'BT /F1 10 Tf 72 695 Td (This thesis presents a com-) Tj ET\n' +
    'BT /F1 10 Tf 72 680 Td (prehensive evaluation of automated review systems.) Tj ET\n' +
    'BT /F1 14 Tf 72 650 Td (1 Introduction) Tj ET\n' +
    'BT /F1 10 Tf 72 625 Td (The introduction begins here with more detail.) Tj ET\n'

  // Indirect objects 1..5: catalog, page tree, page, font, content stream.
  const objects = [
    '<< /Type /Catalog /Pages 2 0 R >>',
    '<< /Type /Pages /Kids [3 0 R] /Count 1 >>',
    '<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>',
    '<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>',
    `<< /Length ${byteLength(content)} >>\nstream\n${content}endstream`,
  ]

  let pdf = '%PDF-1.4\n'
  const offsets: number[] = []
  objects.forEach((body, index) => {
    // Record where this object starts before appending it; the xref table needs it.
    offsets.push(byteLength(pdf))
    pdf += `${index + 1} 0 obj\n${body}\nendobj\n`
  })

  // Cross-reference table: one free entry for object 0, then one 20-byte entry per object.
  const xrefOffset = byteLength(pdf)
  pdf += `xref\n0 ${objects.length + 1}\n0000000000 65535 f \n`
  offsets.forEach((offset) => {
    pdf += `${String(offset).padStart(10, '0')} 00000 n \n`
  })
  pdf += `trailer\n<< /Size ${objects.length + 1} /Root 1 0 R >>\nstartxref\n${xrefOffset}\n%%EOF`

  return Buffer.from(pdf, encoding)
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import { test, expect, type Page } from '@playwright/test'
2+
import {
3+
authStatePath,
4+
navigateTo,
5+
expandAccordion,
6+
hideWebpackOverlay,
7+
createAbstractTestPdfBuffer,
8+
} from '../helpers'
9+
10+
// The seeded thesis below is in WRITING state and belongs to the "student" user; the
// thesis file upload test works on the same record. Uploading a thesis document to it
// triggers the abstract extraction on the server.
const THESIS_1_ID = '00000000-0000-4000-d000-000000000001'
const THESIS_1_URL = '/theses/' + THESIS_1_ID

// Matches the abstract text once the line-end hyphenation has been rejoined.
const EXTRACTED = /comprehensive evaluation of automated review systems/i
17+
18+
/**
 * Uploads the abstract test PDF through the thesis page's "Upload Thesis" dialog.
 *
 * Expands the "Thesis" and "Files" accordions, opens the upload dialog, attaches the
 * generated PDF, submits it and waits for the upload dialog to close.
 *
 * @param page Playwright page that is already showing the thesis.
 */
async function uploadThesisPdf(page: Page) {
  const waitOptions = { timeout: 10_000 }
  const openUploadButton = page.getByRole('button', { name: 'Upload Thesis' })

  await expandAccordion(page, 'Thesis', page.getByText('Files').first())
  await expandAccordion(page, 'Files', openUploadButton)

  await openUploadButton.scrollIntoViewIfNeeded()
  await openUploadButton.click()

  // The confirmation modal opens after the upload, so an unnamed getByRole('dialog')
  // would resolve to two dialogs; target the upload dialog by its name instead.
  const uploadDialog = page.getByRole('dialog', { name: 'File Upload' })
  await expect(uploadDialog).toBeVisible(waitOptions)

  const fileInput = uploadDialog.locator('input[type="file"]')
  await fileInput.setInputFiles({
    name: 'thesis-with-abstract.pdf',
    mimeType: 'application/pdf',
    buffer: createAbstractTestPdfBuffer(),
  })

  const submitButton = uploadDialog.getByRole('button', { name: 'Upload File' })
  await expect(submitButton).toBeEnabled(waitOptions)
  await submitButton.click()
  await expect(uploadDialog).toBeHidden(waitOptions)
}
41+
42+
test.describe('Abstract auto-extraction - Student', () => {
  test.use({ storageState: authStatePath('student') })

  test('auto-fills a blank abstract, then asks via a modal before replacing an existing one', async ({
    page,
  }) => {
    test.setTimeout(120_000)

    const manualAbstract = 'A manually written abstract that must not be overwritten.'
    const manualAbstractPattern = /manually written abstract that must not be overwritten/i

    await navigateTo(page, THESIS_1_URL)
    await hideWebpackOverlay(page)
    const thesisHeading = page.getByRole('heading', { name: /automated code review/i })
    await expect(thesisHeading).toBeVisible({ timeout: 30_000 })

    // Step 1: with a blank seeded abstract, a confident extraction is applied silently
    // (no modal), and "com-" + "prehensive" is rejoined into "comprehensive".
    await uploadThesisPdf(page)
    await expect(page.getByText(EXTRACTED)).toBeVisible({ timeout: 15_000 })

    // Step 2: overwrite the abstract by hand so that the next upload has something it
    // must not silently replace.
    await page.getByRole('button', { name: 'Edit' }).first().click()
    const abstractWrapper = page.locator('.mantine-InputWrapper-root', { hasText: 'Abstract' })
    const editor = abstractWrapper.locator('.ProseMirror').first()
    await editor.click()
    await page.keyboard.press('ControlOrMeta+A')
    await page.keyboard.press('Backspace')
    await editor.pressSequentially(manualAbstract)
    await page.getByRole('button', { name: 'Save' }).click()
    await expect(page.getByText(manualAbstractPattern)).toBeVisible({ timeout: 10_000 })

    // Step 3: a second upload must raise the confirmation modal rather than overwrite
    // the manual abstract.
    await uploadThesisPdf(page)
    const suggestionModal = page.getByRole('dialog', { name: /use the abstract extracted/i })
    await expect(suggestionModal).toBeVisible({ timeout: 15_000 })

    // Step 4: accepting the suggestion swaps in the extracted text.
    const acceptButton = suggestionModal.getByRole('button', { name: 'Use extracted abstract' })
    await acceptButton.click()
    await expect(page.getByText(EXTRACTED)).toBeVisible({ timeout: 10_000 })
  })
})

client/src/thesis/pages/ThesisPage/components/ThesisInfoSection/ThesisInfoSection.tsx

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
import { Link } from 'react-router'
1111
import { ApiError } from '@/core/requests/handler'
1212
import DownloadAllFilesButton from '@/thesis/pages/ThesisPage/components/ThesisInfoSection/components/DownloadAllFilesButton/DownloadAllFilesButton'
13+
import AbstractSuggestionModal from '@/thesis/pages/ThesisPage/components/ThesisInfoSection/components/AbstractSuggestionModal/AbstractSuggestionModal'
1314
import { isThesisClosed } from '@/thesis/utils/thesis'
1415
import { GLOBAL_CONFIG } from '@/core/config/global'
1516
import { formatLanguage } from '@/core/utils/format'
@@ -54,6 +55,35 @@ const ThesisInfoSection = () => {
5455
}
5556
}, 'Thesis info updated successfully')
5657

58+
const [accepting, onUseSuggestion] = useThesisUpdateAction(async () => {
59+
const response = await doRequest<IThesis>(
60+
`/v2/theses/${thesis.thesisId}/abstract-suggestion/accept`,
61+
{ method: 'POST', requiresAuth: true },
62+
)
63+
64+
if (response.ok) {
65+
return response.data
66+
} else {
67+
throw new ApiError(response)
68+
}
69+
}, 'Abstract updated from the suggestion')
70+
71+
const [dismissing, onDismissSuggestion] = useThesisUpdateAction(async () => {
72+
const response = await doRequest<IThesis>(
73+
`/v2/theses/${thesis.thesisId}/abstract-suggestion/dismiss`,
74+
{ method: 'POST', requiresAuth: true },
75+
)
76+
77+
if (response.ok) {
78+
return response.data
79+
} else {
80+
throw new ApiError(response)
81+
}
82+
}, 'Abstract suggestion dismissed')
83+
84+
const showSuggestion =
85+
access.student && !editMode && !isThesisClosed(thesis) && !!thesis.abstractSuggestion
86+
5787
return (
5888
<Accordion variant='separated' defaultValue='open'>
5989
<Accordion.Item value='open'>
@@ -83,6 +113,14 @@ const ThesisInfoSection = () => {
83113
))}
84114
</Stack>
85115
)}
116+
<AbstractSuggestionModal
117+
opened={showSuggestion}
118+
currentAbstract={thesis.abstractText ?? ''}
119+
suggestion={thesis.abstractSuggestion ?? ''}
120+
loading={accepting || dismissing}
121+
onConfirm={onUseSuggestion}
122+
onDeny={onDismissSuggestion}
123+
/>
86124
<DocumentEditor
87125
label='Abstract'
88126
value={abstractText}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import { Button, Group, Modal, Stack, Text } from '@mantine/core'
2+
import { Sparkle } from '@phosphor-icons/react'
3+
import DocumentEditor from '@/core/components/DocumentEditor/DocumentEditor'
4+
5+
/** Props accepted by {@link AbstractSuggestionModal}. */
interface IAbstractSuggestionModalProps {
  /** Whether the modal is currently shown. */
  opened: boolean
  /** HTML of the abstract currently stored on the thesis. */
  currentAbstract: string
  /** HTML of the abstract extracted from the uploaded document. */
  suggestion: string
  /** True while an accept or deny request is in flight. */
  loading: boolean
  /** Called when the user chooses the extracted abstract. */
  onConfirm: () => void
  /** Called when the user keeps the current abstract or closes the modal. */
  onDeny: () => void
}
13+
14+
// Decides whether an HTML string contains any visible text. The HTML is parsed and its text
// content is read, instead of removing tags with a regex (incomplete, and flagged as unsafe
// sanitization). The parsed document is detached and inert, so no scripts run.
const hasText = (html: string) => {
  const parsedDocument = new DOMParser().parseFromString(html, 'text/html')
  const visibleText = parsedDocument.body.textContent ?? ''

  return visibleText.trim().length > 0
}
19+
20+
/**
 * Confirmation dialog displayed after an upload produced an extracted abstract that would
 * replace the current one. The student has to accept or decline the change explicitly
 * instead of possibly missing an inline hint. Closing the dialog counts as declining, so
 * the current abstract is kept.
 */
const AbstractSuggestionModal = ({
  opened,
  currentAbstract,
  suggestion,
  loading,
  onConfirm,
  onDeny,
}: IAbstractSuggestionModalProps) => {
  // A close gesture must not trigger a deny while an accept/deny request is still running.
  const handleClose = () => {
    if (loading) {
      return
    }

    onDeny()
  }

  const modalTitle = (
    <Group gap='xs'>
      <Sparkle size={18} />
      <Text fw={600}>Use the abstract extracted from your upload?</Text>
    </Group>
  )

  // The current abstract is only worth showing when it has visible text.
  const currentAbstractPreview = hasText(currentAbstract) ? (
    <DocumentEditor
      label='Current abstract (kept if you decline)'
      value={currentAbstract}
      editMode={false}
    />
  ) : null

  return (
    <Modal
      opened={opened}
      onClose={handleClose}
      size='lg'
      closeOnClickOutside={false}
      closeOnEscape={!loading}
      withCloseButton={!loading}
      title={modalTitle}
    >
      <Stack>
        <Text size='sm'>
          We extracted this abstract from the document you just uploaded. Replace the current
          abstract with it, or keep the one you have.
        </Text>
        <DocumentEditor label='Extracted from your upload' value={suggestion} editMode={false} />
        {currentAbstractPreview}
        <Group justify='flex-end'>
          <Button variant='default' color='gray' loading={loading} onClick={onDeny}>
            Keep current
          </Button>
          <Button loading={loading} onClick={onConfirm}>
            Use extracted abstract
          </Button>
        </Group>
      </Stack>
    </Modal>
  )
}

export default AbstractSuggestionModal

client/src/thesis/requests/responses/thesis.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ export interface IThesis extends IThesisOverview {
6565
visibility: string
6666
infoText?: string
6767
abstractText?: string
68+
abstractSource?: 'MANUAL' | 'EXTRACTED'
69+
abstractSuggestion?: string
6870
applicationId: string | null
6971
anonymized?: boolean
7072
anonymizedAt?: string

0 commit comments

Comments
 (0)