Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
6a635a4
First working commit
ConnorYoh Apr 22, 2026
36f93bd
Refactor PDF Comment Agent into composable add-comments + AI tool
ConnorYoh Apr 22, 2026
7b6715f
Separate math specialist from presentation (multi-turn meta-agents)
ConnorYoh Apr 23, 2026
6dfb307
Gate PDF Comment Agent behind the prototypes build
ConnorYoh Apr 23, 2026
e8d95d0
Merge branch 'main' of https://github.com/Stirling-Tools/Stirling-PDF…
ConnorYoh Apr 23, 2026
c87f24f
Move AI agent tools from /api/v1/misc/ to /api/v1/ai/tools/
ConnorYoh Apr 23, 2026
de22bc4
Anchor sticky notes at matched text in add-comments
ConnorYoh Apr 23, 2026
e86b604
Clean up trace artefacts, thumbnail churn, and stale javadoc
ConnorYoh Apr 23, 2026
a058cd0
Share file-size cap across AI tools and harden annotation specs
ConnorYoh Apr 24, 2026
1bf99be
Wrap engine network failures and escape user-prompt delimiter
ConnorYoh Apr 24, 2026
fbeed20
Centralise tool-report header and harden filename parsing
ConnorYoh Apr 24, 2026
4be288a
Cover resume-turn artifact round-trip and agent-failure paths
ConnorYoh Apr 24, 2026
461131c
update tool models
ConnorYoh Apr 24, 2026
9af5fa9
engine fix
ConnorYoh Apr 24, 2026
c2c6813
Merge branch 'main' into pdf-comment-agent
ConnorYoh Apr 24, 2026
9b7ad3e
Fix type errors,
ConnorYoh Apr 24, 2026
6081068
Merge branch 'pdf-comment-agent' of https://github.com/Stirling-Tools…
ConnorYoh Apr 24, 2026
093aa6d
format
ConnorYoh Apr 24, 2026
9264ed5
Need a real OS to run tool model generation
ConnorYoh Apr 24, 2026
e23cfaf
Move AI tools to hand-written registry, delegates own plan emission
ConnorYoh Apr 24, 2026
40b7a6b
Document gitignore trace entries, typecheck prototypes in typecheck:all
ConnorYoh Apr 24, 2026
d9ca8e9
Stop LLM filling chunk_id strings; use bounded ordinals instead
ConnorYoh Apr 27, 2026
51462c3
Stop emitting English from math-audit consumers; LLM speaks user's la…
ConnorYoh Apr 27, 2026
78de42e
Drop English math-intent regex; orchestrator LLM emits the flag
ConnorYoh Apr 27, 2026
d7872a1
Renive added git ignore
ConnorYoh Apr 27, 2026
a7d8e49
Embed math-audit plan in PdfQuestionAnswerResponse, not as a union va…
ConnorYoh Apr 27, 2026
73b0910
Move math-intent decision into a shared classifier on the consumers
ConnorYoh Apr 27, 2026
f0734be
Surface ai-workflow as a prototype super-tool to fix prototypes typec…
ConnorYoh Apr 27, 2026
607c4b4
Drop test __init__.py files; switch pytest to importlib mode
ConnorYoh Apr 28, 2026
1281771
Trim orchestrator system prompt — drop AI-correction sentence
ConnorYoh Apr 28, 2026
2fc1ec7
Catch ValidationError specifically in extract_math_verdict
ConnorYoh Apr 28, 2026
3c56a48
Use Pretty(response) directly — let it lazy-dump
ConnorYoh Apr 28, 2026
38760f1
Slim _run_pdf_edit and _run_agent_draft to match the other delegates
ConnorYoh Apr 28, 2026
f065907
Type ToolReportArtifact.report; replace dict[str, Any] with Verdict
ConnorYoh Apr 28, 2026
09cf504
Merge branch 'main' into pdf-comment-agent
ConnorYoh Apr 28, 2026
b9b3535
Update engine/src/stirling/contracts/common.py
ConnorYoh Apr 30, 2026
6b8f9b7
Merge branch 'main' into pdf-comment-agent
ConnorYoh Apr 30, 2026
1cdff7f
Merge branch 'main' into pdf-comment-agent
jbrunton96 May 1, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .taskfiles/frontend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ tasks:
- task: typecheck:saas
- task: typecheck:desktop
- task: typecheck:scripts
- task: typecheck:prototypes

# ============================================================
# Quality Gate
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package stirling.software.common.model.api.comments;

/**
 * Page-absolute rectangle describing where a PDF annotation sits.
 *
 * <p>All values are expressed in PDF user-space, whose origin is the bottom-left corner of the
 * page — the same convention PDFBox's {@code PDRectangle} uses. This record performs no
 * validation of its own; consumers decide how to treat out-of-range values.
 *
 * @param pageIndex zero-based index of the page that carries the annotation.
 * @param x x coordinate of the rectangle's bottom-left corner.
 * @param y y coordinate of the rectangle's bottom-left corner.
 * @param width rectangle width in user-space units.
 * @param height rectangle height in user-space units.
 */
public record AnnotationLocation(
        int pageIndex,
        float x,
        float y,
        float width,
        float height) {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package stirling.software.common.model.api.comments;

/**
 * Request to place one sticky-note (PDF Text) annotation on a document.
 *
 * <p>Only {@code location} and {@code text} are mandatory. {@code PdfAnnotationService} skips a
 * spec whose text is {@code null}, blank, or longer than its 100,000-character cap, and
 * substitutes its own default author/subject whenever the supplied value is {@code null} or
 * blank.
 *
 * @param location anchor rectangle for the annotation icon, in PDF user-space.
 * @param text comment body displayed in the popup; must be non-blank.
 * @param author optional author label for the popup; {@code null} or blank → service default.
 * @param subject optional subject line for the popup; {@code null} or blank → service default.
 */
public record StickyNoteSpec(
        AnnotationLocation location,
        String text,
        String author,
        String subject) {}
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,17 @@
@Slf4j
public class InternalApiClient {

// Allowlist for internal dispatch. Matches a fixed namespace prefix,
// Allowlist for internal dispatch. Matches fixed namespace prefixes,
// but rejects traversal (..), URL-encoding (%), query/fragment, backslashes, and any other
// character that could alter the resolved endpoint on the local Spring server.
//
// The second alternation carves out `/api/v1/ai/tools/*` specifically — AI tools are
// dispatchable, but the broader `/api/v1/ai/` surface (orchestrate, health, etc.) is
// intentionally NOT permitted to avoid plan steps re-entering the orchestrator.
private static final Pattern ALLOWED_ENDPOINT_PATH =
Pattern.compile("^/api/v1/(general|misc|security|convert|filter)(/[A-Za-z0-9_-]+)+$");
Pattern.compile(
"^/api/v1/(general|misc|security|convert|filter)(/[A-Za-z0-9_-]+)+$"
+ "|^/api/v1/ai/tools(/[A-Za-z0-9_-]+)+$");

private final ServletContext servletContext;
private final UserServiceInterface userService;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package stirling.software.common.service;

import java.util.Calendar;
import java.util.List;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;
import org.springframework.stereotype.Service;

import lombok.extern.slf4j.Slf4j;

import stirling.software.common.model.api.comments.AnnotationLocation;
import stirling.software.common.model.api.comments.StickyNoteSpec;

/**
 * Shared primitive for adding sticky-note (PDF Text) annotations to a document.
 *
 * <p>Used by:
 *
 * <ul>
 *   <li>{@code /api/v1/misc/add-comments} — a deterministic, reusable tool.
 *   <li>AI-agent flows that generate comment specs (e.g. PDF review agent, math auditor review
 *       mode) and hand them off to this service for deterministic placement.
 * </ul>
 *
 * <p>The service is best-effort per spec: an invalid spec, or a page whose annotation list cannot
 * be read, is logged and skipped rather than failing the whole batch.
 */
@Slf4j
@Service
public class PdfAnnotationService {

    /** Yellow sticky-note fill colour (R, G, B in 0..1 range). */
    private static final float[] STICKY_NOTE_COLOR_RGB = {1f, 0.95f, 0.4f};

    /** Opacity for the sticky-note icon. */
    private static final float ANNOTATION_OPACITY = 0.9f;

    /** PDF Text-annotation icon name — {@code "Comment"} is one of the standard icons. */
    private static final String ANNOTATION_ICON_NAME = "Comment";

    /** Default subject shown in the annotation popup when a spec does not supply one. */
    private static final String DEFAULT_SUBJECT = "Stirling AI Comment";

    /** Default author label shown in the annotation popup when a spec does not supply one. */
    private static final String DEFAULT_AUTHOR = "Stirling AI";

    /**
     * Cap on sticky-note text length. PDF annotation bodies can technically be much longer, but
     * anything beyond this is almost certainly pathological (accidental document-dump or malicious
     * payload) and would bloat the output file.
     */
    private static final int MAX_COMMENT_TEXT_LENGTH = 100_000;

    /**
     * Add a list of sticky notes to {@code doc}. Specs that are {@code null}, reference an
     * out-of-range page, carry blank or over-long text, or have non-finite / non-positive
     * geometry are logged and skipped; so are specs whose target page rejects the annotation.
     * This method never throws for a single bad spec.
     *
     * @param doc the document to annotate; must not be {@code null}.
     * @param specs the notes to place; {@code null} or empty is a no-op.
     * @return the number of annotations actually attached to the document
     */
    public int addStickyNotes(PDDocument doc, List<StickyNoteSpec> specs) {
        if (specs == null || specs.isEmpty()) {
            return 0;
        }
        int totalPages = doc.getNumberOfPages();
        // One timestamp for the whole batch so every note reports the same creation date.
        Calendar now = Calendar.getInstance();
        int applied = 0;
        for (int i = 0; i < specs.size(); i++) {
            StickyNoteSpec spec = specs.get(i);
            if (!isValid(spec, totalPages, i)) {
                continue;
            }
            // Only count notes that really ended up on the page; apply() reports attach failures.
            if (apply(doc, spec, now)) {
                applied++;
            }
        }
        if (applied < specs.size()) {
            log.warn(
                    "Applied {}/{} sticky notes; {} skipped (invalid spec or attach failure).",
                    applied,
                    specs.size(),
                    specs.size() - applied);
        }
        return applied;
    }

    /**
     * Add a single sticky note. Convenience wrapper; prefer {@link #addStickyNotes(PDDocument,
     * List)} when placing multiple annotations so log output is batched.
     *
     * <p>A {@code null} spec is logged and skipped, exactly as it would be inside a batch.
     */
    public void addStickyNote(PDDocument doc, StickyNoteSpec spec) {
        // List.of(...) rejects null elements; singletonList lets the normal validation path
        // handle a null spec instead of surfacing a NullPointerException to the caller.
        addStickyNotes(doc, java.util.Collections.singletonList(spec));
    }

    /**
     * Decide whether {@code spec} can be placed on a document with {@code totalPages} pages.
     * Every rejection is logged with the spec's {@code index} in the incoming batch.
     */
    private boolean isValid(StickyNoteSpec spec, int totalPages, int index) {
        if (spec == null || spec.location() == null) {
            log.warn("Skipping sticky-note[{}]: spec or location is null.", index);
            return false;
        }
        if (spec.text() == null || spec.text().isBlank()) {
            log.warn("Skipping sticky-note[{}]: text is blank.", index);
            return false;
        }
        if (spec.text().length() > MAX_COMMENT_TEXT_LENGTH) {
            log.warn(
                    "Skipping sticky-note[{}]: text length {} exceeds limit {}.",
                    index,
                    spec.text().length(),
                    MAX_COMMENT_TEXT_LENGTH);
            return false;
        }
        AnnotationLocation loc = spec.location();
        // NaN compares false against everything, so it would slip past the "<= 0" check below;
        // reject NaN/Infinity explicitly to avoid writing a malformed /Rect into the PDF.
        if (!Float.isFinite(loc.x())
                || !Float.isFinite(loc.y())
                || !Float.isFinite(loc.width())
                || !Float.isFinite(loc.height())) {
            log.warn(
                    "Skipping sticky-note[{}]: non-finite coordinates x={} y={} width={} height={}.",
                    index,
                    loc.x(),
                    loc.y(),
                    loc.width(),
                    loc.height());
            return false;
        }
        if (loc.width() <= 0f || loc.height() <= 0f) {
            log.warn(
                    "Skipping sticky-note[{}]: non-positive dimensions width={} height={}.",
                    index,
                    loc.width(),
                    loc.height());
            return false;
        }
        int page = loc.pageIndex();
        if (page < 0 || page >= totalPages) {
            log.warn(
                    "Skipping sticky-note[{}]: pageIndex={} out of range [0, {}).",
                    index,
                    page,
                    totalPages);
            return false;
        }
        return true;
    }

    /**
     * Build the Text annotation for {@code spec} and append it to its target page.
     *
     * @return {@code true} when the annotation was attached; {@code false} when the page's
     *     annotation list could not be read (the failure is logged, not rethrown).
     */
    private boolean apply(PDDocument doc, StickyNoteSpec spec, Calendar now) {
        AnnotationLocation loc = spec.location();

        PDAnnotationText annot = new PDAnnotationText();
        annot.setContents(spec.text());
        annot.setRectangle(new PDRectangle(loc.x(), loc.y(), loc.width(), loc.height()));
        annot.setSubject(nonBlankOr(spec.subject(), DEFAULT_SUBJECT));
        annot.setTitlePopup(nonBlankOr(spec.author(), DEFAULT_AUTHOR));
        annot.setColor(new PDColor(STICKY_NOTE_COLOR_RGB, PDDeviceRGB.INSTANCE));
        annot.setCreationDate(now);
        annot.setConstantOpacity(ANNOTATION_OPACITY);
        // Written straight into the annotation dictionary's /Name entry (the icon to display).
        annot.getCOSObject().setName(COSName.NAME, ANNOTATION_ICON_NAME);

        try {
            doc.getPage(loc.pageIndex()).getAnnotations().add(annot);
            return true;
        } catch (java.io.IOException e) {
            // Best-effort: keep going with the rest of the batch, but keep the stack trace.
            log.warn(
                    "Failed to attach sticky note to page {}: {}",
                    loc.pageIndex(),
                    e.getMessage(),
                    e);
            return false;
        }
    }

    /** Return {@code value} unless it is {@code null} or blank, in which case {@code fallback}. */
    private static String nonBlankOr(String value, String fallback) {
        return value != null && !value.isBlank() ? value : fallback;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package stirling.software.common.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.regex.Pattern;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.springframework.stereotype.Component;

import lombok.extern.slf4j.Slf4j;

/**
 * Locate text on a specific PDF page and return its bounding box in PDF user-space (bottom-left
 * origin). Used by tools that receive "anchor by text" hints — e.g. {@code
 * /api/v1/misc/add-comments} when callers supply an {@code anchorText} instead of explicit
 * coordinates.
 *
 * <p>Matching is tolerant: case-insensitive with everything except Unicode letters and digits
 * stripped on both sides, so a caller-supplied needle of {@code "215000"} matches page text
 * {@code "$215,000"}, and {@code "Total Revenue"} matches {@code "Total Revenue."}. Non-Latin
 * scripts are matched the same way.
 *
 * <p>NOTE(review): each candidate is one {@code writeString} callback from {@code
 * PDFTextStripper}. For PDFs that position words individually instead of emitting space glyphs,
 * PDFBox may deliver one callback per word, in which case a multi-word needle would not match —
 * confirm against real-world inputs. Rotated pages are likewise not specially handled.
 */
@Slf4j
@Component
public class PdfTextLocator {

    /** Matches every character that is neither a Unicode letter nor a Unicode number. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^\\p{L}\\p{N}]");

    /** One found line of text with its user-space bounding box. */
    public record MatchedBox(float x, float y, float width, float height) {}

    /**
     * Find the first line on {@code pageIndex} (0-indexed) whose text contains {@code needle}
     * under the tolerant match.
     *
     * @param doc document to search; {@code null} yields an empty result.
     * @param pageIndex zero-based page to search.
     * @param needle text to look for.
     * @return the bounding box of the first matching line in PDF user-space, or empty when there
     *     is no match, the page index is out of range, the needle is blank or contains no
     *     letters/digits, or text extraction fails.
     */
    public Optional<MatchedBox> findOnPage(PDDocument doc, int pageIndex, String needle) {
        if (doc == null
                || needle == null
                || needle.isBlank()
                || pageIndex < 0
                || pageIndex >= doc.getNumberOfPages()) {
            return Optional.empty();
        }
        String normalizedNeedle = normalize(needle);
        if (normalizedNeedle.isEmpty()) {
            // A needle made only of punctuation would otherwise "match" every line.
            return Optional.empty();
        }

        List<CapturedLine> lines = new ArrayList<>();
        try {
            LineCapturingStripper stripper = new LineCapturingStripper(lines);
            // PDFTextStripper page numbers are 1-based.
            stripper.setStartPage(pageIndex + 1);
            stripper.setEndPage(pageIndex + 1);
            stripper.setSortByPosition(true);
            // Side effect: populates `lines`. We don't need the concatenated text.
            stripper.getText(doc);
        } catch (IOException e) {
            log.warn(
                    "PdfTextLocator failed to extract text on page {}: {}",
                    pageIndex,
                    e.getMessage(),
                    e);
            return Optional.empty();
        }

        // PDFBox reports text positions relative to the page's crop box (which defaults to the
        // media box when absent), so the conversion back to user-space must use the same box.
        PDRectangle cropBox = doc.getPage(pageIndex).getCropBox();
        float originX = cropBox.getLowerLeftX();
        float originY = cropBox.getLowerLeftY();
        float pageHeight = cropBox.getHeight();

        for (CapturedLine line : lines) {
            if (normalize(line.text()).contains(normalizedNeedle)) {
                // Captured coords descend from the top of the page; flip to PDF user-space
                // (origin = bottom-left) so the bbox can feed a PDRectangle directly.
                float userSpaceX = originX + line.x();
                float userSpaceY = originY + pageHeight - line.yTopDown() - line.height();
                return Optional.of(
                        new MatchedBox(userSpaceX, userSpaceY, line.width(), line.height()));
            }
        }
        return Optional.empty();
    }

    /**
     * Strip everything that is not a Unicode letter or number and lowercase the rest, for
     * tolerant matching.
     */
    private static String normalize(String s) {
        return NON_ALPHANUMERIC.matcher(s).replaceAll("").toLowerCase(Locale.ROOT);
    }

    /**
     * Text of one stripper callback plus its box in top-down page coordinates: {@code yTopDown}
     * is the top edge of the box measured from the top of the page, and the box extends {@code
     * height} downwards from there to the text baseline.
     */
    private record CapturedLine(String text, float x, float yTopDown, float width, float height) {}

    /** Text stripper that records a {@link CapturedLine} for every string it writes. */
    private static final class LineCapturingStripper extends PDFTextStripper {
        private final List<CapturedLine> lines;

        LineCapturingStripper(List<CapturedLine> sink) throws IOException {
            super();
            this.lines = sink;
        }

        @Override
        protected void writeString(String text, List<TextPosition> textPositions)
                throws IOException {
            if (textPositions != null && !textPositions.isEmpty()) {
                float left = Float.MAX_VALUE;
                float right = -Float.MAX_VALUE;
                float top = Float.MAX_VALUE;
                float bottom = -Float.MAX_VALUE;
                for (TextPosition p : textPositions) {
                    float x = p.getXDirAdj();
                    // getYDirAdj() is the glyph BASELINE measured from the top of the page; the
                    // glyph itself occupies the band [baseline - height, baseline].
                    float baseline = p.getYDirAdj();
                    float w = p.getWidthDirAdj();
                    float h = p.getHeightDir();
                    if (h == 0f) {
                        // Workaround: some fonts report 0 height via TextPosition; fall back to
                        // the nominal font size so downstream bboxes are never zero-height.
                        h = p.getFontSizeInPt();
                    }
                    left = Math.min(left, x);
                    right = Math.max(right, x + w);
                    top = Math.min(top, baseline - h);
                    bottom = Math.max(bottom, baseline);
                }
                lines.add(new CapturedLine(text, left, top, right - left, bottom - top));
            }
            super.writeString(text, textPositions);
        }
    }
}
Loading
Loading