mkassaf
diff --git a/‎citesentry/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎citesentry/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎citesentry/__pycache__/__init__.cpython-311.pyc‎
0 Bytes b/‎citesentry/__pycache__/__init__.cpython-311.pyc‎
0 Bytes
diff --git a/‎citesentry/checks/__pycache__/existence.cpython-311.pyc‎
366 Bytes b/‎citesentry/checks/__pycache__/existence.cpython-311.pyc‎
366 Bytes
diff --git a/‎citesentry/checks/existence.py‎
Lines changed: 4 additions & 1 deletion b/‎citesentry/checks/existence.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎citesentry/parse/pdf_refs.py‎
Lines changed: 10 additions & 1 deletion b/‎citesentry/parse/pdf_refs.py‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 1 deletion b/‎pyproject.toml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎uv.lock‎
Lines changed: 19 additions & 1 deletion b/‎uv.lock‎
Lines changed: 19 additions & 1 deletion
@@ -1,3 +1,3 @@
 """citesentry — citation verification tool."""
 
-__version__ = "0.3.1"
+__version__ = "0.3.2"
@@ -163,7 +163,10 @@ async def check_existence(
             except Exception as e:
                 evidence[f"{src.name}_error"] = str(e)
 
-    if not candidates and domain_sources:
+    # Query domain sources when no good candidate found yet — this handles papers
+    # (e.g. ICML/ICLR proceedings) that Semantic Scholar misses but DBLP covers well.
+    best_score_so_far = max((c[0] for c in candidates), default=0.0)
+    if (not candidates or best_score_so_far < _TITLE_PASS_THRESHOLD / 100.0) and domain_sources:
         for src in domain_sources:
             try:
                 if effective_doi:
 
@@ -17,11 +17,20 @@
 
 
 def _extract_text(path: Path) -> str:
+    # Prefer PyMuPDF (better multi-column layout / line order); else use pdfminer.
+    try:
+        import fitz  # PyMuPDF
+    except ImportError:
+        pass
+    else:
+        # `with` closes the document even if get_text() raises mid-extraction.
+        with fitz.open(str(path)) as doc:
+            return "\n".join(page.get_text() for page in doc)
     try:
         from pdfminer.high_level import extract_text
         return extract_text(str(path))
     except ImportError as e:
-        raise ImportError("pdfminer.six is required: pip install pdfminer.six") from e
+        raise ImportError("Install pymupdf or pdfminer.six: pip install pymupdf") from e
 
 
 def _find_ref_section(text: str) -> str | None:
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "citesentry"
-version = "0.3.1"
+version = "0.3.2"
 description = "Citation verification tool: existence, URL liveness, and content relevance checks"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -20,6 +20,7 @@ dependencies = [
     "pdfminer.six>=20221105",
     "mcp[cli]>=1.0",
     "platformdirs>=4",
+    "pymupdf>=1.27.2.3",
 ]
 
 [project.optional-dependencies]
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`"""citesentry — citation verification tool."""`
`2`	`2`
`3`		`-__version__ = "0.3.1"`
	`3`	`+__version__ = "0.3.2"`