Skip to content

Commit 8c304c9

Browse files
authored
feat: add lookup command for venue normalization (#1045)
Introduce a dedicated local lookup workflow for journal/conference normalization so identifier and acronym resolution can be validated independently of assessment dispatch. Adds CLI commands, a reusable VenueLookupService, acronym narrowing for standalone acronyms, and reverse ISSN/eISSN resolution back to normalized venue names. [AI-assisted]
1 parent aeec951 commit 8c304c9

File tree

4 files changed

+785
-0
lines changed

4 files changed

+785
-0
lines changed

src/aletheia_probe/cli.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from .dispatcher import query_dispatcher
2626
from .enums import AssessmentType
2727
from .logging_config import get_status_logger, setup_logging
28+
from .lookup import LookupResult, VenueLookupService
2829
from .models import AssessmentResult, CandidateAssessment, QueryInput, VenueType
2930
from .normalizer import are_conference_names_equivalent, input_normalizer
3031
from .output_formatter import output_formatter
@@ -377,6 +378,130 @@ def conference(
377378
)
378379

379380

381+
@main.group(name="lookup")
def lookup() -> None:
    """Look up normalized venue candidates and known identifiers."""
    # Container group only: it carries no logic of its own. The docstring
    # above is what click shows as the group's help text, so it is kept
    # verbatim; subcommands register themselves via ``@lookup.command``.
385+
386+
387+
@lookup.command(name="journal")
@click.argument("journal_name")
@click.option(
    "--confidence-min",
    type=click.FloatRange(min=0.0, max=1.0),
    default=DEFAULT_ACRONYM_CONFIDENCE_MIN,
    show_default=True,
    help="Minimum acronym dataset confidence for variant candidates",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["text", "json"]),
    default="text",
    help="Output format",
)
@handle_cli_errors
def lookup_journal(
    journal_name: str,
    confidence_min: float,
    output_format: str,
) -> None:
    """Look up normalized forms and identifiers for a journal input."""
    # Thin CLI adapter: pin the venue type to JOURNAL and hand everything
    # else to the shared lookup runner. Keywords make the mapping between
    # click parameters and runner parameters explicit.
    _run_lookup_cli(
        publication_name=journal_name,
        venue_type=VenueType.JOURNAL,
        output_format=output_format,
        confidence_min=confidence_min,
    )
411+
412+
413+
@lookup.command(name="conference")
@click.argument("conference_name")
@click.option(
    "--confidence-min",
    type=click.FloatRange(min=0.0, max=1.0),
    default=DEFAULT_ACRONYM_CONFIDENCE_MIN,
    show_default=True,
    help="Minimum acronym dataset confidence for variant candidates",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["text", "json"]),
    default="text",
    help="Output format",
)
@handle_cli_errors
def lookup_conference(
    conference_name: str,
    confidence_min: float,
    output_format: str,
) -> None:
    """Look up normalized forms and identifiers for a conference input."""
    # Thin CLI adapter: pin the venue type to CONFERENCE and hand everything
    # else to the shared lookup runner. Keywords make the mapping between
    # click parameters and runner parameters explicit.
    _run_lookup_cli(
        publication_name=conference_name,
        venue_type=VenueType.CONFERENCE,
        output_format=output_format,
        confidence_min=confidence_min,
    )
439+
440+
441+
def _run_lookup_cli(
    publication_name: str,
    venue_type: VenueType,
    output_format: str,
    confidence_min: float,
) -> None:
    """Perform a venue lookup and write the outcome to stdout.

    Args:
        publication_name: Raw venue string supplied on the command line.
        venue_type: Venue category forwarded to the lookup service.
        output_format: ``"json"`` prints the result's ``to_dict()`` payload
            as indented JSON; any other value prints the text report.
        confidence_min: Threshold forwarded to the lookup service as
            ``confidence_min``.
    """
    lookup_result = VenueLookupService().lookup(
        publication_name,
        venue_type=venue_type,
        confidence_min=confidence_min,
    )

    # Pick the rendering first so there is a single print at the end.
    if output_format == "json":
        rendered = json.dumps(lookup_result.to_dict(), indent=2)
    else:
        rendered = _format_lookup_result_text(lookup_result)

    print(rendered)
458+
459+
460+
def _format_lookup_result_text(result: LookupResult) -> str:
461+
"""Format lookup results for human-readable CLI output."""
462+
lines = [
463+
f"Lookup: {result.raw_input}",
464+
f"Venue Type: {result.venue_type.value}",
465+
f"Primary Normalized Name: {result.normalized_name or '-'}",
466+
f"ISSN Checksum Valid: {'yes' if result.issn_valid else 'no'}",
467+
"",
468+
"Normalized Names:",
469+
]
470+
471+
if result.normalized_names:
472+
for name in result.normalized_names:
473+
lines.append(f"- {name}")
474+
else:
475+
lines.append("- (none)")
476+
477+
lines.extend(["", "Identifiers:"])
478+
lines.append(f"- input identifiers: {result.identifiers or {}}")
479+
lines.append(f"- issns: {result.issns or []}")
480+
lines.append(f"- eissns: {result.eissns or []}")
481+
482+
lines.extend(["", "Candidates:"])
483+
if not result.candidates:
484+
lines.append("- (none)")
485+
return "\n".join(lines)
486+
487+
for candidate in result.candidates:
488+
candidate_line = f"- {candidate.source}: {candidate.normalized_name}"
489+
details: list[str] = []
490+
if candidate.acronym:
491+
details.append(f"acronym={candidate.acronym}")
492+
if candidate.confidence is not None:
493+
details.append(f"confidence={candidate.confidence:.2f}")
494+
if candidate.issn:
495+
details.append(f"issn={candidate.issn}")
496+
if candidate.eissn:
497+
details.append(f"eissn={candidate.eissn}")
498+
if details:
499+
candidate_line += f" ({', '.join(details)})"
500+
lines.append(candidate_line)
501+
502+
return "\n".join(lines)
503+
504+
380505
@main.command()
381506
@handle_cli_errors
382507
def config() -> None:

0 commit comments

Comments
 (0)