Skip to content

Commit 1557936

Browse files
feat: add acronym export and import commands [AI-assisted] (#1010)
Implemented 'export' and 'import' commands for the acronym database, so that users can create datasets of collected acronyms and share them between installations. Changes: added export_all_variants and import_variants to AcronymCache; added export and import commands to the 'acronym' CLI group; renamed the 'list' function to 'list_acronyms' to avoid shadowing the built-in 'list'; fixed Ruff B904 issues in CLI exception handling. Co-authored-by: florath-ai-assistant[bot] <Andreas.Florath@telekom.de>
1 parent 11e02df commit 1557936

File tree

2 files changed

+146
-2
lines changed

2 files changed

+146
-2
lines changed

src/aletheia_probe/cache/acronym_cache.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,81 @@ def get_acronym_stats(self, entity_type: str | None = None) -> dict[str, int]:
338338

339339
return {"total_count": count}
340340

341+
def export_all_variants(self) -> list[dict[str, Any]]:
    """Dump every row of the ``venue_acronym_variants`` table.

    Rows are read through a row-factory connection and converted to plain
    dictionaries so the result can be serialized directly.

    Returns:
        One dictionary per variant row with the keys ``acronym``,
        ``entity_type``, ``variant_name``, ``normalized_name``,
        ``usage_count``, ``is_canonical``, ``is_ambiguous`` and ``source``,
        ordered by acronym, entity type and descending usage count.
    """
    detail_logger.debug("Exporting all acronym variants")

    with self.get_connection_with_row_factory() as connection:
        db_cursor = connection.cursor()
        db_cursor.execute(
            """
                SELECT acronym, entity_type, variant_name, normalized_name,
                       usage_count, is_canonical, is_ambiguous, source
                FROM venue_acronym_variants
                ORDER BY acronym, entity_type, usage_count DESC
                """
        )
        fetched_rows = db_cursor.fetchall()
        # Convert while the connection is still open, as row objects may be
        # tied to it.
        return list(map(dict, fetched_rows))
360+
361+
def import_variants(
    self, variants: list[dict[str, Any]], merge: bool = True
) -> int:
    """Import acronym variants into the database.

    Every valid entry is written through ``store_variant``. Entries whose
    ``is_ambiguous`` value is truthy additionally trigger
    ``mark_acronym_as_ambiguous``, and ``update_canonical_variant`` is called
    for the entry's acronym / entity type afterwards. An ``is_canonical``
    value in the input is not read.

    Args:
        variants: List of variant dictionaries (as exported by
            export_all_variants). The keys ``acronym``, ``entity_type`` and
            ``normalized_name`` are required. ``variant_name`` defaults to
            the normalized name, ``usage_count`` to 1 and ``source`` to
            ``"import"`` when absent.
        merge: Currently unused. Entries are always handed to
            ``store_variant``, which is expected to merge with existing
            data; callers wanting a replace must clear the database first.

    Returns:
        Number of variants imported/updated. Skipped (invalid) entries are
        not counted.
    """
    if not variants:
        return 0

    detail_logger.debug(f"Importing {len(variants)} acronym variants")
    required_fields = ("acronym", "entity_type", "normalized_name")
    count = 0

    for variant in variants:
        # The input usually comes from user-supplied JSON, so an element may
        # not be an object at all; skip it instead of raising TypeError on
        # the membership test below.
        if not isinstance(variant, dict) or any(
            field not in variant for field in required_fields
        ):
            detail_logger.warning(f"Skipping invalid variant: {variant}")
            continue

        # Default values for missing fields
        variant_name = variant.get("variant_name", variant["normalized_name"])
        usage_count = variant.get("usage_count", 1)
        source = variant.get("source", "import")

        # Use store_variant to handle merge logic
        self.store_variant(
            acronym=variant["acronym"],
            entity_type=variant["entity_type"],
            variant_name=variant_name,
            normalized_name=variant["normalized_name"],
            usage_count=usage_count,
            source=source,
        )

        # Restore is_ambiguous status if present
        if variant.get("is_ambiguous"):
            self.mark_acronym_as_ambiguous(
                variant["acronym"], variant["entity_type"]
            )

        # Update canonical status
        self.update_canonical_variant(variant["acronym"], variant["entity_type"])

        count += 1

    return count
415+
341416
def list_all_acronyms(
342417
self, entity_type: str | None = None, limit: int | None = None, offset: int = 0
343418
) -> list[dict[str, str]]:

src/aletheia_probe/cli.py

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -412,11 +412,11 @@ def stats() -> None:
412412
status_logger.info(f"Total acronyms: {total:,}")
413413

414414

415-
@acronym.command()
415+
@acronym.command(name="list")
416416
@click.option("--limit", type=int, help="Maximum number of entries to display")
417417
@click.option("--offset", type=int, default=0, help="Number of entries to skip")
418418
@handle_cli_errors
419-
def list(limit: int | None, offset: int) -> None:
419+
def list_acronyms(limit: int | None, offset: int) -> None:
420420
"""List all acronym mappings in the database.
421421
422422
Args:
@@ -446,6 +446,75 @@ def list(limit: int | None, offset: int) -> None:
446446
status_logger.info(f"\nShowing {shown} of {total_count:,} total acronyms")
447447

448448

449+
@acronym.command()
@click.argument("output_file", type=click.Path())
@handle_cli_errors
def export(output_file: str) -> None:
    """Export the entire acronym database to a JSON file.

    Args:
        output_file: Path to the output JSON file.
    """
    # The docstring above is shown by click as the command's help text, so it
    # is kept verbatim.
    logger = get_status_logger()
    cache = AcronymCache()

    records = cache.export_all_variants()

    try:
        # ensure_ascii=False keeps non-ASCII names readable in the UTF-8 file.
        with open(output_file, "w", encoding="utf-8") as handle:
            json.dump(records, handle, indent=2, ensure_ascii=False)

        exported_total = len(records)
        logger.info(
            f"Successfully exported {exported_total} acronym variants to {output_file}"
        )
    except Exception as exc:
        # Report through the status logger, then surface the failure to click
        # with the original exception chained.
        logger.error(f"Failed to export acronyms: {exc}")
        raise click.ClickException(str(exc)) from exc
473+
474+
475+
@acronym.command(name="import")
@click.argument("input_file", type=click.Path(exists=True))
@click.option(
    "--merge/--no-merge",
    default=True,
    help="Merge with existing data (default) or replace",
)
@handle_cli_errors
def import_acronyms(input_file: str, merge: bool) -> None:
    """Import acronyms from a JSON file.

    Args:
        input_file: Path to the input JSON file.
        merge: Whether to merge with existing data.
    """
    # The docstring above is shown by click as the command's help text, so it
    # is kept verbatim.
    status_logger = get_status_logger()
    acronym_cache = AcronymCache()

    try:
        with open(input_file, encoding="utf-8") as f:
            variants = json.load(f)

        if not isinstance(variants, list):
            raise ValueError("Input file must contain a JSON list of variants")

        status_logger.info(f"Read {len(variants)} variants from {input_file}")

        if not merge:
            # The file is read and validated first, so the existing data is
            # only cleared once there is a usable list to import.
            # abort=True makes confirm() raise click.Abort when the user
            # declines, so reaching the next line means the user agreed.
            click.confirm(
                "This will clear existing acronyms before importing. Continue?",
                abort=True,
            )
            acronym_cache.clear_acronym_database()

        # Replace mode has already cleared the database above, so the import
        # itself always runs as a merge.
        count = acronym_cache.import_variants(variants, merge=True)

        status_logger.info(f"Successfully imported {count} acronym variants")

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause a
        # declined confirmation would be logged as an import failure and
        # re-raised as a ClickException with an empty message.
        raise
    except Exception as e:
        status_logger.error(f"Failed to import acronyms: {e}")
        raise click.ClickException(str(e)) from e
516+
517+
449518
@acronym.command()
450519
@click.option("--confirm", is_flag=True, help="Skip confirmation prompt")
451520
@handle_cli_errors

0 commit comments

Comments
 (0)