-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcli.py
More file actions
1155 lines (989 loc) · 35.8 KB
/
cli.py
File metadata and controls
1155 lines (989 loc) · 35.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
"""
Booksmith - Unified Command-Line Interface
A high-quality tool for adapting EPUB content using LLMs. This CLI provides
subcommands for editing, profiling, validation, analysis, planning, checking,
cost estimation, and annotating EPUB files.
Usage:
python cli.py <command> [options]
Commands:
edit - Core editing (wraps epub_cleaner.py)
profile - Build author profile (wraps style_profiler.py)
validate - Check style drift (wraps style_validator.py)
analyze - Extract book model (wraps book_analyzer.py)
plan - Plan changes (change_planner.py; not yet implemented)
check - Verify consistency (consistency_checker.py; not yet implemented)
estimate - Estimate API costs before processing
annotate - Add commentary (wraps annotator.py + footnote_inserter.py)
Examples:
python cli.py edit --input book.epub --output clean.epub
python cli.py profile --input *.epub --output author.json
python cli.py validate --original original.txt --modified modified.txt
python cli.py analyze --input book.epub --output model.json
python cli.py estimate --input book.epub --workflow transform
python cli.py annotate --input book.epub --config commentary.yaml --output annotated.epub
python cli.py --workflow cleanup --input book.epub
For help on a specific command:
python cli.py <command> --help
"""
import argparse
import glob
import os
import sys
from pathlib import Path
from typing import Optional
try:
import yaml
except ImportError:
yaml = None
# ---------- VERSION ----------
# Version string of this CLI module.
__version__ = "0.1.0"
# ---------- WORKFLOW LOADING ----------
def find_workflow_file(workflow_name: str) -> Optional[Path]:
    """
    Find a workflow configuration file by name.

    Candidates are tried in this order and the first regular file wins:
    1. ./workflows/<name>.yaml
    2. ./workflows/<name>.yml
    3. <script_dir>/workflows/<name>.yaml
    4. <script_dir>/workflows/<name>.yml

    Args:
        workflow_name: Workflow name without the file extension.

    Returns:
        Path of the first matching file, or None when no candidate is a file.
    """
    script_dir = Path(__file__).parent
    candidates = (
        base / "workflows" / f"{workflow_name}{extension}"
        for base in (Path(), script_dir)
        for extension in (".yaml", ".yml")
    )
    # is_file() rather than exists(): a directory that happens to be named
    # like a workflow cannot be opened as YAML, so it must not be returned.
    return next((path for path in candidates if path.is_file()), None)
def load_workflow(workflow_name: str) -> dict:
    """
    Load a workflow configuration from a YAML file.

    The file is located with find_workflow_file(). Every failure (PyYAML
    missing, workflow not found, unreadable file, invalid YAML, or a YAML
    document that is not a mapping) prints an error and exits with status 1.

    Args:
        workflow_name: Workflow name without the file extension.

    Returns:
        A dict with configuration overrides for the subcommand. An empty
        file yields an empty dict.
    """
    if yaml is None:
        print("Error: PyYAML not installed. Run: pip install pyyaml")
        sys.exit(1)

    workflow_path = find_workflow_file(workflow_name)
    if workflow_path is None:
        # List the workflows that do exist so the user can spot a typo.
        script_dir = Path(__file__).parent
        available = sorted({
            candidate.stem
            for search_dir in (Path("workflows"), script_dir / "workflows")
            if search_dir.exists()
            for pattern in ("*.yaml", "*.yml")
            for candidate in search_dir.glob(pattern)
        })
        print(f"Error: Workflow not found: {workflow_name}")
        if available:
            print(f"Available workflows: {', '.join(available)}")
        else:
            print("No workflows found in ./workflows/ directory.")
            print("Create workflow files (e.g., workflows/cleanup.yaml) to use --workflow.")
        sys.exit(1)

    try:
        with open(workflow_path, 'r', encoding='utf-8') as f:
            # An empty YAML document loads as None; treat it as "no overrides".
            workflow = yaml.safe_load(f) or {}
    except OSError as e:
        # The file can vanish or be unreadable between lookup and open.
        print(f"Error reading workflow file: {e}")
        sys.exit(1)
    except yaml.YAMLError as e:
        print(f"Error parsing workflow file: {e}")
        sys.exit(1)

    # Callers are promised a dict; a top-level list or scalar is a malformed
    # workflow and is rejected here rather than failing later.
    if not isinstance(workflow, dict):
        print(f"Error: Workflow file must contain a YAML mapping: {workflow_path}")
        sys.exit(1)

    print(f"Loaded workflow: {workflow_name} (from {workflow_path})")
    return workflow
# ---------- SUBCOMMAND: edit ----------
def setup_edit_parser(subparsers) -> None:
    """Register the 'edit' subcommand and its options on ``subparsers``."""
    description = """
Edit EPUB content using LLM-based analysis and selective rewriting.
This command sends full chapters for context, but only rewrites problematic
paragraphs - providing both accuracy and efficiency.
"""
    epilog = """
Examples:
%(prog)s --input book.epub --output book_cleaned.epub
%(prog)s --input book.epub --dry-run
%(prog)s --input book.epub --config my_config.yaml --prompts my_prompts.yaml
%(prog)s --input book.epub --output clean.epub --verbose
Environment variables:
ANTHROPIC_API_KEY Your Anthropic API key (required for Claude)
"""
    edit_parser = subparsers.add_parser(
        'edit',
        help='Core editing - selectively rewrite paragraphs using LLM analysis',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epilog,
    )

    # (flags, keyword arguments) for each option, in registration order.
    option_specs = (
        (('--input', '-i'),
         {'required': True, 'help': 'Input EPUB file path'}),
        (('--output', '-o'),
         {'help': 'Output EPUB file path (default: input_cleaned.epub)'}),
        (('--config', '-c'),
         {'help': 'Path to config.yaml file (default: ./config.yaml)'}),
        (('--prompts', '-p'),
         {'help': 'Path to prompts.yaml file (default: ./prompts.yaml)'}),
        (('--dry-run', '-n'),
         {'action': 'store_true',
          'help': 'Analyze and report changes without modifying files'}),
    )
    for flags, options in option_specs:
        edit_parser.add_argument(*flags, **options)
def run_edit(args, verbose: bool = False) -> int:
    """
    Run the edit subcommand.

    Args:
        args: Parsed arguments; reads ``input``, ``output``, ``config``,
            ``prompts`` and ``dry_run``.
        verbose: Accepted for a uniform subcommand signature; not used here.

    Returns:
        0 on success, 1 if epub_cleaner cannot be imported or the input
        file does not exist.
    """
    try:
        # Import only the names that are used, so a missing unused symbol
        # cannot turn into a misleading "could not import" error.
        from epub_cleaner import load_config, load_prompts, create_client, process_epub
    except ImportError:
        print("Error: Could not import epub_cleaner module")
        print("Make sure epub_cleaner.py is in the same directory")
        return 1

    # Validate input file
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {args.input}")
        return 1
    if input_path.suffix.lower() != '.epub':
        # Only a warning: the file may still be a valid EPUB under another name.
        print(f"Warning: Input file does not have .epub extension: {args.input}")

    # Default output sits next to the input as <stem>_cleaned<suffix>.
    output_path = args.output or str(input_path.with_stem(input_path.stem + '_cleaned'))

    # Load configuration and prompts
    config = load_config(args.config)
    prompts = load_prompts(args.prompts)

    # Create LLM client
    client = create_client(config)

    # Process the EPUB
    process_epub(
        input_path=str(input_path),
        output_path=output_path,
        config=config,
        prompts=prompts,
        client=client,
        dry_run=args.dry_run,
    )
    return 0
# ---------- SUBCOMMAND: profile ----------
def setup_profile_parser(subparsers) -> None:
    """Register the 'profile' subcommand and its options on ``subparsers``."""
    description = """
Analyze one or more EPUB files to create a comprehensive author style profile
using Claude for deep literary analysis.
The profile includes:
- Overall writing style description
- Character voice patterns
- Thematic elements
- Do/avoid guidelines for style preservation
"""
    epilog = """
Examples:
%(prog)s --input book.epub --output profile.json
%(prog)s --input book1.epub book2.epub book3.epub --output author_profile.json
%(prog)s --input "author_works/*.epub" --output tolkien_profile.json
%(prog)s --input book.epub --model claude-sonnet-4-20250514 --output profile.json
Environment variables:
ANTHROPIC_API_KEY Your Anthropic API key (required)
"""
    profile_parser = subparsers.add_parser(
        'profile',
        help='Build author profile - analyze EPUBs to create style profiles',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epilog,
    )

    # (flags, keyword arguments) for each option, in registration order.
    option_specs = (
        (('--input', '-i'),
         {'nargs': '+', 'required': True,
          'help': 'Input EPUB file(s). Supports multiple files and glob patterns.'}),
        (('--output', '-o'),
         {'default': 'author_profile.json',
          'help': 'Output JSON profile path (default: author_profile.json)'}),
        (('--model', '-m'),
         {'default': 'claude-sonnet-4-20250514',
          'help': 'Claude model to use (default: claude-sonnet-4-20250514)'}),
    )
    for flags, options in option_specs:
        profile_parser.add_argument(*flags, **options)
def run_profile(args, verbose: bool = False) -> int:
    """
    Run the profile subcommand.

    Args:
        args: Parsed arguments; reads ``input`` (list of paths or glob
            patterns), ``output`` and ``model``.
        verbose: Forwarded to generate_style_profile().

    Returns:
        0 on success, 1 if style_profiler cannot be imported or no EPUB
        file could be resolved from the inputs.
    """
    import json

    try:
        from style_profiler import create_client, generate_style_profile
    except ImportError:
        print("Error: Could not import style_profiler module")
        print("Make sure style_profiler.py is in the same directory")
        return 1

    # Expand glob patterns and collect all EPUB files
    epub_files = []
    for pattern in args.input:
        if '*' in pattern or '?' in pattern:
            # glob.glob() yields entries in arbitrary order; sort so the
            # same inputs always reach the profiler in the same order.
            matches = sorted(
                match for match in glob.glob(pattern, recursive=True)
                if match.lower().endswith('.epub')
            )
            if not matches:
                # Without this, a mistyped pattern vanishes silently.
                print(f"Warning: No EPUB files matched pattern: {pattern}")
            epub_files.extend(matches)
        elif Path(pattern).exists():
            epub_files.append(pattern)
        else:
            print(f"Warning: File not found: {pattern}")

    # Remove duplicates while preserving order (dicts keep insertion order).
    epub_files = list(dict.fromkeys(epub_files))
    if not epub_files:
        print("Error: No valid EPUB files found")
        return 1

    # Create client
    client = create_client()

    # Generate profile
    profile = generate_style_profile(
        client=client,
        model=args.model,
        epub_paths=epub_files,
        verbose=verbose,
    )

    # Save profile
    output_path = Path(args.output)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(profile, f, indent=2, ensure_ascii=False)
    print(f"\nProfile saved to: {output_path.absolute()}")
    return 0
# ---------- SUBCOMMAND: validate ----------
def setup_validate_parser(subparsers) -> None:
    """Register the 'validate' subcommand and its options on ``subparsers``."""
    description = """
Compare original text vs modified text and use an LLM to measure style drift.
This tool rates stylistic similarity (0-100 score), explains what drifted
(sentence length, tone, vocabulary, etc.), and can compare against an
author profile if provided.
"""
    epilog = """
Examples:
%(prog)s --original original.txt --modified modified.txt
%(prog)s --original original.txt --modified modified.txt --profile author.json
%(prog)s --original original.txt --modified modified.txt --output report.json
Score interpretation:
90-100: Excellent - Style nearly perfectly preserved
75-89: Good - Minor stylistic differences
50-74: Moderate - Noticeable drift in some areas
25-49: Poor - Significant style changes
0-24: Severe - Almost entirely different style
Environment variables:
ANTHROPIC_API_KEY Your Anthropic API key (required)
"""
    validate_parser = subparsers.add_parser(
        'validate',
        help='Check style drift - compare original vs modified text',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epilog,
    )

    # (flags, keyword arguments) for each option, in registration order.
    # Note that -o is the short form of --original here, not of --output.
    option_specs = (
        (('--original', '-o'),
         {'required': True, 'help': 'Path to the original text file'}),
        (('--modified', '-m'),
         {'required': True, 'help': 'Path to the modified text file'}),
        (('--profile', '-p'),
         {'help': 'Optional path to author profile JSON file for additional comparison'}),
        (('--output',),
         {'help': 'Path to save JSON output (default: print to console)'}),
        (('--model',),
         {'default': 'claude-sonnet-4-5-20250929',
          'help': 'Anthropic model to use (default: claude-sonnet-4-5-20250929)'}),
        (('--json',),
         {'action': 'store_true',
          'help': 'Output only JSON (no formatted output)'}),
    )
    for flags, options in option_specs:
        validate_parser.add_argument(*flags, **options)
def run_validate(args, verbose: bool = False) -> int:
    """
    Run the validate subcommand.

    Returns 1 if style_validator cannot be imported. Otherwise the exit
    code reflects the drift score: 2 below 25, 1 below 50, 0 otherwise.
    """
    try:
        from style_validator import (
            validate_style, load_text_file, load_author_profile,
            result_to_dict, print_result
        )
        import json
    except ImportError:
        print("Error: Could not import style_validator module")
        print("Make sure style_validator.py is in the same directory")
        return 1

    # Read both texts, plus the optional author profile.
    original_text = load_text_file(args.original)
    modified_text = load_text_file(args.modified)
    profile = load_author_profile(args.profile) if args.profile else None

    result = validate_style(
        original_text=original_text,
        modified_text=modified_text,
        profile=profile,
        model=args.model,
        verbose=verbose
    )
    result_dict = result_to_dict(result)

    # JSON goes to the file when --output is given, else to stdout when
    # --json is set. The formatted report is shown unless --json is set.
    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            json.dump(result_dict, f, indent=2, ensure_ascii=False)
        print(f"Results saved to: {args.output}")
    elif args.json:
        print(json.dumps(result_dict, indent=2, ensure_ascii=False))
    if not args.json:
        print_result(result)

    # Map the drift score onto the process exit code.
    if result.overall_score < 25:
        return 2
    if result.overall_score < 50:
        return 1
    return 0
# ---------- SUBCOMMAND: analyze ----------
def setup_analyze_parser(subparsers) -> None:
    """
    Set up the 'analyze' subcommand parser.

    Registers --input (required), --output, --model and --rate-limit-delay
    on a new 'analyze' subparser.

    Args:
        subparsers: The object returned by ArgumentParser.add_subparsers().
    """
    parser = subparsers.add_parser(
        'analyze',
        help='Extract book model - characters, plot, timeline, relationships',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
Extract a structured book model from EPUB using LLM analysis.
The book model includes:
- Characters (names, descriptions, relationships)
- Locations (names, descriptions, significance)
- Timeline (chapter-by-chapter events)
- Plot structure (setup, rising action, climax, resolution)
- Themes and narrative notes
""",
        epilog="""
Examples:
%(prog)s --input book.epub --output book_model.json
%(prog)s --input book.epub --model claude-sonnet-4-5-20250929
%(prog)s --input book.epub -v
Environment variables:
ANTHROPIC_API_KEY Your Anthropic API key (required)
"""
    )
    parser.add_argument(
        '--input', '-i',
        required=True,
        help='Input EPUB file path'
    )
    parser.add_argument(
        '--output', '-o',
        # The help text states the default that run_analyze actually
        # applies: the input path with its suffix swapped.
        help='Output JSON file path (default: input path with its extension '
             'replaced by .book_model.json)'
    )
    parser.add_argument(
        '--model', '-m',
        default='claude-sonnet-4-5-20250929',
        help='Claude model to use (default: claude-sonnet-4-5-20250929)'
    )
    parser.add_argument(
        '--rate-limit-delay',
        type=float,
        default=0.5,
        help='Delay between API calls in seconds (default: 0.5)'
    )
def run_analyze(args, verbose: bool = False) -> int:
    """
    Run the analyze subcommand.

    Returns 1 if book_analyzer cannot be imported or the input file is
    missing, and 0 once analyze_book() has returned.
    """
    try:
        from book_analyzer import analyze_book
    except ImportError:
        print("Error: Could not import book_analyzer module")
        print("Make sure book_analyzer.py is in the same directory")
        return 1

    source = Path(args.input)
    if not source.exists():
        print(f"Error: Input file not found: {args.input}")
        return 1

    # Without --output, the model is written beside the input file, with
    # the input's suffix replaced by '.book_model.json'.
    destination = args.output if args.output else str(source.with_suffix('.book_model.json'))

    analyze_book(
        input_path=str(source),
        output_path=destination,
        model=args.model,
        rate_limit_delay=args.rate_limit_delay,
        verbose=verbose
    )
    return 0
# ---------- SUBCOMMAND: plan ----------
def setup_plan_parser(subparsers) -> None:
    """Register the 'plan' subcommand and its options on ``subparsers``."""
    description = """
Generate a change plan for modifying a book based on user goals.
Takes a book model and user's change request, then:
- Interprets user intent
- Identifies affected chapters/passages
- Maps ripple effects
- Generates a structured change plan
NOTE: This module is not yet implemented.
"""
    epilog = """
Examples:
%(prog)s --model book_model.json --goal "Convert to steampunk setting" --output plan.json
%(prog)s --model book_model.json --goal "Remove character X" --output plan.json
"""
    plan_parser = subparsers.add_parser(
        'plan',
        help='Plan changes - generate modification plans with ripple effect mapping',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epilog,
    )

    # (flags, keyword arguments) for each option, in registration order.
    option_specs = (
        (('--model', '-m'),
         {'required': True, 'help': 'Path to book_model.json file'}),
        (('--goal', '-g'),
         {'required': True, 'help': 'Description of the desired changes'}),
        (('--output', '-o'),
         {'default': 'change_plan.json',
          'help': 'Output change plan JSON file (default: change_plan.json)'}),
    )
    for flags, options in option_specs:
        plan_parser.add_argument(*flags, **options)
def run_plan(args, verbose: bool = False) -> int:
    """Report that the plan subcommand is not implemented; always returns 1."""
    notice = (
        "Error: change_planner module is not yet implemented.",
        "This feature is planned for a future release.",
        "",
        "The change planner will:",
        " - Take a book model and user's change request",
        " - Identify affected chapters/passages",
        " - Map ripple effects across the narrative",
        " - Generate a structured change plan",
    )
    for line in notice:
        print(line)
    return 1
# ---------- SUBCOMMAND: check ----------
def setup_check_parser(subparsers) -> None:
    """Register the 'check' subcommand and its options on ``subparsers``."""
    description = """
Check a modified EPUB for internal consistency and contradictions.
This tool validates:
- Character consistency (names, traits, relationships)
- Timeline coherence
- Location consistency
- Plot logic
NOTE: This module is not yet implemented.
"""
    epilog = """
Examples:
%(prog)s --input modified.epub --model book_model.json --output report.json
%(prog)s --input modified.epub --original original.epub --output report.json
"""
    check_parser = subparsers.add_parser(
        'check',
        help='Verify consistency - cross-chapter validation for contradictions',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epilog,
    )

    # (flags, keyword arguments) for each option, in registration order.
    # Note that -o is the short form of --original here, not of --output.
    option_specs = (
        (('--input', '-i'),
         {'required': True, 'help': 'Input EPUB file to check'}),
        (('--model', '-m'),
         {'help': 'Path to book_model.json file for reference'}),
        (('--original', '-o'),
         {'help': 'Path to original EPUB for comparison'}),
        (('--output',),
         {'default': 'consistency_report.json',
          'help': 'Output report JSON file (default: consistency_report.json)'}),
    )
    for flags, options in option_specs:
        check_parser.add_argument(*flags, **options)
def run_check(args, verbose: bool = False) -> int:
    """Report that the check subcommand is not implemented; always returns 1."""
    notice = (
        "Error: consistency_checker module is not yet implemented.",
        "This feature is planned for a future release.",
        "",
        "The consistency checker will:",
        " - Validate character consistency across chapters",
        " - Check timeline coherence",
        " - Detect contradictions introduced by edits",
        " - Generate a detailed validation report",
    )
    for line in notice:
        print(line)
    return 1
# ---------- SUBCOMMAND: estimate ----------
def setup_estimate_parser(subparsers) -> None:
    """Register the 'estimate' subcommand and its options on ``subparsers``."""
    description = """
Estimate API costs before processing an EPUB.
Analyzes the EPUB to count tokens and chapters, then calculates
estimated costs based on:
- Selected model (Haiku/Sonnet/Opus)
- Workflow type (cleanup/filter/transform/etc.)
- Optional features (profiling, analysis, drift validation)
Use this to budget before running expensive operations.
"""
    epilog = """
Examples:
%(prog)s --input book.epub
%(prog)s --input book.epub --model opus --workflow transform
%(prog)s --input book.epub --all-features
%(prog)s --input book.epub --json
Model pricing (per million tokens, approximate):
Haiku: $0.80 input / $4 output
Sonnet: $3 input / $15 output
Opus: $15 input / $75 output
"""
    estimate_parser = subparsers.add_parser(
        'estimate',
        help='Estimate API costs - preview costs before processing',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=description,
        epilog=epilog,
    )

    estimate_parser.add_argument(
        '--input', '-i', required=True, help='Input EPUB file path')
    estimate_parser.add_argument(
        '--model', '-m',
        default='sonnet',
        choices=['haiku', 'sonnet', 'opus'],
        help='Model to estimate for (default: sonnet)')
    estimate_parser.add_argument(
        '--workflow', '-w',
        default='cleanup',
        choices=['cleanup', 'filter', 'modernize', 'transform', 'annotate'],
        help='Workflow type (default: cleanup)')

    # All remaining options are plain boolean switches.
    switches = (
        ('--with-profile', 'Include style profiling cost'),
        ('--with-analysis', 'Include book analysis cost'),
        ('--with-plan', 'Include change planning cost'),
        ('--with-consistency', 'Include consistency checking cost'),
        ('--with-drift', 'Include drift validation cost'),
        ('--all-features', 'Include all optional features in estimate'),
        ('--json', 'Output as JSON (no formatted output)'),
    )
    for flag, help_text in switches:
        estimate_parser.add_argument(flag, action='store_true', help=help_text)
def run_estimate(args, verbose: bool = False) -> int:
    """
    Run the estimate subcommand.

    Returns 1 if cost_estimator cannot be imported or the input file is
    missing, and 0 once the estimate has been produced.
    """
    try:
        from cost_estimator import estimate_cost
        import json
    except ImportError:
        print("Error: Could not import cost_estimator module")
        print("Make sure cost_estimator.py is in the same directory")
        return 1

    epub = Path(args.input)
    if not epub.exists():
        print(f"Error: Input file not found: {args.input}")
        return 1

    # --all-features switches on every individual --with-* flag.
    if args.all_features:
        for flag in ('with_profile', 'with_analysis', 'with_plan',
                     'with_consistency', 'with_drift'):
            setattr(args, flag, True)

    # The estimator's own formatted output is suppressed in --json mode.
    estimate = estimate_cost(
        epub_path=str(epub),
        model=args.model,
        workflow=args.workflow,
        with_profile=args.with_profile,
        with_book_analysis=args.with_analysis,
        with_change_plan=args.with_plan,
        with_consistency_check=args.with_consistency,
        with_drift_validation=args.with_drift,
        verbose=not args.json
    )
    if args.json:
        print(json.dumps(estimate, indent=2))
    return 0
# ---------- SUBCOMMAND: annotate ----------
def setup_annotate_parser(subparsers) -> None:
    """
    Set up the 'annotate' subcommand parser.

    Args:
        subparsers: The object returned by ArgumentParser.add_subparsers().
    """
    parser = subparsers.add_parser(
        'annotate',
        help='Add commentary - generate footnotes/endnotes in various styles',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
Generate and insert annotations/commentary into an EPUB.
This command combines the annotator (generate commentary) and footnote_inserter
(insert into EPUB) functionality.
Commentary styles:
- scholarly: Literary analysis, sources, references
- historical: Period context, author biography, events
- educational: Vocabulary, concepts, explanations
- devils_advocate: Challenge assumptions, alternative views
- thematic: Connections to other works, parallels
- personal_lens: User-specified perspective
- fun_facts: Trivia, behind-the-scenes, inspirations
- funny: Humorous observations, witty asides
- cross_reference: Links to other texts, author's other works
""",
        epilog="""
Examples:
%(prog)s --input book.epub --config commentary.yaml --output annotated.epub
%(prog)s --input book.epub --style scholarly,funny --format footnotes --output annotated.epub
%(prog)s --input book.epub --config commentary.yaml --dry-run
Config file structure (commentary.yaml):
styles: [scholarly, funny]
frequency: "2-4 per chapter"
focus_areas: [character motivation, historical context]
avoid: [spoilers]
Environment variables:
ANTHROPIC_API_KEY Your Anthropic API key (required)
"""
    )
    parser.add_argument(
        '--input', '-i',
        required=True,
        help='Input EPUB file path'
    )
    parser.add_argument(
        '--output', '-o',
        help='Output EPUB file path (default: input_annotated.epub)'
    )
    parser.add_argument(
        '--config', '-c',
        help='Path to commentary config YAML file'
    )
    parser.add_argument(
        '--style', '-s',
        help='Comma-separated commentary styles (e.g., scholarly,funny)'
    )
    parser.add_argument(
        '--format', '-f',
        choices=['footnotes', 'endnotes'],
        default='footnotes',
        help='Note placement: footnotes (end of chapter) or endnotes (end of book)'
    )
    parser.add_argument(
        '--frequency',
        # No argparse default: run_annotate must be able to tell "not given"
        # from an explicit value, otherwise the CLI default would always
        # override the frequency set in the config file.
        default=None,
        help='Annotation frequency, overrides the config file '
             '(default: "2-4 per chapter" when neither is set)'
    )
    parser.add_argument(
        '--dry-run', '-n',
        action='store_true',
        help='Analyze and select passages without generating commentary'
    )
    parser.add_argument(
        '--annotations-only',
        action='store_true',
        help='Only generate annotations JSON, do not insert into EPUB'
    )
    parser.add_argument(
        '--list-chapters',
        action='store_true',
        help='List available chapters and exit (use to find chapter numbers)'
    )
    parser.add_argument(
        '--chapters',
        help='Process only specific chapters (e.g., "1-3", "4,6,8", "1-3,7")'
    )


def run_annotate(args, verbose: bool = False) -> int:
    """
    Run the annotate subcommand.

    Generates annotations with the annotator module, saves them as JSON
    next to the output EPUB, and (unless --annotations-only or --dry-run)
    inserts them into the EPUB via footnote_inserter.

    Args:
        args: Parsed arguments from the 'annotate' subparser.
        verbose: Accepted for a uniform subcommand signature; not used here.

    Returns:
        0 on success (including dry runs and "nothing generated"), 1 on a
        missing module, missing input file, missing styles or an empty
        chapter selection.
    """
    import json
    import tempfile

    # Try to import annotator
    try:
        from annotator import (
            load_config as load_annotator_config, create_client, process_epub,
            save_annotations, DEFAULT_CONFIG, list_epub_chapters, parse_chapter_selection
        )
    except ImportError:
        print("Error: Could not import annotator module")
        print("Make sure annotator.py is in the same directory")
        return 1

    # Validate input file
    input_path = Path(args.input)
    if not input_path.exists():
        print(f"Error: Input file not found: {args.input}")
        return 1

    # Handle --list-chapters
    if args.list_chapters:
        print(f"\nChapters in {input_path.name}:\n")
        print(f"{'#':<4} {'Title':<40} {'File':<30}")
        print("-" * 74)
        chapters = list_epub_chapters(str(input_path))
        for ch in chapters:
            # Truncate long titles so they fit the 40-character column.
            title = ch['title'][:38] + '..' if len(ch['title']) > 40 else ch['title']
            print(f"{ch['index']:<4} {title:<40} {ch['file']:<30}")
        print(f"\nTotal: {len(chapters)} files")
        print("\nUse --chapters to select specific chapters, e.g.:")
        print(f" python cli.py annotate --input {args.input} --chapters 4-6")
        return 0

    # Determine output path
    if args.output:
        output_path = args.output
    else:
        output_path = str(input_path.with_stem(input_path.stem + '_annotated'))

    # Build config
    if args.config:
        config = load_annotator_config(args.config)
    else:
        config = DEFAULT_CONFIG.copy()

    # Override with CLI arguments
    if args.style:
        config['styles'] = [s.strip().lower().replace("'", "").replace(" ", "_")
                            for s in args.style.split(',')]
    if args.frequency:
        config['frequency'] = args.frequency
    elif not config.get('frequency'):
        # Neither the CLI nor the config supplied a frequency: fall back to
        # the documented default instead of clobbering a configured value.
        config['frequency'] = '2-4 per chapter'

    # Validate that we have styles
    if not config.get('styles'):
        print("Error: No commentary styles specified.")
        print("Use --style or provide a config file with --config")
        return 1

    # Create LLM client
    client = create_client(config)

    # Parse chapter selection if provided
    chapter_filter = None
    if args.chapters:
        # Get total chapter count first
        chapters = list_epub_chapters(str(input_path))
        chapter_filter = parse_chapter_selection(args.chapters, len(chapters))
        if not chapter_filter:
            print("Error: No valid chapters selected")
            return 1

    # Generate annotations
    annotations = process_epub(
        input_path=str(input_path),
        config=config,
        client=client,
        dry_run=args.dry_run,
        chapter_filter=chapter_filter
    )
    if args.dry_run:
        print(f"\nDry run complete. Would have generated {len(annotations)} annotations.")
        return 0
    if not annotations:
        print("\nNo annotations generated.")
        return 0

    # Save annotations JSON next to the output EPUB. Derive the name from
    # the path's stem rather than str.replace('.epub', ...): replace() hits
    # every occurrence in the path and is a no-op for names without a
    # lowercase '.epub', which would make the JSON share the EPUB's path.
    output_file = Path(output_path)
    annotations_file = str(output_file.with_name(output_file.stem + '_annotations.json'))
    save_annotations(annotations, annotations_file, config, str(input_path))
    if args.annotations_only:
        print(f"\nAnnotations saved to: {annotations_file}")
        print("Use footnote_inserter.py to insert them into the EPUB.")
        return 0

    # Insert annotations into EPUB
    try:
        from footnote_inserter import process_epub as insert_footnotes
    except ImportError:
        print("Error: Could not import footnote_inserter module")
        print("Annotations saved to JSON. Use footnote_inserter.py to insert them.")
        return 1

    # Convert annotations to the format expected by footnote_inserter
    # The annotator outputs a different format than footnote_inserter expects
    formatted_annotations = [
        {
            'chapter': ann['chapter_file'],
            'paragraph_index': ann['paragraph_index'],
            'passage_text': ann['passage_text'],
            'note_text': ann['commentary']['text'],
            'note_type': ann['commentary']['style'],
        }
        for ann in annotations
    ]

    # Hand the reformatted annotations over through a temp file. It is
    # created with delete=False so it can be closed before the inserter
    # opens it by name; the finally clause removes it on every path,
    # including a failure while writing the JSON.
    temp_file = tempfile.NamedTemporaryFile(
        mode='w', suffix='.json', delete=False, encoding='utf-8')
    temp_annotations_path = temp_file.name
    try:
        with temp_file:
            json.dump(formatted_annotations, temp_file, indent=2, ensure_ascii=False)
        # Insert footnotes
        insert_footnotes(
            input_path=str(input_path),
            annotations_path=temp_annotations_path,
            output_path=output_path,
            note_format=args.format,
            css_filename='footnotes.css'
        )
    finally:
        # Clean up temp file
        os.unlink(temp_annotations_path)

    print(f"\nAnnotated EPUB saved to: {output_path}")
    return 0
# ---------- MAIN CLI ----------
def create_parser() -> argparse.ArgumentParser:
"""Create the main argument parser with all subcommands."""
parser = argparse.ArgumentParser(
prog='booksmith',
description='Booksmith - Unified command-line interface for EPUB editing and analysis',