-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbooklet_generator.py
More file actions
73 lines (62 loc) · 2.37 KB
/
booklet_generator.py
File metadata and controls
73 lines (62 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
"""
Generate a PDF booklet of Targum Onkelos (Taj edition) from Hebrew Wikisource.
Book headers show parsha and perek; each perek ends with a "next perek" indicator.
"""
import os
import argparse
from datetime import datetime
from jinja2 import Environment, FileSystemLoader
from weasyprint import HTML, CSS
from scraper import fetch_data
# Project folders, each located next to this source file:
#   templates/ - Jinja2 templates, assets/ - stylesheets, output/ - generated files.
TEMPLATE_DIR, ASSETS_DIR, OUTPUT_DIR = (
    os.path.join(os.path.dirname(__file__), folder)
    for folder in ("templates", "assets", "output")
)
def generate_booklet(limit_parshiyot=None, output_path=None):
    """Fetch Onkelos data, render it to HTML, and write the booklet PDF.

    The rendered HTML is also dumped to ``onkelos_debug.html`` inside
    ``OUTPUT_DIR`` so the intermediate markup can be inspected.

    Args:
        limit_parshiyot: Passed through to ``fetch_data``; ``None`` means no
            limit is requested.
        output_path: Destination of the PDF. When ``None``, a timestamped
            file name inside ``OUTPUT_DIR`` is used. When given, its parent
            directory is created if it does not exist yet.

    Returns:
        None. If ``fetch_data`` yields nothing, the function prints a notice
        and returns before rendering anything.

    Raises:
        Exception: Any error raised while writing the PDF is reported on
            stdout and then re-raised unchanged.
    """
    print("Step 1: Fetching data from Hebrew Wikisource...")
    blocks = fetch_data(limit_parshiyot=limit_parshiyot)
    if not blocks:
        print("No data fetched. Aborting.")
        return

    print(f"Step 2: Rendering HTML ({len(blocks)} perakim)...")
    env = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
    template = env.get_template("onkelos.html")
    html_content = template.render(blocks=blocks)

    # OUTPUT_DIR is always needed: the debug HTML is written there even when
    # the PDF itself goes somewhere else.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    if output_path is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join(OUTPUT_DIR, f"onkelos_booklet_{timestamp}.pdf")
    else:
        # A caller-supplied path may live outside OUTPUT_DIR; create its
        # parent up front so the PDF write does not fail on a missing folder.
        os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)

    debug_html = os.path.join(OUTPUT_DIR, "onkelos_debug.html")
    with open(debug_html, "w", encoding="utf-8") as f:
        f.write(html_content)
    print(f"Debug HTML: {debug_html}")

    print("Step 3: Generating PDF...")
    try:
        # base_url lets relative references in the HTML resolve against the
        # directory that contains this script.
        HTML(string=html_content, base_url=os.path.dirname(__file__)).write_pdf(
            output_path,
            stylesheets=[CSS(os.path.join(ASSETS_DIR, "styles.css"))],
        )
        print(f"SUCCESS: {output_path}")
    except Exception as e:
        # Report the failure for CLI users, then let the caller see it too.
        print(f"Error generating PDF: {e}")
        raise
if __name__ == "__main__":
    # Command-line entry point: parse the options and build the booklet.
    cli = argparse.ArgumentParser(
        description="Generate Targum Onkelos booklet PDF from Hebrew Wikisource"
    )

    # Optional cap on how many parshiyot are fetched.
    cli.add_argument(
        "--limit",
        metavar="N",
        type=int,
        default=None,
        help="Fetch only first N parshiyot (for testing)",
    )

    # Optional explicit destination for the generated PDF.
    cli.add_argument("-o", "--output", type=str, default=None, help="Output PDF path")

    options = cli.parse_args()
    generate_booklet(
        limit_parshiyot=options.limit,
        output_path=options.output,
    )