-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathslide_processor.py
More file actions
94 lines (74 loc) · 3.12 KB
/
slide_processor.py
File metadata and controls
94 lines (74 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python3
"""
Presentation Build System
Compiles modular slides into single file or bundle with WebP image optimization
Usage:
./build.py # Direct execution
conda run -n superglue-env ./build.py # With specific conda env
"""
import re
import json
import base64
import shutil
import zipfile
from pathlib import Path
from datetime import datetime
from PIL import Image
import yaml
class SlideProcessor:
    """Collect the slides listed in the build config and process their assets.

    ``config`` is queried with ``config.get('slides', [])``. ``asset_manager``
    must provide ``process_slide_assets(content, slide_file, output_mode)``
    returning a ``(content, assets)`` pair, and an ``assets_collected`` list
    that every slide's assets are appended to.
    """

    def __init__(self, config, asset_manager):
        self.config = config
        self.asset_manager = asset_manager

    def collect_slides(self, output_mode='bundle'):
        """Read the slide files named in the config and process their assets.

        Each entry under the ``slides`` config key may be either a filename
        string or a dict carrying the filename under ``'file'``. Filenames are
        resolved relative to the ``slides`` directory (relative to the current
        working directory).

        Entries that are malformed (no usable filename) or that do not point
        at an existing file are reported on stdout and skipped instead of
        aborting the whole build.

        Args:
            output_mode: Passed through unchanged to
                ``asset_manager.process_slide_assets``.

        Returns:
            A list of dicts with keys ``file``, ``number``, ``title``,
            ``content`` and ``assets``; empty when no slides are configured.
            ``number`` is the 1-based position of the entry in the config, so
            skipped entries leave gaps in the numbering.
        """
        slides_dir = Path("slides")

        # Get slide files from config.yaml (a missing or empty list is falsy)
        slide_configs = self.config.get('slides', [])
        if not slide_configs:
            print("❌ No slides defined in config.yaml")
            return []

        print(f"📄 Processing {len(slide_configs)} slides from config.yaml")

        slides_content = []
        for i, slide_config in enumerate(slide_configs, 1):
            # Handle both formats: plain filename or {'file': filename, ...}
            if isinstance(slide_config, str):
                slide_filename = slide_config
            elif isinstance(slide_config, dict):
                slide_filename = slide_config.get('file')
            else:
                # e.g. an empty YAML list item, which loads as None
                slide_filename = None

            # Without a non-empty string filename, `slides_dir / ...` would
            # either raise TypeError or resolve to the directory itself.
            if not isinstance(slide_filename, str) or not slide_filename:
                print(f" ❌ Slide entry {i} has no 'file' defined")
                continue

            slide_file = slides_dir / slide_filename
            # is_file() rather than exists(): a directory cannot be read
            if not slide_file.is_file():
                print(f" ❌ Slide not found: {slide_filename}")
                continue

            # Quietly process slide
            content = slide_file.read_text(encoding='utf-8')

            # Extract the title before assets are rewritten
            title = self._extract_title_from_html(content)

            # Process assets in this slide
            content, slide_assets = self.asset_manager.process_slide_assets(
                content, slide_file, output_mode
            )

            # Hook for rewriting fetch() calls; currently returns the content
            # unchanged (see _remove_fetch_calls).
            content = self._remove_fetch_calls(content)

            slides_content.append({
                'file': slide_filename,
                'number': i,
                'title': title,
                'content': content,
                'assets': slide_assets,
            })
            self.asset_manager.assets_collected.extend(slide_assets)

        return slides_content

    def _extract_title_from_html(self, content):
        """Return the text of the first ``<h1>`` element in *content*.

        Nested tags inside the heading are stripped and surrounding
        whitespace is trimmed. Returns ``"Untitled Slide"`` when there is no
        ``<h1>`` or when the heading contains no text (for example only an
        image or whitespace).
        """
        h1_match = re.search(
            r'<h1[^>]*>(.*?)</h1>', content, re.IGNORECASE | re.DOTALL
        )
        if h1_match:
            title = re.sub(r'<[^>]+>', '', h1_match.group(1)).strip()
            if title:
                return title
        return "Untitled Slide"

    def _remove_fetch_calls(self, content):
        """Return *content* unchanged (placeholder for fetch() rewriting).

        Despite the name, nothing is removed or replaced at present.
        """
        # Don't modify fetch calls for now since they're complex Promise chains.
        # Per the original author's note, the 3D slide already has fallback
        # logic to use embedded data when available, so no replacement is
        # needed.
        return content