add script to convert episodes to pre-filled notebooks

qualiaMachine · qualiaMachine · commit 12ee981bf8e5 · 2025-11-07T10:31:19.000-06:00
diff --git a/.github/workflows/md_to_ipynb.yml b/.github/workflows/md_to_ipynb.yml
@@ -0,0 +1,41 @@
+name: Convert Markdown to Jupyter Notebooks
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  convert:
+    runs-on: ubuntu-latest
+    # The final step pushes a commit, so the job token needs write access to repository contents
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.8"
+
+      - name: Install dependencies
+        run: pip install nbformat pandoc
+
+      - name: Convert Markdown files to Jupyter Notebooks
+        run: python scripts/md_to_ipynb.py
+
+      # Pull-request runs check out a detached merge commit, so there is no branch to push to;
+      # on those runs the conversion above still executes as a check, but nothing is committed.
+      - name: Commit and push changes
+        if: github.event_name == 'push'
+        run: |
+          git config --global user.name "github-actions"
+          git config --global user.email "github-actions@github.com"
+          git add notebooks/*.ipynb
+          git commit -m "Auto-generate Jupyter notebooks from Markdown" || echo "No changes to commit"
+          git push
diff --git a/scripts/md_to_ipynb.py b/scripts/md_to_ipynb.py
@@ -0,0 +1,70 @@
+import os
+import nbformat
+import re
+from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell
+
+# Input and output directories, resolved relative to the working directory
+episodes_dir = "episodes"
+notebooks_dir = "notebooks"
+
+# Markdown files in episodes_dir that are skipped during conversion
+ignore_list = [
+    "01-Introduction.md",
+    "02-Data-storage.md",
+    "03-Notebooks-as-controllers.md",
+    "05-Interacting-with-code-repo.md",
+    "09-Resource-management-cleanup.md"
+]
+
+# Ensure notebooks directory exists
+os.makedirs(notebooks_dir, exist_ok=True)
+
+# Fenced block: group 1 = optional language tag, group 2 = body ("" for an empty block); the closing fence must start a line
+code_block_pattern = re.compile(r"```(\w+)?\n(.*?)\n?^```", re.DOTALL | re.MULTILINE)
+
+def split_markdown(md_content):
+    """Turn Markdown text into an ordered list of notebook cells.
+
+    Each fenced block found by ``code_block_pattern`` yields a code cell
+    holding the fence body; the text between fences (and after the last
+    one) yields Markdown cells. Pieces that are empty once stripped of
+    surrounding whitespace are dropped.
+    """
+    notebook_cells = []
+    cursor = 0  # index just past the most recently consumed fence
+
+    for fence in code_block_pattern.finditer(md_content):
+        prose = md_content[cursor:fence.start()].strip()
+        source = fence.group(2).strip()
+        cursor = fence.end()
+        if prose:
+            notebook_cells.append(new_markdown_cell(prose))
+        if source:
+            notebook_cells.append(new_code_cell(source))
+
+    trailing = md_content[cursor:].strip()
+    if trailing:
+        notebook_cells.append(new_markdown_cell(trailing))
+    return notebook_cells
+
+# Convert every non-ignored Markdown episode into a notebook with the same base name
+for filename in os.listdir(episodes_dir):
+    if not filename.endswith(".md") or filename in ignore_list:
+        continue
+
+    md_path = os.path.join(episodes_dir, filename)
+    # Swap only the trailing extension; str.replace would rewrite every ".md" in the name
+    ipynb_path = os.path.join(notebooks_dir, os.path.splitext(filename)[0] + ".ipynb")
+
+    # Read Markdown content
+    with open(md_path, "r", encoding="utf-8") as f:
+        md_content = f.read()
+
+    # Split into Markdown and code cells and wrap them in a notebook
+    nb = new_notebook(cells=split_markdown(md_content))
+
+    # Save as .ipynb
+    with open(ipynb_path, "w", encoding="utf-8") as f:
+        nbformat.write(nb, f)
+
+print("Conversion complete! Excluded:", ", ".join(ignore_list))