Skip to content

Fix notebook converter #1360

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 1 addition & 34 deletions notebook_converter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ conda install -c conda-forge pandoc
## Running the Converter
```bash
python3 notebook_converter/notebook_to_demo.py \
path/to/my/notebook.ipynb \
--author "John Doe" "Some info about author" "some/path/to/profile/picture/john_doe.png"
path/to/my/notebook.ipynb
```

### If the notebook is not going to be executable by sphinx-build
Expand All @@ -35,7 +34,6 @@ After that, you can indicate if the notebook is not executable by setting `--is-
```bash
python3 notebook_converter/notebook_to_demo.py \
path/to/my/notebook.ipynb \
--author "John Doe" "Some info about author" "some/path/to/profile/picture/john_doe.png"
--is-executable=False
```

Expand All @@ -49,37 +47,6 @@ If it is omitted, then this information is determined based on the notebook name
The path to the notebook, if absolute path is passed, it will be used verbatim. If relative path is passed,
it must be relative to the script's location.

#### `--author` (optional)
Information about the author, in the syntax of `--author "Full Name" "Bio" "path/to/profile_picture.png"`

For demos with multiple authors, then this flag can be passed multiple times:
```bash
python3 notebook_converter/notebook_to_demo.py \
path/to/my/notebook.ipynb \
--author "John Doe" "Some info about author" "some/path/to/profile/picture/john_doe.png" \
--author "Jane Doe" "Some info about author" "some/path/to/profile/picture/jane_doe.png" \
--is-executable=False
```

#### `--author-file` (optional)
If the notebook being converted is being used by an existing author, this option can be used to pass the location of the existing author-file.

```bash
--author-file "qml/_static/authors/john_doe.txt"
```

Similar to `--author`, this option can be passed multiple times. It can also be passed alongside `--author`

```bash
python3 notebook_converter/notebook_to_demo.py \
path/to/my/notebook.ipynb \
--author "John Doe" "Some info about author" "some/path/to/profile/picture/john_doe.png" \
--author "Jane Doe" "Some info about author" "some/path/to/profile/picture/jane_doe.png" \
--author-file "path/to/bob_doe.txt" \
--author-file "path/to/rob_doe.txt" \
--is-executable=False
```

#### `--is-executable` (True|False) (optional)
Indicate if the notebook is intended to be an executable demo or non-executable. If this is not passed,
the information is inferred from the notebook name. If the notebook name startswith `tutorial_` then it is
Expand Down
155 changes: 5 additions & 150 deletions notebook_converter/notebook_to_demo.py
Original file line number Diff line number Diff line change
@@ -1,143 +1,22 @@
#!/usr/bin/env python3

import json
import os
import re
import json
import shutil
from itertools import chain
from pathlib import Path, PurePosixPath
from base64 import b64decode
from typing import Dict, List, Union, Optional
from pathlib import Path, PurePosixPath
from typing import Dict, List, Optional, Union

import pypandoc

CWD = Path(os.path.dirname(os.path.realpath(__file__)))
REPO_ROOT = CWD.parent

MATCH_AUTHOR_FILE = re.compile(r"\.{2} +bio:{2} +(?P<name>[\w '\-]+)\n+ *(:photo:)? *(?P<profile_picture>.*\.[a-zA-Z0-9]+)?\n+ *(?P<bio>.*)",
flags=re.M | re.S)

AUTHORS = {
"link-dir": PurePosixPath("../_static/authors"),
"save-dir": REPO_ROOT / "_static" / "authors"
}
DEMO = {
"link-dir": PurePosixPath("../demonstrations"),
"save-dir": REPO_ROOT / "demonstrations"
}


def format_author_name(name: str) -> str:
return re.sub(r"[ \-'\u0080-\uFFFF]+", "_", name).lower()

def parse_author_file(author_file_path: Union[Path, str]) -> Optional[Dict]:
author_file_loc = Path(author_file_path)
author_file_name = author_file_loc.stem
with author_file_loc.open() as fh:
content = fh.read()

m = MATCH_AUTHOR_FILE.match(content)
if not m:
return None

profile_picture_path = m.group("profile_picture")
if profile_picture_path:
profile_picture_path = Path(profile_picture_path)
if not profile_picture_path.is_absolute():
if profile_picture_path.is_relative_to(AUTHORS["link-dir"]):
profile_picture_path = (REPO_ROOT / "_static" / profile_picture_path).resolve()
else:
profile_picture_path = (author_file_loc.parent / profile_picture_path).resolve()
else:
profile_picture_path = None
return {
"name": m.group("name"),
"bio": m.group("bio"),
"profile_picture": profile_picture_path,
"formatted_name": format_author_name(author_file_name)
}


def set_author_info(author: Dict) -> Path:
"""

:param author: A dictionary of author info with the following syntax
{
"name": Author Name,
"bio": Author Info,
"profile_picture": /path/to/profile_picture.png,
"formatted_name": "<Optional> Author name"
}
:return: Path object of the file that was saved and author info txt
"""
name = author["name"]
bio = author.get("bio", "").strip()
profile_picture_loc = author.get("profile_picture")
if profile_picture_loc:
profile_picture_loc = Path(profile_picture_loc)
name_formatted = author.get("name_formatted", profile_picture_loc.stem if profile_picture_loc else format_author_name(name)).lower()

if profile_picture_loc:
new_profile_picture_save_loc = AUTHORS["save-dir"] / profile_picture_loc.name
else:
new_profile_picture_save_loc = None

info_file_name = f"{name_formatted}.txt"
info_file_save_loc = AUTHORS["save-dir"] / info_file_name

if profile_picture_loc:
try:
shutil.copy(profile_picture_loc, new_profile_picture_save_loc)
except shutil.SameFileError:
pass

if profile_picture_loc:
author_link = (AUTHORS["link-dir"] / profile_picture_loc.name).as_posix()
photo_text = f":photo: {author_link}"
else:
photo_text = ""
author_txts = [f".. bio:: {name}"]
if photo_text:
author_txts.append(f" {photo_text}")
if bio:
if photo_text:
author_txts.append("")
author_txts.append(f" {bio}")
author_txt = "\n".join(author_txts)

with info_file_save_loc.open("w") as fh:
fh.write(author_txt)

return info_file_save_loc

def set_authors(*authors: Dict) -> str:
"""
:param authors: An unpacked list of dictionaries, each one having the following schema:
{
"name": Author Name,
"bio": Author Info,
"profile_picture": /path/to/profile_picture.png,
"formatted_name": "<Optional> Author name"
}
:return:
"""
author_files = [
AUTHORS['link-dir'] / set_author_info(author).name
for author in authors
]

header = [
"About the author",
"----------------"
]
author_txts = [
f"# .. include:: {author_txt_link}\n"
for author_txt_link in author_files
]

author_sphinx_txt = "\n".join([f"# {line}" for line in chain(header, author_txts)])
return f"\n\n{'#' * 70}\n{author_sphinx_txt}"

def str_to_bool(s: str) -> Optional[bool]:
if isinstance(s, bool):
return s
Expand Down Expand Up @@ -177,7 +56,7 @@ def add_property_newline(rst: str) -> str:
return re.sub(r"(\w+) (:property=)", r"\1\n \2", rst)


def generate_code_output_block(output_source: List[str] = None, only_header: bool = False) -> str:
def generate_code_output_block(output_source: Optional[List[str]] = None, only_header: bool = False) -> str:
output_header = "\n".join(
[
f"# {line}"
Expand Down Expand Up @@ -259,7 +138,7 @@ def convert_notebook_to_python(
output_data["text/plain"], only_header=j != 0
)
elif output["output_type"] == "display_data":
cell_id = cell["id"]
cell_id = cell.get("id", f"c_{i}")
if "text/plain" in output_data and "image/png" not in output_data:
if j == 0:
ret_python_str += generate_code_output_block()
Expand Down Expand Up @@ -337,36 +216,12 @@ def convert_notebook_to_python(
with notebook_file.open() as fh:
nb = json.load(fh)

authors = []
cli_authors = results.author or []
cli_authors_files = results.author_file or []
for author_file in cli_authors_files:
author_info = parse_author_file(author_file)
if author_info is None:
raise ValueError(f"Unable to parse author file {author_file}")
authors.append(author_info)
for author_info in cli_authors:
name = author_info[0]
bio = author_info[1]
profile_picture = author_info[2]
formatted_name = format_author_name(name)
authors.append({
"name": name,
"bio": bio,
"profile_picture": profile_picture,
"formatted_name": formatted_name
})

nb_py = convert_notebook_to_python(
nb,
notebook_file_name,
notebook_is_executable
)

if authors:
author_sphinx = set_authors(*authors)
nb_py += author_sphinx

with (DEMO["save-dir"] / f"{notebook_file_name}.py").open("w") as fh:
fh.write(nb_py)