Skip to content

Commit e1e4484

Browse files
authored
Refactor code and add test (#5)
* refactor code
* add test_sequentialize_header_priorities
* add test for get_headers
* use scm version
* add tests test_create_github_header_anchor and test_create_table_of_contents
* add test for find_toc_start and find_toc_end
* add test_create_table_of_contents_github
* extract write_results and add tests
* add python36 support
1 parent bb9cdd8 commit e1e4484

File tree

9 files changed

+230
-33
lines changed

9 files changed

+230
-33
lines changed

bin/markdown-toc

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/usr/bin/env python3
22

3-
import markdowntoc
3+
from src import markdowntoc
4+
45
markdowntoc.main()

dev-requirements.txt

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#
2+
# This file is autogenerated by pip-compile with python 3.6
3+
# To update, run:
4+
#
5+
# pip-compile --extra=dev --output-file=dev-requirements.txt
6+
#
7+
attrs==22.1.0
8+
# via pytest
9+
importlib-metadata==4.8.3
10+
# via
11+
# pluggy
12+
# pytest
13+
iniconfig==1.1.1
14+
# via pytest
15+
packaging==21.3
16+
# via pytest
17+
pluggy==1.0.0
18+
# via pytest
19+
py==1.11.0
20+
# via pytest
21+
pyparsing==3.0.9
22+
# via packaging
23+
pytest==6.2.5
24+
# via markdowntoc (setup.py)
25+
toml==0.10.2
26+
# via pytest
27+
typing-extensions==4.1.1
28+
# via importlib-metadata
29+
zipp==3.6.0
30+
# via importlib-metadata

markdowntoc/__init__.py

-3
This file was deleted.

setup.py

+16-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from setuptools import setup
1+
from setuptools import find_packages, setup
22

33

44
def readme():
@@ -7,17 +7,25 @@ def readme():
77

88

99
setup(
10-
name="markdown-toc",
11-
version="1.2.4", # Reflected in __init__
10+
name="markdowntoc",
11+
setup_requires=["setuptools_scm"],
12+
use_scm_version=True,
1213
description="Autogenerated Table of Contents for Github Markdown or Bear Notes",
1314
long_description=readme(),
1415
long_description_content_type="text/markdown",
15-
url="https://github.com/alexander-lee/markdown-github-bear-toc",
16-
author="Alexander Lee",
17-
author_email="[email protected]",
16+
url="https://github.com/qiaouchicago/markdown-toc",
17+
author="Qiao Qiao",
18+
author_email="[email protected]",
1819
license="LICENSE",
1920
keywords="markdown md github bear table of contents toc",
20-
packages=["markdowntoc"],
21-
install_requires=["python-dateutil"],
21+
package_dir={"": "src"},
22+
packages=find_packages(where="src"),
23+
include_package_data=True,
24+
install_requires=[],
25+
extras_require={
26+
"dev": [
27+
"pytest~=6.2",
28+
]
29+
},
2230
scripts=["bin/markdown-toc"],
2331
)

src/markdowntoc/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from pkg_resources import get_distribution
2+
3+
from .markdowntoc import main
4+
5+
VERSION = get_distribution(__name__).version

markdowntoc/markdowntoc.py src/markdowntoc/markdowntoc.py

+13-21
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import argparse
44
from os import path
5-
from urllib.parse import quote
65

76

87
def get_parser():
@@ -52,7 +51,7 @@ def get_parser():
5251
return parser
5352

5453

55-
def get_headers(md_text, max_priority):
54+
def get_headers(md_text, max_priority=3):
5655
"""
5756
Retrieves a list of header, priority pairs in a given Markdown text.
5857
@@ -107,16 +106,6 @@ def sequentialize_header_priorities(header_priority_pairs):
107106
return header_priority_pairs
108107

109108

110-
def create_bear_header_anchor(header_title, note_uuid):
111-
"""
112-
Returns a markdown anchor of a Bear x-callback-url to the header.
113-
"""
114-
header_title_url_safe = quote(header_title)
115-
return "[{}](bear://x-callback-url/open-note?id={}&header={})".format(
116-
header_title, note_uuid, header_title_url_safe
117-
)
118-
119-
120109
def create_github_header_anchor(header_title):
121110
"""
122111
Returns GitHub Markdown anchor to the header.
@@ -208,16 +197,9 @@ def find_toc_end(md_text_lines):
208197
return len(md_text_lines)
209198

210199

211-
def main():
212-
parser = get_parser()
213-
214-
args = parser.parse_args()
215-
params = vars(args)
216-
217-
md_text_toc_pairs, identifiers = create_table_of_contents_github(params)
218-
200+
def write_results(md_text_toc_pairs, identifiers, to_file=True):
219201
for i, (md_text, toc_lines) in enumerate(md_text_toc_pairs):
220-
if params["write"]:
202+
if to_file:
221203
# Inject Table of Contents (Title, \n, Table of Contents, \n, Content)
222204
text_list = md_text.splitlines()
223205
toc_start = find_toc_start(text_list)
@@ -240,5 +222,15 @@ def main():
240222
print("\n".join(toc_lines) + "\n")
241223

242224

225+
def main():
226+
parser = get_parser()
227+
228+
args = parser.parse_args()
229+
params = vars(args)
230+
231+
md_text_toc_pairs, identifiers = create_table_of_contents_github(params)
232+
write_results(md_text_toc_pairs, identifiers, params["write"])
233+
234+
243235
if __name__ == "__main__":
244236
main()

tests/__init__.py

Whitespace-only changes.

tests/data/processed.py

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
data = [
2+
(
3+
'[![Build Status](https://app.travis-ci.com/NCI-GDC/plaster.svg?token=5s3bZRahNJnkspYEMwZC&branch=master)](https://app.travis-ci.com/NCI-GDC/plaster)\n\n# The Data Commons Model Source Generator Project (Plaster)\n\nGDC internship project for generating data model source code.\n\n<!-- toc -->\n<!-- tocstop -->\n\n# Purpose\n\nThis project is a drop-in replacement to the project\nhttps://github.com/NCI-GDC/gdcdatamodel, without challenges and obscurity associated\nwith using gdcdatamodel. The resulting code will be readable, pass static and linting\nchecks, completely remove delays from dictionary load times.\n\n# Goal\n\nGiven any compliant gdcdictionary, generate source code that can replace the\ngdcdatamodel runtime generated code.\n\n# Data Commons Models\n\nThe data commons are a collection of data structures representing concepts within a\nsubject area. These data structures usually form a graph with edges as relationships to\none another. The data structures and relationships are defined as JSON schema in yaml\nfiles that are distributed via a git repository. These definitions are called\nDictionaries for short. The gdcdictionary is one example of a data commons with a\nprimarily focus on cancer. Dictionaries are updated and released frequently, with each\nrelease adding or removing nodes, edges, or properties.\n\nThese data structures are converted to Python source code at runtime by the gdcdatamodel\nproject. For example, the case yaml file will autogenerate the models.Case Python class\nwith properties and methods matching those defined in the yaml file. The generated\nsource codes are sqlalchemy database entities that map to tables in the database.\n\nThe psqlgraph project makes querying using these entities more uniform across different\nuse cases, by exposing common modules, classes and functions that are useful for\nmanipulating data stored using sqlalchemy.\n\n## Problems:\n\n- Runtime generated code cannot be peer reviewed or inspected. 
This forces developers to\n switch between dictionary definitions and code to understand what a particular piece\n of code is doing. Most projects within the center have this problem since they all\n rely on gdcdatamodel for the database entities.\n- Runtime generated code also means no type checking, linting and little chance of\n running static analysis tools like flake8\n- Runtime model code generation takes a few seconds (might be a few minutes - Qiao) to\n complete. This means that any project that makes use of gdcdatamodel must pay for this\n in one way or another. The most common is usually start up time.\n\nIn summary, most projects within the center suffer just because they rely on\ngdcdatamodel for database entities. The major goal of this project is to eliminate the\nruntime code generation feature on gdcdatamodel, thereby eliminating the above-mentioned\nproblems.\n\n# Project Details\n\n## Requirements\n\n- Python >= 3.8\n- No direct dependency on any dictionary versions\n- Must expose scripts that can be invoked to generate source code\n- Must include unit and integration tests with over 80% code coverage\n- Must provide typings and pass mypy checks\n\n## Features\n\n- Dictionary selection and loading\n- Template management\n- Code generation\n- Scripts\n\n## Dictionary selection and loading\n\nThis module will be responsible for loading a dictionary given necessary parameters.\nThese parameters will include:\n\n- A git URL\n- A target version, tag, commit or branch name\n- A label used for referencing the dictionary later\n\n## Template Management\n\nThis module will be responsible for the templates used to generate the final source code\n\n# How to use\n\n## Install plaster\n\n```bash\npip install .\n```\n\n## Generate gdcdictionary\n\n```bash\nplaster generate -p gdcdictionary -o "example/gdcdictionary"\n```\n\n## Generate biodictionary\n\n```bash\nplaster generate -p biodictionary -o "example/biodictionary"\n```\n\n# Associated Projects\n\n- 
biodictionary: https://github.com/NCI-GDC/biodictionary\n- gdcdatamodel: https://github.com/gdcdatamodel\n- gdcdictionary: https://github.com/NCI-GDC/gdcdictionary\n- psqlgml: https://github.com/NCI-GDC/psqlgml\n- psqlgraph: https://github.com/NCI-GDC/psqlgraph\n\n# Repo Visualizer\n\n![Visualization of this repo](images/diagram.svg)\n',
4+
[
5+
"# Table of Contents",
6+
"",
7+
"- [Purpose](#Purpose)",
8+
"- [Goal](#Goal)",
9+
"- [Data Commons Models](#Data-Commons-Models)",
10+
" - [Problems:](#Problems:)",
11+
"- [Project Details](#Project-Details)",
12+
" - [Requirements](#Requirements)",
13+
" - [Features](#Features)",
14+
" - [Dictionary selection and loading](#Dictionary-selection-and-loading)",
15+
" - [Template Management](#Template-Management)",
16+
"- [How to use](#How-to-use)",
17+
" - [Install plaster](#Install-plaster)",
18+
" - [Generate gdcdictionary](#Generate-gdcdictionary)",
19+
" - [Generate biodictionary](#Generate-biodictionary)",
20+
"- [Associated Projects](#Associated-Projects)",
21+
"- [Repo Visualizer](#Repo-Visualizer)",
22+
],
23+
)
24+
]

tests/test_markdowntoc.py

+140
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import filecmp
2+
3+
import pkg_resources
4+
import pytest
5+
6+
from markdowntoc import (
7+
create_github_header_anchor,
8+
create_table_of_contents,
9+
create_table_of_contents_github,
10+
find_toc_end,
11+
find_toc_start,
12+
get_headers,
13+
get_parser,
14+
sequentialize_header_priorities,
15+
write_results,
16+
)
17+
from tests.data import processed
18+
19+
20+
@pytest.fixture
21+
def get_data():
22+
file = pkg_resources.resource_filename("tests", "data/before.md")
23+
with open(file) as fp:
24+
return fp.read()
25+
26+
27+
@pytest.fixture
28+
def get_md_lines(get_data):
29+
return get_data.splitlines()
30+
31+
32+
def test_sequentialize_header_priorities():
33+
data = [("Header 1", 1), ("Header 3", 3), ("Header 4", 4)]
34+
result = sequentialize_header_priorities(data)
35+
assert result == [("Header 1", 1), ("Header 3", 2), ("Header 4", 3)]
36+
37+
data = [("Header 1", 1), ("Header 2", 2), ("Header 3", 3)]
38+
result = sequentialize_header_priorities(data)
39+
assert result == [("Header 1", 1), ("Header 2", 2), ("Header 3", 3)]
40+
41+
42+
def test_get_headers(get_data):
43+
headers = get_headers(get_data)
44+
assert headers == [
45+
("The Data Commons Model Source Generator Project (Plaster)", 1),
46+
("Purpose", 1),
47+
("Goal", 1),
48+
("Data Commons Models", 1),
49+
("Problems:", 2),
50+
("Project Details", 1),
51+
("Requirements", 2),
52+
("Features", 2),
53+
("Dictionary selection and loading", 2),
54+
("Template Management", 2),
55+
("How to use", 1),
56+
("Install plaster", 2),
57+
("Generate gdcdictionary", 2),
58+
("Generate biodictionary", 2),
59+
("Associated Projects", 1),
60+
("Repo Visualizer", 1),
61+
]
62+
63+
64+
def test_create_github_header_anchor():
65+
res = create_github_header_anchor("Purpose")
66+
assert res == "[Purpose](#Purpose)"
67+
68+
69+
def test_create_table_of_contents():
70+
pairs = [
71+
("The Data Commons Model Source Generator Project (Plaster)", 1),
72+
("Purpose", 1),
73+
("Goal", 1),
74+
("Data Commons Models", 1),
75+
("Problems:", 2),
76+
("Project Details", 1),
77+
("Requirements", 2),
78+
("Features", 2),
79+
("Dictionary selection and loading", 2),
80+
("Template Management", 2),
81+
("How to use", 1),
82+
("Install plaster", 2),
83+
("Generate gdcdictionary", 2),
84+
("Generate biodictionary", 2),
85+
("Associated Projects", 1),
86+
("Repo Visualizer", 1),
87+
]
88+
89+
res = create_table_of_contents(pairs)
90+
assert res == [
91+
"# Table of Contents",
92+
"",
93+
"- [The Data Commons Model Source Generator Project (Plaster)](#The-Data-Commons-Model-Source-Generator-Project-(Plaster))",
94+
"- [Purpose](#Purpose)",
95+
"- [Goal](#Goal)",
96+
"- [Data Commons Models](#Data-Commons-Models)",
97+
" - [Problems:](#Problems:)",
98+
"- [Project Details](#Project-Details)",
99+
" - [Requirements](#Requirements)",
100+
" - [Features](#Features)",
101+
" - [Dictionary selection and loading](#Dictionary-selection-and-loading)",
102+
" - [Template Management](#Template-Management)",
103+
"- [How to use](#How-to-use)",
104+
" - [Install plaster](#Install-plaster)",
105+
" - [Generate gdcdictionary](#Generate-gdcdictionary)",
106+
" - [Generate biodictionary](#Generate-biodictionary)",
107+
"- [Associated Projects](#Associated-Projects)",
108+
"- [Repo Visualizer](#Repo-Visualizer)",
109+
]
110+
111+
112+
def test_find_toc_start(get_md_lines):
113+
line_number = find_toc_start(get_md_lines)
114+
assert line_number == 7
115+
116+
117+
def test_find_toc_end(get_md_lines):
118+
line_number = find_toc_end(get_md_lines)
119+
assert line_number == 7
120+
121+
122+
def test_create_table_of_contents_github():
123+
parser = get_parser()
124+
args = parser.parse_args()
125+
params = vars(args)
126+
params["name"] = [pkg_resources.resource_filename("tests", "data/before.md")]
127+
128+
res = create_table_of_contents_github(params)
129+
130+
assert res[0] == processed.data
131+
assert res[1] == params["name"]
132+
133+
134+
def test_write_results(tmp_path):
135+
md_text_toc_pairs = processed.data
136+
identifiers = [tmp_path / "result.md"]
137+
write_results(md_text_toc_pairs, identifiers)
138+
139+
after = pkg_resources.resource_filename("tests", "data/after.md")
140+
assert filecmp.cmp(identifiers[0], after)

0 commit comments

Comments (0)