Skip to content

Commit 1b6cdaa

Browse files
committed
opz: opz add
1 parent 71f5ef6 commit 1b6cdaa

File tree

7 files changed

+685
-19
lines changed

7 files changed

+685
-19
lines changed
File renamed without changes.

projects/opz/system_design_scenarios.md

Lines changed: 312 additions & 0 deletions
Large diffs are not rendered by default.

projects/sudoku/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
.env
22
.venv/**/**
33
soup_dump.html
4+
lib/__pycache__/**/**

projects/sudoku/lib/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from . import latex_to_md

projects/sudoku/lib/latex_to_md.py

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
import re
2+
from collections import defaultdict
3+
4+
# FROM: https://github.com/ajtulloch/LaTeX2Markdown
5+
# ------------------------------------------------------------------------------
6+
7+
# Basic configuration - modify this to change output formatting
8+
# Per-block output settings, keyed by LaTeX environment / command name.
#
# Keys read by LaTeX2Markdown:
#   markdown_heading  - Markdown heading marker placed before the block name.
#   pretty_name       - Human-readable name printed in the heading.
#   show_count        - When True, a running number is printed in the heading.
#   line_indent_char  - Prefix added to every content line of a block.
#   list_heading      - Text that replaces ``\item`` inside list environments.
_block_configuration = {
    "chapter": {"markdown_heading": "##", "pretty_name": "", "show_count": False},
    "enumerate": {
        "line_indent_char": "",
        "list_heading": "1. ",
        "markdown_heading": "",
        "pretty_name": "",
        "show_count": False,
    },
    "exer": {
        "line_indent_char": "> ",
        "markdown_heading": "####",
        "pretty_name": "Exercise",
        "show_count": True,
    },
    "itemize": {
        "line_indent_char": "",
        "list_heading": "* ",
        "markdown_heading": "",
        "pretty_name": "",
        "show_count": False,
    },
    "lem": {
        "line_indent_char": "> ",
        "markdown_heading": "####",
        "pretty_name": "Lemma",
        "show_count": True,
    },
    "lstlisting": {
        # Four spaces: the minimum indent Markdown treats as a code block.
        # NOTE(review): the scraped source showed a single space here, which
        # looks like collapsed whitespace - confirm against the repository.
        "line_indent_char": "    ",
        "markdown_heading": "",
        "pretty_name": "",
        "show_count": False,
    },
    "proof": {
        "line_indent_char": "",
        "markdown_heading": "####",
        "pretty_name": "Proof",
        "show_count": False,
    },
    "prop": {
        "line_indent_char": "> ",
        "markdown_heading": "####",
        "pretty_name": "Proposition",
        "show_count": True,
    },
    "section": {"markdown_heading": "###", "pretty_name": "", "show_count": False},
    "subsection": {"markdown_heading": "####", "pretty_name": "", "show_count": False},
    "thm": {
        "line_indent_char": "> ",
        "markdown_heading": "####",
        "pretty_name": "Theorem",
        "show_count": True,
    },
}
63+
64+
# ------------------------------------------------------------------------------
65+
66+
67+
class LaTeX2Markdown(object):
    r"""Convert a LaTeX source string to Markdown.

    Initialise with a LaTeX string and call ``to_markdown()`` to obtain the
    converted text; ``to_latex()`` returns the original input unchanged.

    To modify the outputted Markdown, either pass a custom
    ``block_configuration`` mapping or modify the module-level
    ``_block_configuration`` variable before initialising the instance.
    """

    # The patterns below do not depend on instance state, so they are
    # compiled once at class level instead of on every construction.

    # Select everything in the main matter.
    _main_re = re.compile(
        r"""\\begin{document}
            (?P<main>.*)
            \\end{document}""",
        flags=re.DOTALL | re.VERBOSE,
    )

    # Select all our block materials.
    _block_re = re.compile(
        r"""\\begin{(?P<block_name>exer|proof|thm|lem|prop)} # block name
            (\[(?P<block_title>.*?)\])?                      # Optional block title
            (?P<block_contents>.*?)                          # Non-greedy block contents
            \\end{(?P=block_name)}""",                       # closing block
        flags=re.DOTALL | re.VERBOSE,
    )

    # Select all our list blocks.
    _lists_re = re.compile(
        r"""\\begin{(?P<block_name>enumerate|itemize)} # list name
            (\[.*?\])?                                 # Optional enumerate settings i.e. (a)
            (?P<block_contents>.*?)                    # Non-greedy list contents
            \\end{(?P=block_name)}""",                 # closing list
        flags=re.DOTALL | re.VERBOSE,
    )

    # Select all our headers.
    _header_re = re.compile(
        r"""\\(?P<header_name>chapter|section|subsection) # Header
            {(?P<header_contents>.*?)}""",                # Header title
        flags=re.DOTALL | re.VERBOSE,
    )

    # Select all our 'auxiliary blocks' - these need special treatment
    # for future use - e.g. pygments highlighting instead of code blocks
    # in Markdown.
    _aux_block_re = re.compile(
        r"""\\begin{(?P<block_name>lstlisting)} # block name
            (?P<block_contents>.*?)             # Non-greedy block contents
            \\end{(?P=block_name)}""",          # closing block
        flags=re.DOTALL | re.VERBOSE,
    )

    def __init__(self, latex_string, block_configuration=None, block_counter=None):
        """Store the source text and the formatting state.

        Args:
            latex_string: The LaTeX source to convert.
            block_configuration: Mapping from block name to its formatting
                options. Defaults to the module-level
                ``_block_configuration``, looked up at construction time.
            block_counter: Mapping from block name to the next number to
                print for that block. Defaults to a fresh counter that
                starts every block at 1, so separate instances never share
                numbering unless a counter is passed in explicitly.
        """
        if block_configuration is None:
            block_configuration = _block_configuration
        if block_counter is None:
            # A counter in the signature default would be created once and
            # shared by every instance; build it per instance instead.
            block_counter = defaultdict(lambda: 1)

        self._block_configuration = block_configuration
        self._latex_string = latex_string
        self._block_counter = block_counter

    def _replace_header(self, matchobj):
        """Create a header string for a section/subsection/chapter match.

        For example, "### 2 - Integral Calculus\\n".
        """
        header_name = matchobj.group("header_name")
        header_contents = matchobj.group("header_contents")

        header = self._format_block_name(header_name)
        parts = [header]
        # If we have a count, separate the title from the count with a dash.
        if self._block_configuration[header_name].get("show_count"):
            parts.append("-")
        parts.append(header_contents)

        # Joining only the non-empty parts avoids doubled spaces when the
        # separator (or the heading marker) is empty.
        return " ".join(part for part in parts if part) + "\n"

    def _replace_block(self, matchobj):
        """Create the string that replaces an entire block.

        The string consists of a header (e.g. "#### Exercise 1"), a blank
        line, and the formatted block contents. How the contents are
        indented or blockquoted is controlled by the block configuration.
        """
        block_name = matchobj.group("block_name")
        block_contents = matchobj.group("block_contents")
        # Not every pattern defines a block_title group, so use .get.
        block_title = matchobj.groupdict().get("block_title")

        # We have to format differently for lists.
        if block_name in {"itemize", "enumerate"}:
            formatted_contents = self._format_list_contents(block_name, block_contents)
        else:
            formatted_contents = self._format_block_contents(block_name, block_contents)

        header = self._format_block_name(block_name, block_title)

        return "{header}\n\n{block_contents}".format(
            header=header, block_contents=formatted_contents
        )

    def _format_block_contents(self, block_name, block_contents):
        """Prefix every stripped content line with the block's indent string.

        The prefix is the ``line_indent_char`` entry of the block's
        configuration. Each output line ends with a newline.
        """
        line_indent_char = self._block_configuration[block_name]["line_indent_char"]

        stripped_lines = (line.strip() for line in block_contents.strip().split("\n"))
        return "".join(line_indent_char + line + "\n" for line in stripped_lines)

    def _format_list_contents(self, block_name, block_contents):
        r"""Replace each ``\item`` with the configured list marker.

        The marker is the ``list_heading`` entry of the block's
        configuration. Lines are stripped and newline-terminated as in
        ``_format_block_contents``, but no indent prefix is added.
        """
        list_heading = self._block_configuration[block_name]["list_heading"]

        stripped_lines = (line.strip() for line in block_contents.strip().split("\n"))
        return "".join(
            line.replace(r"\item", list_heading) + "\n" for line in stripped_lines
        )

    def _format_block_name(self, block_name, block_title=None):
        """Format the Markdown header associated with a block.

        The header is built from the heading marker, the pretty name, the
        running count (when ``show_count`` is set) and the optional title
        in parentheses; empty parts are omitted. The block's counter is
        advanced on every call, whether or not the count is shown.
        """
        block_config = self._block_configuration[block_name]
        pretty_name = block_config["pretty_name"]
        show_count = block_config["show_count"]
        markdown_heading = block_config["markdown_heading"]

        block_count = str(self._block_counter[block_name]) if show_count else ""
        self._block_counter[block_name] += 1

        parts = [markdown_heading, pretty_name, block_count]
        if block_title:
            parts.append("({block_title})".format(block_title=block_title))

        return " ".join(part for part in parts if part).strip()

    def _latex_to_markdown(self):
        """Main function, returns the formatted Markdown as a string.

        Uses a lot of custom regexes to fix a lot of content - you may have
        to add or remove some regexes to suit your own needs.
        """
        # Get main content, skipping preamble and closing tags. Input
        # without a document environment is converted as a whole.
        main_match = self._main_re.search(self._latex_string)
        output = main_match.group("main") if main_match else self._latex_string

        # Reformat lists, blocks, and headers.
        output = self._lists_re.sub(self._replace_block, output)
        output = self._block_re.sub(self._replace_block, output)
        output = self._header_re.sub(self._replace_header, output)
        output = self._aux_block_re.sub(self._replace_block, output)

        # Fix \\ formatting for line breaks in align blocks.
        output = re.sub(r" \\\\", r" \\\\\\\\", output)
        # Convert align* block to align - this fixes formatting.
        output = re.sub(r"align\*", r"align", output)

        # Fix emph, textbf, texttt formatting.
        output = re.sub(r"\\emph{(.*?)}", r"*\1*", output)
        output = re.sub(r"\\textbf{(.*?)}", r"**\1**", output)
        output = re.sub(r"\\texttt{(.*?)}", r"`\1`", output)

        # Fix \% formatting.
        output = re.sub(r"\\%", r"%", output)
        # Fix argmax, etc.
        output = re.sub(r"\\arg(max|min)", r"\\text{arg\1}", output)

        # Throw away content in IGNORE/END block.
        output = re.sub(
            r"% LaTeX2Markdown IGNORE(.*?)\% LaTeX2Markdown END",
            "",
            output,
            flags=re.DOTALL,
        )
        return output.strip()

    def to_markdown(self):
        """Return the input converted to Markdown."""
        return self._latex_to_markdown()

    def to_latex(self):
        """Return the original LaTeX string unchanged."""
        return self._latex_string

0 commit comments

Comments
 (0)