|
1 | 1 | import os |
| 2 | +import re |
2 | 3 | import sys |
3 | 4 | import shutil |
4 | 5 | import hashlib |
|
# Number of posts bundled into each compiled batch (consumed by batch()).
bch_size = 5
15 | 16 |
|
class File():
    """Thin wrapper around a file path with whole-file read/write helpers.

    The constructor eagerly creates the parent directory so that later
    writes never fail on a missing directory.
    """
    def __init__(self, path: str):
        self.path = path
        self.dir = os.path.dirname(self.path)
        self.filename = os.path.basename(self.path)
        # Bug fix: os.makedirs("") raises FileNotFoundError, so only
        # create the parent directory when the path actually has one.
        if self.dir:
            os.makedirs(self.dir, exist_ok=True)

    def read(self) -> str:
        """Return the entire file content as one UTF-8 string."""
        with open(self.path, "r", encoding="utf-8") as f:
            return f.read()

    def readlines(self) -> list:
        """Return the file content as a list of lines (newlines kept)."""
        with open(self.path, "r", encoding="utf-8") as f:
            return f.readlines()

    def write(self, content: str):
        """Overwrite (or create) the file with `content`."""
        # "w" suffices here; the previous "w+" read capability was unused.
        with open(self.path, "w", encoding="utf-8") as f:
            f.write(content)

    def writelines(self, lines: list):
        """Overwrite (or create) the file with `lines` (no newlines added)."""
        with open(self.path, "w", encoding="utf-8") as f:
            f.writelines(lines)
36 | 37 |
|
def hash_str(s):
    """Return the hex-encoded SHA-256 digest of the string `s`."""
    digest = hashlib.sha256(s.encode())
    return digest.hexdigest()
39 | 40 |
|
def metaext(src):
    """Extract `---`-delimited front matter from `src` into the global `meta`.

    Each consumed line (delimiters and `key: value` pairs) is blanked out
    in place; when metadata was found the stripped file is written back.
    """
    print(" Extracting metadata:")
    lines = File(src).readlines()
    global meta
    meta = {}
    inmd = False
    for fpos, line in enumerate(lines):
        if line.strip() == "---":
            lines[fpos] = ""
            if inmd:
                # Closing delimiter: metadata block is done.
                break
            inmd = True
        else:
            # Bug fix: only parse `key: value` lines while inside the
            # `---` block; previously any colon-bearing line before the
            # opening delimiter was swallowed as metadata and blanked.
            if not inmd or ":" not in line:
                continue
            key, val = line.split(":", 1)
            lines[fpos] = ""
            meta[key.strip()] = val.strip()
    if meta:
        print("title: %s; author: %s; date: %s" % (meta.get("title"), meta.get("author"), meta.get("date")))
        File(src).writelines(lines)
    else:
        print(" Error: no metadata found")
| 64 | + |
def sanitize_meta(value: str) -> str:
    """Sanitize a metadata value for safe LaTeX insertion.

    Strips one pair of matching surrounding quotes, then escapes every
    LaTeX special character. Returns "" for None.
    """
    if value is None:
        return ""

    sanitized = value.strip()
    # Bug fix: strip only a *matching* pair of quotes; the previous check
    # also stripped mismatched pairs like "abc' and mangled a
    # single-character quote string into "".
    if len(sanitized) >= 2 and sanitized[0] == sanitized[-1] and sanitized[0] in "\"'":
        sanitized = sanitized[1:-1]

    # Bug fix: the old table used raw strings like r"\\{" which are TWO
    # literal backslashes, and replaced "\" first with a braced form whose
    # braces were then re-escaped by the brace rules below.  Escape
    # backslashes via a brace-free placeholder instead, and resolve the
    # placeholder only after every other character has been handled.
    placeholder = "\x00"
    sanitized = sanitized.replace("\\", placeholder)

    replacements = {
        "{": r"\{",
        "}": r"\}",
        "$": r"\$",
        "&": r"\&",
        "#": r"\#",
        "_": r"\_",
        "%": r"\%",
        "^": r"\textasciicircum{}",
        "~": r"\textasciitilde{}",
    }
    for needle, replacement in replacements.items():
        sanitized = sanitized.replace(needle, replacement)

    return sanitized.replace(placeholder, r"\textbackslash{}")
63 | 91 |
|
def metainj(dst):
    """Prepend \\title/\\author/\\date/\\maketitle built from `meta` to `dst`."""
    print(" Injecting metadata:")
    manu = File(dst).readlines()
    header = "".join(
        "\\%s{%s}\n" % (cmd, sanitize_meta(meta.get(cmd)))
        for cmd in ("title", "author", "date")
    ) + "\\maketitle\n"
    manu.insert(0, header)
    File(dst).writelines(manu)
| 100 | + |
def texpost(dst):
    """Apply post-processing to generated TeX content.

    Rewrites \\text{...} payloads for literal safety and inserts a
    separating space after math macros that the converter may emit glued
    to the following word (TeX would otherwise read "\\colonx" as one
    control word).
    """
    content = File(dst).read()
    def _sanitize_text(match):
        # Escape the payload of a \text{...} group: backslashes become
        # \textbackslash{}, then the doubled form (produced when the
        # source already contained "\\") is collapsed back to one.
        # NOTE(review): a real TeX command inside \text{...} (e.g.
        # \text{\alpha}) would also be rewritten -- presumably md2tex
        # never emits those here; verify against its output.
        inner = match.group(1)
        inner = inner.replace("\\", "\\textbackslash{}")
        inner = inner.replace("\\textbackslash{}\\textbackslash{}", "\\textbackslash{}")
        # Escape bare underscores; by this point every original backslash
        # has already been rewritten, so the lookbehind is effectively moot.
        inner = re.sub(r"(?<!\\)_", r"\\_", inner)
        return f"\\text{{{inner}}}"

    # [^}]* deliberately restricts matching to \text groups without
    # nested braces.
    content = re.sub(r"\\text\{([^}]*)\}", _sanitize_text, content)
    content = re.sub(r"\\colon(?=\w)", r"\\colon ", content)
    content = re.sub(r"\\nabla(?=\w)", r"\\nabla ", content)
    content = re.sub(r"\\oplus(?=\w)", r"\\oplus ", content)
    # Same de-gluing fix for upper- and lower-case Greek-letter macros.
    content = re.sub(
        r"\\(Delta|delta|Gamma|gamma|Theta|theta|Lambda|lambda|Xi|xi|Pi|pi|Sigma|sigma|Upsilon|upsilon|Phi|phi|Psi|psi|Omega|omega)(?=[A-Za-z])",
        r"\\\1 ",
        content,
    )
    content = re.sub(r"\\cdot(?=\w)", r"\\cdot ", content)
    content = re.sub(r"\\times(?=\w)", r"\\times ", content)
    content = re.sub(r"\\log(?=\w)", r"\\log ", content)
    File(dst).write(content)
69 | 124 |
|
def texcomp(drv):
    """Compile the LaTeX driver `drv` (from utl_dir) into a PDF in tmp_dir.

    Copies the driver in as index.ltx together with all fonts and macro
    packages, then runs lualatex inside tmp_dir.
    """
    print(" Generating PDF:")
    shutil.copy(utl_dir + drv, tmp_dir + "index.ltx")
    for fonts in os.listdir(fnt_dir):
        shutil.copy(fnt_dir + fonts, tmp_dir + fonts)
    for macro in os.listdir(sty_dir):
        shutil.copy(sty_dir + macro, tmp_dir + macro)
    pwd = os.getcwd()
    os.chdir(tmp_dir)
    try:
        # Bug fix: lualatex only honours options that come BEFORE the
        # input file; "--interaction=batchmode" was previously ignored.
        status = os.system("lualatex --interaction=batchmode index.ltx")
        if status != 0:
            # Surface the failure; callers detect it when index.pdf is missing.
            print(f" Error: lualatex exited with status {status}")
    finally:
        # Always restore the working directory, even if lualatex blows up.
        os.chdir(pwd)
81 | 136 |
|
def pdfgenr(post):
    """Convert one post's markdown to TeX, compile it, and publish the results."""
    shutil.copytree(src_dir + post, tmp_dir, dirs_exist_ok=True)
    md = tmp_dir + "index.md"
    tex = tmp_dir + "index.tex"
    metaext(md)
    print(" Converting:")
    os.system(f"{utl_dir}md2tex {md} {tex}")
    metainj(tex)
    texpost(tex)
    texcomp("drvpst.ltx")
    # Publish the generated .tex and .pdf, then drop the scratch dir.
    dest = pbl_dir + post
    shutil.copy(tex, dest + "/index.tex")
    shutil.copy(tmp_dir + "index.pdf", dest + "/index.pdf")
    shutil.rmtree(tmp_dir)
92 | 148 |
|
def post():
    """Compile each post: convert its .md to .tex, build the PDF, and move
    .tex/.pdf (plus a content hash) to the public dir.  Posts whose
    published hash matches the current markdown are skipped."""
    cwd = os.getcwd()
    print(f" Making directory: {cwd}")
    os.chdir(utl_dir)
    os.system("make")
    os.chdir(cwd)
    # For each of the posts in the source directory
    for post in sorted(os.listdir(src_dir)):
        # Get hash of the markdown file content
        hsh = hash_str(File(src_dir + post + "/index.md").read())
        # If it is already compiled and the hash matches
        if all(
            os.path.exists(pbl_dir + post + p) for p in ["/index.tex", "/index.pdf", "/sha256"]
        ) and File(pbl_dir + post + "/sha256").read() == hsh:
            print(f" Skipping post: {post} #{hsh}")
            continue
        print(f" Processing post: {post}")
        # Ensure the publish directory exists before pdfgenr copies into it
        # (this was previously a side effect of writing the hash early).
        os.makedirs(pbl_dir + post, exist_ok=True)
        # Compile PDF
        pdfgenr(post)
        # Bug fix: record the markdown hash only AFTER a successful build.
        # Writing it up front made a failed build look up to date, so the
        # broken post was skipped on every subsequent run.
        File(pbl_dir + post + "/sha256").write(hsh)
    # Cleaning
    os.chdir(utl_dir)
    print(" Cleaning up:")
    os.system("make clean")
120 | 176 |
|
def batch():
    """Bundle consecutive posts into fixed-size "batch" compilation PDFs."""
    # This function compiles several files sequentially into one batch version
    # The number of files in each batch are defined in `bch_size`, remaining
    # files not reaching that number will not be included as a new batch.
    cwd = os.getcwd()
    print(f" Making directory: {cwd}")
    # Reading all posts (date strings)
    posts = sorted(os.listdir(src_dir))
    # Extracting batch ID and hash from preexisting batch directory
    # (filenames follow the "compilation_<id>_<hash>.pdf" pattern).
    # NOTE(review): a stray file in bch_dir that does not match this
    # pattern makes int(i[1]) raise IndexError/ValueError -- confirm the
    # directory is written exclusively by this script.
    compiled = [i.split(".")[0].split("_") for i in sorted(os.listdir(bch_dir))]
    compiled_hsh = {
        int(i[1]): i[2] for i in compiled
    }
    # Generating each batch
    for bch_id, bch_start in enumerate(range(0, len(posts), bch_size)):
        # For remaining files at the end not reaching the size of a batch
        if bch_start + bch_size > len(posts): break
        # Current range of indices and hash of date strings
        bch_range = list(range(bch_start, bch_start + bch_size))
        # Join contents of each post to generate hash; only the last 6 hex
        # digits are kept as the fingerprint embedded in the filename.
        hsh = hash_str("".join([
            File(f"{src_dir}{posts[i]}/index.md").read() for i in bch_range
        ]))[-6:]
        existing_hsh = compiled_hsh.get(bch_id)
        # If this batch is already present
        if existing_hsh:
            if existing_hsh == hsh:
                print(f" Skipping batch: {bch_id} #{hsh}")
                continue
            else:
                # Source content changed: drop the stale PDF before rebuilding.
                print(f" Removing obsolete: {bch_id} #{existing_hsh} -> #{hsh}")
                os.remove(f"{bch_dir}compilation_{bch_id}_{existing_hsh}.pdf")
        print(f" Processing batch: {bch_id} #{hsh}")
        filename = f"compilation_{bch_id}_{hsh}"
        # Writing index.tex to be compiled
        File(f"{tmp_dir}index.tex").writelines([
            # Title
            "\\mlytitle{" + f"c13n \\#{bch_id}" + "}",
            *[
                # Dump file contents from each post directory; this reads
                # the already-published .tex, so post() must have run first.
                File(f"{pbl_dir}{posts[i]}/index.tex").read()
                for i in bch_range
            ]
        ])
        # Compiling and cleaning
        texcomp("drvmly.ltx")
        shutil.copy(tmp_dir + "index.pdf", bch_dir + filename.lower() + ".pdf")
        shutil.rmtree(tmp_dir)
169 | 225 |
|
# Command-line entry point: exactly one argument selects the action.
if len(sys.argv) != 2:
    print("make: Incorrect options...")
else:
    actions = {"post": post, "batch": batch}
    action = actions.get(sys.argv[1])
    if action:
        action()
    else:
        print("make: Doing nothing...")
0 commit comments