-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgen.py
executable file
·466 lines (387 loc) · 14 KB
/
gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
#!/usr/bin/env python3
from typing import Optional, Any
import os
import sys
import argparse
import re
import genanki # type: ignore
def main(
    nome_file: str, deck_name: str, version: int, obsidian_assets_folder: Optional[str]
) -> None:
    """Build an Anki package (*.apkg) from a Markdown file.

    The package is written in the same directory as ``nome_file``; its file
    name is ``deck_name`` lower-cased with spaces replaced by underscores.

    Args:
        nome_file: Path of the Markdown file to convert.
        deck_name: Name given to the generated deck.
        version: Input format selector (1, 2 or 3), dispatching to
            ``generate_v1`` / ``generate_v2`` / ``generate_v3``.
        obsidian_assets_folder: Assets folder passed to ``generate_v1`` for
            resolving Obsidian image embeds, or None.

    Raises:
        SystemExit: With status 1 when ``version`` is not 1, 2 or 3.
    """
    # Reject an unsupported version up front, with a message on stderr.
    # sys.exit is used instead of the bare ``exit`` helper, which is only
    # injected by the ``site`` module and is not guaranteed to exist.
    if version not in (1, 2, 3):
        sys.exit(f"Error: unsupported version {version} (expected 1, 2 or 3).")

    # Define Anki note model
    model_id = 1345664314
    model = genanki.Model(
        model_id,
        "Custom Model",
        fields=[
            {"name": "Question"},
            {"name": "Answer"},
        ],
        templates=[
            {
                "name": "QA",
                "qfmt": "{{Question}}",
                "afmt": "{{Answer}}",
            },
        ],
    )

    # Generate Anki cards and add them to a deck
    deck_id = 3145744450
    deck = genanki.Deck(deck_id, deck_name)

    media_files: list[str] = []
    if version == 1:
        count, media_files = generate_v1(nome_file, model, deck, obsidian_assets_folder)
    elif version == 2:
        # TODO: Hardcoded media files
        count = generate_v2(nome_file, model, deck)
    else:
        # TODO: Hardcoded media files
        count = generate_v3(nome_file, model, deck)

    # Media paths are collected relative to the input file's directory,
    # so anchor them there before packaging.
    file_dir = os.path.dirname(nome_file)
    media_files = fix_media_files_path(file_dir, media_files)

    # Save the deck to an Anki package (*.apkg) file
    output = f"{deck_name.lower().replace(' ', '_')}.apkg"
    package = genanki.Package(deck)
    package.media_files = media_files
    package.write_to_file(os.path.join(file_dir, output))
    print("Saved", count, "flashcards")
def fix_media_files_path(dir: str, media_files: list[str]) -> list[str]:
    """Return a new list with every media path joined onto ``dir``.

    Args:
        dir: Directory to prepend to each path.
        media_files: Media paths to re-anchor.

    Returns:
        The re-anchored paths, in the same order as ``media_files``.
    """
    return [os.path.join(dir, relative_path) for relative_path in media_files]
def save_question_v1(
    dict: dict[str, list[tuple[str, str]]],
    current_title: Any,
    question_rows: list[str],
    answer_rows: list[str],
) -> None:
    """Append one (question, answer) card to the list stored under a title.

    Nothing is stored unless both ``question_rows`` and ``answer_rows`` are
    non-empty. The rows of each side are joined with newlines.

    Args:
        dict: Mapping from title to its list of cards; mutated in place.
        current_title: Key under which the card is stored.
        question_rows: Lines making up the question.
        answer_rows: Lines making up the answer.
    """
    if not question_rows or not answer_rows:
        return
    card = ("\n".join(question_rows), "\n".join(answer_rows))
    # setdefault: a card whose title has no entry yet (e.g. it appears before
    # any title was registered) creates the entry instead of raising KeyError.
    dict.setdefault(current_title, []).append(card)
def save_question_v2(
    dict: dict[str, list[tuple[str, str, str, str]]],
    current_chapter: Any,
    current_subchapter: Any,
    current_subsubchapter: Any,
    question_rows: list[str],
    answer_rows: list[str],
) -> None:
    """Append one card, tagged with its sub-headings, to a chapter's list.

    Nothing is stored unless both ``question_rows`` and ``answer_rows`` are
    non-empty. The stored tuple is
    ``(subchapter, subsubchapter, question, answer)``, with the rows of each
    side joined with newlines.

    Args:
        dict: Mapping from chapter to its list of cards; mutated in place.
        current_chapter: Key under which the card is stored.
        current_subchapter: Sub-chapter stored with the card (may be None).
        current_subsubchapter: Sub-sub-chapter stored with the card (may be None).
        question_rows: Lines making up the question.
        answer_rows: Lines making up the answer.
    """
    if not question_rows or not answer_rows:
        return
    card = (
        current_subchapter,
        current_subsubchapter,
        "\n".join(question_rows),
        "\n".join(answer_rows),
    )
    # setdefault: a card whose chapter has no entry yet creates the entry
    # instead of raising KeyError.
    dict.setdefault(current_chapter, []).append(card)
def fix_string(content: str) -> str:
    """Normalise one line of Markdown for use inside a card field.

    The line is stripped of surrounding whitespace, every ``#`` (and the space
    following it, if any) is removed, the first ASCII letter is upper-cased,
    and angle brackets are replaced by HTML entities so they are displayed
    literally instead of being parsed as markup.

    Args:
        content: Raw line of text.

    Returns:
        The cleaned line.
    """
    content = content.strip().replace("# ", "").replace("#", "")

    # Upper-case the first ASCII letter, wherever it sits in the line.
    match = re.search(r"[A-Za-z]", content)
    if match is not None:
        first_letter_index = match.start()
        content = (
            content[:first_letter_index]
            + content[first_letter_index].upper()
            + content[first_letter_index + 1 :]
        )

    # Escape last: doing it before the capitalisation could upper-case the
    # "l" of an entity produced here and turn it into a different entity.
    return content.replace("<", "&lt;").replace(">", "&gt;")
def parse_media_images(
    markdown_text: str, obsidian_assets_folder: Optional[str]
) -> tuple[str, list[str]]:
    """Replace image references with ``<img>`` tags and collect their paths.

    When ``obsidian_assets_folder`` is None, standard Markdown images
    (``![alt](path)``) are converted. Otherwise only Obsidian embeds
    (``![[path]]``) are converted and each collected path is prefixed with the
    assets folder. In both cases the ``src`` of the tag is the last
    ``/``-separated component of the path.

    Args:
        markdown_text: Text to scan.
        obsidian_assets_folder: Assets folder for Obsidian embeds, or None to
            parse standard Markdown images.

    Returns:
        A tuple ``(html_text, image_paths)``: the text with images replaced,
        and the image paths in order of appearance.
    """

    def markdown_to_html_images(markdown_text: str) -> tuple[str, list[str]]:
        image_paths: list[str] = []

        def replace_image(match: re.Match[str]) -> str:
            # Group 1 is always a string (possibly empty) for this pattern.
            alt_text = match.group(1)
            image_path = match.group(2)
            image_paths.append(image_path)
            image_filename = image_path.split("/")[-1]  # keep only the file name
            return f'<img src="{image_filename}" alt="{alt_text}">'

        # Find every Markdown image and replace it with an HTML tag
        pattern = r"\!\[([^\]]*)\]\(([^)]+)\)"
        html_text = re.sub(pattern, replace_image, markdown_text)
        return html_text, image_paths

    def obsidian_md_to_html_images(
        markdown_text: str, obsidian_assets_folder: str
    ) -> tuple[str, list[str]]:
        image_paths: list[str] = []

        def replace_image(match: re.Match[str]) -> str:
            # Obsidian embeds may carry a "|size" or "|alias" suffix
            # (e.g. ![[pic.png|200]]); it is not part of the file name, so
            # drop it and trim surrounding whitespace.
            image_path = match.group(1).partition("|")[0].strip()
            image_paths.append(os.path.join(obsidian_assets_folder, image_path))
            image_filename = image_path.split("/")[-1]  # keep only the file name
            return f'<img src="{image_filename}">'

        # Find every Obsidian image embed and replace it with an HTML tag
        pattern = r"\!\[\[([^\]]+)\]\]"
        html_text = re.sub(pattern, replace_image, markdown_text)
        return html_text, image_paths

    if obsidian_assets_folder is not None:
        return obsidian_md_to_html_images(markdown_text, obsidian_assets_folder)
    return markdown_to_html_images(markdown_text)
def generate_v3(
    nome_file: str,
    model: genanki.Model,
    deck: genanki.Deck,
) -> int:
    """Add one note per table-like row of ``nome_file`` to ``deck``.

    Each row is expected in the form ``| Question | Answer |``: the text
    between the first and second ``|`` is the question, the text between the
    second and third is the answer. Blank lines are ignored; rows with too few
    ``|`` separators or with an empty question/answer are reported and skipped.

    Args:
        nome_file: Path of the input file.
        model: Note model used for every created note.
        deck: Deck the notes are added to.

    Returns:
        The number of notes added.
    """
    count = 0
    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(nome_file, "r", encoding="utf-8") as file:
        for line_number, line in enumerate(file):
            if not line.strip():
                continue
            cells = line.split("|")
            # Indexing cells[1] / cells[2] needs at least two separators.
            if len(cells) < 3:
                print("Skipping line number", line_number, ". Malformed row.")
                continue
            q = cells[1].strip()
            a = cells[2].strip()
            if q == "" or a == "":
                print("Skipping line number", line_number, ". Empty question or answer.")
                continue
            deck.add_note(genanki.Note(model=model, fields=[q, a]))
            count += 1
    return count
def generate_v1(
    nome_file: str,
    model: genanki.Model,
    deck: genanki.Deck,
    obsidian_assets_folder: Optional[str],
) -> tuple[int, list[str]]:
    """Add the cards parsed by ``md_question_parse_v1`` to ``deck``.

    Images in both question and answer are converted to ``<img>`` tags via
    ``parse_media_images``; newlines become ``<br>`` and the question is
    prefixed with its chapter as an ``<h1>`` heading.

    Args:
        nome_file: Path of the Markdown file.
        model: Note model used for every created note.
        deck: Deck the notes are added to.
        obsidian_assets_folder: Forwarded to ``parse_media_images``.

    Returns:
        A tuple ``(count, media_files)``: the number of notes added and the
        image paths found, question images before answer images for each card.
    """
    parsed = md_question_parse_v1(nome_file)
    collected_media: list[str] = []
    total = 0

    for chapter, cards in parsed.items():
        for question, answer in cards:
            question, question_media = parse_media_images(
                question, obsidian_assets_folder
            )
            answer, answer_media = parse_media_images(answer, obsidian_assets_folder)
            collected_media.extend(question_media)
            collected_media.extend(answer_media)

            question_html = f"<h1>{chapter}</h1>" + question.replace("\n", "<br>")
            answer_html = answer.replace("\n", "<br>")

            deck.add_note(genanki.Note(model=model, fields=[question_html, answer_html]))
            total += 1

    return total, collected_media
def generate_v2(nome_file: str, model: genanki.Model, deck: genanki.Deck) -> int:
    """Add the cards parsed by ``md_question_parse_v2`` to ``deck``.

    Newlines become ``<br>`` and each question is prefixed with its chapter,
    sub-chapter and sub-sub-chapter as ``<h1>``/``<h2>``/``<h3>`` headings.
    A heading level that is None is omitted.

    Args:
        nome_file: Path of the Markdown file.
        model: Note model used for every created note.
        deck: Deck the notes are added to.

    Returns:
        The number of notes added.
    """
    text_parsed_data = md_question_parse_v2(nome_file)
    count = 0
    for chapter, cards in text_parsed_data.items():
        for c2, c3, q, a in cards:
            # The parser leaves missing levels as None; formatting them
            # directly would print a literal "None" heading on the card.
            headings = "".join(
                f"<{tag}>{title}</{tag}>"
                for tag, title in (("h1", chapter), ("h2", c2), ("h3", c3))
                if title is not None
            )
            q = headings + q.replace("\n", "<br>")
            a = a.replace("\n", "<br>")
            deck.add_note(genanki.Note(model=model, fields=[q, a]))
            count += 1
    return count
def md_question_parse_v2(nome_file: str) -> dict[str, list[tuple[str, str, str, str]]]:
    """Parse a Markdown file with four heading levels into flashcards.

    Line handling:
      * ``# ``    starts a chapter (sub-chapter and sub-sub-chapter reset);
      * ``## ``   starts a sub-chapter (sub-sub-chapter reset);
      * ``### ``  starts a sub-sub-chapter;
      * ``#### `` starts a new question, the heading being its first line;
      * ``---``   switches from question lines to answer lines;
      * any other line is appended to the question or the answer.

    Every heading first stores the pending card through ``save_question_v2``.
    All stored lines are cleaned with ``fix_string``.

    Args:
        nome_file: Path of the Markdown file.

    Returns:
        Mapping from chapter title to its cards, each card being
        ``(subchapter, subsubchapter, question, answer)``.
    """
    dict_parsed: dict[str, list[tuple[str, str, str, str]]] = {}
    current_chapter = None
    current_subchapter = None
    current_subsubchapter = None
    question_rows: list[str] = []
    answer_rows: list[str] = []
    is_question = True

    def flush() -> None:
        """Store the pending card (if complete) and start a fresh one."""
        nonlocal question_rows, answer_rows
        save_question_v2(
            dict_parsed,
            current_chapter,
            current_subchapter,
            current_subsubchapter,
            question_rows,
            answer_rows,
        )
        question_rows = []
        answer_rows = []

    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(nome_file, "r", encoding="utf-8") as file:
        for riga in file:
            if riga.startswith("# "):
                flush()
                current_chapter = fix_string(riga)
                current_subchapter = None
                current_subsubchapter = None
                # setdefault: a repeated chapter title keeps the cards
                # already collected under it instead of discarding them.
                dict_parsed.setdefault(current_chapter, [])
            elif riga.startswith("## "):
                flush()
                current_subchapter = fix_string(riga)
                current_subsubchapter = None
            elif riga.startswith("### "):
                flush()
                current_subsubchapter = fix_string(riga)
            elif riga.startswith("#### "):
                flush()
                question_rows.append(fix_string(riga))
                is_question = True
            elif riga.strip() == "---":
                is_question = False
            elif is_question:
                question_rows.append(fix_string(riga))
            else:
                answer_rows.append(fix_string(riga))

    # Save last flashcard
    flush()
    return dict_parsed
def md_question_parse_v1(nome_file: str) -> dict[str, list[tuple[str, str]]]:
    """Parse a Markdown file with two heading levels into flashcards.

    Line handling:
      * ``# ``  starts a new title;
      * ``## `` starts a new question, the heading text being its first line;
      * ``---`` switches from question lines to answer lines;
      * any other line is appended to the question or the answer.

    Every heading first stores the pending card through ``save_question_v1``.
    All stored lines are cleaned with ``fix_string``.

    Args:
        nome_file: Path of the Markdown file.

    Returns:
        Mapping from title to its list of ``(question, answer)`` cards.
    """
    dict_parsed: dict[str, list[tuple[str, str]]] = {}
    current_title = None
    question_rows: list[str] = []
    answer_rows: list[str] = []
    is_question = True

    def flush() -> None:
        """Store the pending card (if complete) and start a fresh one."""
        nonlocal question_rows, answer_rows
        save_question_v1(dict_parsed, current_title, question_rows, answer_rows)
        question_rows = []
        answer_rows = []

    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(nome_file, "r", encoding="utf-8") as file:
        for riga in file:
            if riga.startswith("# "):
                flush()
                current_title = fix_string(riga[2:])  # drop the leading '# '
                # setdefault: a repeated title keeps the cards already
                # collected under it instead of discarding them.
                dict_parsed.setdefault(current_title, [])
            elif riga.startswith("## "):
                flush()
                question_rows.append(fix_string(riga[3:]))
                is_question = True
            elif riga.strip() == "---":
                is_question = False
            elif is_question:
                question_rows.append(fix_string(riga))
            else:
                answer_rows.append(fix_string(riga))

    # Save last flashcard
    flush()
    return dict_parsed
if __name__ == "__main__":
    # Command-line entry point: parse and validate the arguments, then
    # hand over to main().
    parser = argparse.ArgumentParser(description="Convert Markdown to Anki Deck")

    # Sentinel defaults used to detect arguments the user did not supply.
    default_nome_file = "___default_file_name_error___"
    default_obsidian_assets = "___default_obsidian-assets_error___"

    parser.add_argument(
        "input_file", nargs="?", default=default_nome_file, help="Input Markdown file"
    )
    parser.add_argument(
        "-o", "--output", default="MD2AnkiDeckOutput", help="Output Anki Deck name"
    )
    parser.add_argument(
        "-v",
        "--version",
        type=int,
        choices=[1, 2, 3],
        # Required: without it the value is None and the conversion below
        # used to crash with a TypeError instead of a usage error.
        required=True,
        help="Anki Deck version (1, 2 or 3)",
    )
    parser.add_argument(
        "--use-obsidian-format",
        action="store_true",
        help="Use Obsidian Markdown format",
    )
    parser.add_argument(
        "-a",
        "--obsidian-assets",
        default=default_obsidian_assets,
        help="Obsidian Assets Folder path",
    )
    args = parser.parse_args()

    # Check Input File Name
    input_file = args.input_file
    if input_file == default_nome_file:
        print("Errore: specificare un file di input.")
        sys.exit(1)
    elif not os.path.isfile(input_file):
        print(f"Errore: il file '{input_file}' non esiste.")
        sys.exit(1)

    # Check Obsidian Assets Folder (resolved relative to the input file's
    # directory)
    use_obsidian_format = args.use_obsidian_format
    obsidian_assets = args.obsidian_assets
    obsidian_assets_folder = None
    if use_obsidian_format:
        if obsidian_assets == default_obsidian_assets:
            print("Errore: specificare la cartella degli assets di Obsidian.")
            sys.exit(1)
        elif not os.path.isdir(
            os.path.join(os.path.dirname(input_file), obsidian_assets)
        ):
            print(f"Errore: la cartella '{obsidian_assets}' non esiste.")
            sys.exit(1)
        else:
            obsidian_assets_folder = obsidian_assets

    # Other parameters
    output_deck = args.output
    deck_version = args.version

    # Print the values in use
    print(
        f'Converting file: {input_file} to: {output_deck}.apkg\nVersion: {deck_version}\nUse Obsidian Format: {use_obsidian_format} (Path: "{obsidian_assets_folder}")'
    )
    main(input_file, output_deck, deck_version, obsidian_assets_folder)