@@ -190,8 +190,14 @@ def parseMarkDownFile(file, **inputArgs):
190
190
else :
191
191
with open (file .fullPath , 'r' , encoding = 'utf8' ) as markDownFile , \
192
192
open (texFile ,'a+' ) as texFile :
193
+ # The order here is relevant. Some of the regex depend on not having conflicting patterns.
194
+ # e.g. empty lines can sometimes interfere with some list patterns
195
+ # e.g. the horizontal line pattern (\n* * *) can sometimes be mistaken for a list.
196
+ # I've tried to minimize these conflicts but I'm not 100% sure. So any change should be tested...
193
197
inText = markDownFile .read ()
198
+ inText = stripEmptyLines (inText )
194
199
inText = deleteJustTheDocsHeader (inText )
200
+ inText = parseHorizontalLines (inText )
195
201
inText = parseInlineItalicText (inText )
196
202
inText = parseInlineBoldText (inText )
197
203
inText = parseUnorderedList (inText )
@@ -211,25 +217,11 @@ def parseInlineItalicText(inText):
211
217
def parseInlineBoldText(inText):
    r"""Convert Markdown bold spans (``**text**`` or ``__text__``) to LaTeX.

    A span is only converted when it is preceded and followed by a non-word
    character, and when the opening and closing delimiters are the same.
    The bold text itself may contain word characters and spaces only.

    Args:
        inText: Markdown text to convert.

    Returns:
        The text with every matched span rewritten as ``\textbf{text}``.
    """
    # The closing delimiter is a backreference so '**' cannot be closed by
    # '__' (or vice versa). The body is lazy because '\w' also matches '_',
    # and a greedy body would swallow the closing '__' into the bold text.
    boldPattern = (r'(?<=\W)(?P<delim>\*\*|__)'
                   r'(?P<btext>[\w ]+?)'
                   r'(?P=delim)(?=\W)')
    return re.sub(boldPattern, r'\\textbf{\g<btext>}', inText)
213
219
214
- def parseUnorderedList (inText ):
215
- match = re .search (r'(\n\s?\*\s?.+)(\n\s?\*\s?(.+))*' , inText )
216
- if match :
217
- outList = '\n \\ begin{itemize}\n '
218
- pattern2 = re .compile (r'\n*\s*\*\s*(?P<item>.+)(?=\n)?' )
219
- itemList = pattern2 .finditer (match .group (0 ))
220
- for item in itemList :
221
- outList += '\t \\ item ' + item .group ('item' ) + '\n '
222
- outList += '\\ end{itemize}'
223
- outText = inText [:match .start (0 )] + outList + inText [match .end (0 ):]
224
- return parseUnorderedList (outText )
225
- else :
226
- return inText
227
-
228
220
def parseInlineImages (inText ):
229
221
match = re .search (r'!\[(?P<alt_text>[^]]+)\]\((?P<imgFilePath>[^)]+)\)' , inText )
230
222
if match :
231
- imgPath = mne_cpp .core .none_if_empty (match .group ('imgFilePath' ))
232
- imgAltText = mne_cpp .core .none_if_empty (match .group ('alt_text' ))
223
+ imgPath = mne_cpp .core .noneIfEmpty (match .group ('imgFilePath' ))
224
+ imgAltText = mne_cpp .core .noneIfEmpty (match .group ('alt_text' ))
233
225
figText = '\n \\ begin{wrapfigure}{r}{0.5\\ textwidth}'
234
226
figText += '\n \t \\ begin{center}'
235
227
figText += '\n \t \t \\ includegraphics[width=0.4\\ textwidth]{ ' + imgPath + '}'
@@ -244,7 +236,7 @@ def parseInlineImages(inText):
244
236
def parseInlineHTMLImages (inText ):
245
237
match = re .search (r'<\s*img\s*src\s*=\s*"(?P<imgPath>[^"]+)".*>' , inText )
246
238
if match :
247
- imgPath = mne_cpp .core .none_if_empty (match .group ('imgFilePath' ))
239
+ imgPath = mne_cpp .core .noneIfEmpty (match .group ('imgFilePath' ))
248
240
figText = '\n \\ begin{wrapfigure}{r}{0.5\\ textwidth}'
249
241
figText += '\n \t \\ begin{center}'
250
242
figText += '\n \t \t \\ includegraphics[width=0.4\\ textwidth]{ ' + imgPath + '}'
@@ -256,7 +248,7 @@ def parseInlineHTMLImages(inText):
256
248
else :
257
249
return inText
258
250
259
- def parseTableMd (inText )
251
+ def parseTableMd (inText ):
260
252
match = re .search (r'(?<=\n)\|([^|\n]+\|)+' , inText )
261
253
if match :
262
254
tableText = inText [match .start (0 ):match .end (0 )]
@@ -312,14 +304,67 @@ def parseHeaders(inText):
312
304
else :
313
305
return inText
314
306
315
- def parseHorizontalLine (inText ):
307
def parseHorizontalLines(inText):
    r"""Replace Markdown horizontal rules (``* * *``) with a LaTeX rule.

    A rule is recognised only when the three asterisks (separated by single
    whitespace characters) are preceded and followed by a newline.

    Args:
        inText: Markdown text to convert.

    Returns:
        The text with every matched rule replaced by
        ``\noindent\rule{15cm}{0.5pt}``.
    """
    latexRule = '\\noindent\\rule{15cm}{0.5pt}'
    # A callable replacement is used on purpose: re.sub processes backslash
    # escapes in a replacement *string*, which would corrupt the LaTeX
    # command (e.g. turn '\n' into a newline).
    return re.sub(r'(?<=\n)\*\s\*\s\*(?=\n)', lambda _match: latexRule, inText)
317
309
318
- # parse horizontal line
319
- # \n\* \* \*
310
def stripHorizontalLines(inText):
    """Delete Markdown horizontal rules (``* * *``) from the text.

    Only rules that are preceded and followed by a newline are removed; the
    surrounding newlines themselves are kept.

    Args:
        inText: Markdown text to clean.

    Returns:
        The text without the matched horizontal rules.
    """
    rulePattern = re.compile(r'(?<=\n)\*\s\*\s\*(?=\n)')
    cleanedText = rulePattern.sub('', inText)
    return cleanedText
312
+
313
def stripEmptyLines(inText):
    """Collapse consecutive newlines so the text contains no empty lines.

    Every newline that directly follows another newline is removed, so a run
    of several newlines becomes a single one.

    Args:
        inText: Text to clean.

    Returns:
        The text with empty lines removed.
    """
    repeatedNewline = re.compile(r'((?<=\n)\n)')
    withoutEmptyLines = repeatedNewline.sub('', inText)
    return withoutEmptyLines
315
+
316
+ # def parseUnorderedList(inText):
317
+ # match = re.search(r'(\n\s?\*\s?.+)(\n\s?\*\s?(.+))*', inText)
318
+ # if match:
319
+ # outList = '\n\\begin{itemize}\n'
320
+ # pattern2 = re.compile(r'\n*\s*\*\s*(?P<item>.+)(?=\n)?')
321
+ # itemList = pattern2.finditer(match.group(0))
322
+ # for item in itemList:
323
+ # outList += '\t\\item ' + item.group('item') + '\n'
324
+ # outList += '\\end{itemize}'
325
+ # outText = inText[:match.start(0)] + outList + inText[match.end(0):]
326
+ # return parseUnorderedList(outText)
327
+ # else:
328
+ # return inText
329
def parseUnorderedList(inText, i=0):
    r"""Convert Markdown unordered lists into LaTeX ``itemize`` environments.

    An item is a line that starts with exactly ``i`` spaces followed by a
    ``-`` or ``*`` marker. Each run of consecutive item lines becomes one
    ``itemize`` environment; all other text is left untouched.

    Known limitations: lines at a different indentation level end the
    current run, so nested lists are not folded into their parent item, and
    a horizontal rule (``* * *``) at indentation ``i`` is read as an item.

    Args:
        inText: Text that may contain unordered list items. Each item must
            be preceded by a newline to be recognised.
        i: Number of leading spaces that identifies the list level to
            convert. Defaults to 0 (top-level lists).

    Returns:
        The text with each run of items replaced by
        ``\begin{itemize} ... \end{itemize}``.
    """
    itemRegex = r'\n {%d}[-*] *(?P<itemText>.*)' % i
    itemPattern = re.compile(itemRegex)
    # A run is one or more directly adjacent item lines.
    runPattern = re.compile(r'(?:\n {%d}[-*] *.*)+' % i)

    def renderRun(runMatch):
        # Built by plain concatenation and returned from a callable so that
        # re.sub never interprets the LaTeX backslashes as escapes.
        items = ''.join('\n\t\\item ' + item.group('itemText')
                        for item in itemPattern.finditer(runMatch.group(0)))
        return '\n\\begin{itemize}' + items + '\n\\end{itemize}'

    return runPattern.sub(renderRun, inText)
339
+
340
+
341
def parseOneList(inList):
    """Convert the text of a single Markdown list to LaTeX.

    Only top-level unordered items are converted for now; the work is
    delegated to parseUnorderedList.

    Args:
        inList: Text of one list, each item preceded by a newline.

    Returns:
        The converted list text.
    """
    # Indentation level 0 is passed explicitly: top-level items only.
    return parseUnorderedList(inList, 0)
343
+
344
+
345
+
346
def parseLists(inText):
    """Find every Markdown list in the text and convert it with parseOneList.

    A list is a run of consecutive lines that each start with optional
    spaces and either a ``-``/``*`` marker or a number followed by a dot.

    Args:
        inText: Markdown text to convert.

    Returns:
        The text with each list block replaced by the output of
        parseOneList. Text without lists is returned unchanged.
    """
    listPattern = re.compile(r'(\n(( *[-*] *)|( *\d+\. *))[^\-*\n ].+)+')

    def convertList(listMatch):
        listText = listMatch.group()
        parsedList = parseOneList(listText)
        # Defensive: leave the list untouched if the helper yields nothing.
        return listText if parsedList is None else parsedList

    # Each list block is visited exactly once. Re-scanning the converted
    # text would never terminate for lists the helper leaves unchanged
    # (e.g. ordered lists), because they would keep matching the pattern.
    return listPattern.sub(convertList, inText)
354
+
355
+
356
+ # for spaces in range(2:2:6):
357
+ # pattern =
358
+
359
+ # matches4ord = re.finditer(r'(\n( {2}(\d+\.) *)([^-\n ].*))+', text[match.start(0):match.end(0)])
360
+ # for match4ord in matches4ord:
361
+ # outText = '\n\\begin{enumerate}\n'
362
+
363
+ # ((\n {2}\d+\. *)(?P<item>.*))
320
364
321
365
# parse all lists with (\n((\s*[-*]\s*)|(\s*\d+\.\s*)).+)+
322
- # see https://regex101.com/r/2uKqPB/1/
366
+ # https://regex101.com/r/idzIo5/1/
367
+ # https://regex101.com/r/Iu3hKt/1
323
368
324
369
# after this parse
325
370
# ordered lists of level 4
@@ -335,18 +380,16 @@ def parseHorizontalLine(inText):
335
380
# https://tex.stackexchange.com/questions/247681/how-to-create-checkbox-todo-list
336
381
337
382
338
-
339
-
340
383
# still missing:
341
384
# ordered and unordered lists parsing
342
385
# inbound links vs outbound links
343
386
# parse inline code
344
387
# preamble and ending file
345
388
# parse multiple terms description/definition
346
-
389
+ # header tags up to 6 #s
347
390
348
391
def processImage (imageFile ):
349
- _ , _ , _ , _ , fileExt = mne_cpp .core .parseFilePathNameExt (imageFile )
392
+ _ , _ , _ , _ , fileExt = mne_cpp .core .parseFilePathNameExt (imageFile )
350
393
if fileExt == "jpg" or fileExt == "jpeg" :
351
394
jpg2png (imageFile )
352
395
if fileExt == "svg2" :
0 commit comments