Skip to content

Commit bcd13bd

Browse files
committed
parsinglists not working
1 parent 45172ff commit bcd13bd

8 files changed

+198
-46
lines changed
Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
1-
import mne_cpp.core
1+
import mne_cpp.core as mne
22
# import mne_cpp.pdf_doc
33
from os import stat
44

5-
mne_cpp.core.version()
5+
mne.version()
66

7-
projectFolder = mne_cpp.core.baseFolder()
7+
projectFolder = mne.baseFolder()
88

99
# Recursively list all the Markdown (.md) files under doc/gh-pages, sort them by size (largest first) and print the results.
1010
listOfFiles = []
11-
mne_cpp.core.recursiveFolderProcess(projectFolder + 'doc/gh-pages', lambda f: \
12-
listOfFiles.append((f, stat(f).st_size)) \
13-
if f.name.endswith('.md') \
14-
else None )
11+
mne.recursiveFolderProcess(projectFolder + 'doc/gh-pages', lambda f: \
12+
listOfFiles.append((f, stat(f).st_size)) \
13+
if f.name.endswith('.md') \
14+
else None )
1515
listOfFiles.sort(reverse=True, key=lambda f:f[1])
1616
for f in listOfFiles:
17-
print('File: ' + f[0].path + ' - (' + mne_cpp.core.sizeHumanReadable(f[1]) + ')')
17+
print('File: ' + f[0].path + ' - (' + mne.sizeHumanReadable(f[1]) + ')')
1818

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,93 @@
11
import mne_cpp.core
2-
import mne_cpp.pdf_doc
2+
import mne_cpp.pdf_doc as mnepdf
33

44
projectFolder = mne_cpp.core.baseFolder()
55
webBaseFolder = projectFolder + 'doc/gh-pages'
66

7-
# webDocuments = mne_cpp.pdf_doc.scanFolder(webBaseFolder)
7+
# webDocuments = mnepdf.scanFolder(webBaseFolder)
88
# print(webDocuments)
99

10-
# web = mne_cpp.pdf_doc.buildWebStructure(webDocuments)
10+
# web = mnepdf.buildWebStructure(webDocuments)
1111
# print('Printing Web Structure:')
1212
# print(web)
1313

1414
# (pathLabel, filePath, fileName, fileExt, fullPath) = mne_cpp.core.extractFilePaths('../../doc/gh-pages/pages/documentation/anonymize.md')
15-
(pathLabel, filePath, fileName, fileExt, fullPath) = mne_cpp.core.extractFilePaths('../../doc/gh-pages/pages/contact.md')
15+
# (pathLabel, filePath, fileName, fileExt, fullPath) = mne_cpp.core.extractFilePaths('../../doc/gh-pages/pages/contact.md')
16+
17+
# inFile = open(fullPath, mode = 'r', encoding = 'utf8')
18+
# inText = inFile.read()
19+
# inFile.close()
20+
21+
# outText = mnepdf.parseUnorderedList(inText)
22+
# outFile = open(pathLabel + filePath + fileName + '.PROCESSED' + '.' + fileExt, mode = 'w', encoding = 'utf8')
23+
# outFile.write(outText)
24+
# outFile.close()
25+
26+
inText = r'''
27+
---
28+
layout: default
29+
title: Markdown kitchen sink
30+
nav_order: 99
31+
---
32+
(\n(( *[-*] *)|(\s*\d+\.\s*))[^\-*\n ].+)+
33+
(\n(( *[-*] *)|( *\d+\. *))[^\-*\n ].+)+
34+
(\n(( *[-*] *)|( *\d+\. *))[^\-*\n ].+)+
35+
Text can be **bold**, _italic_, or ~~strikethrough~~.
36+
37+
[Link to another page](another-page).
38+
39+
There should be whitespace between paragraphs.
40+
41+
There should be whitespace between paragraphs. We recommend including a README, or a file with information about your project.
42+
43+
# [](#header-1)Header 1
44+
45+
This is a normal paragraph following a header. GitHub is a code hosting platform for version control and collaboration. It lets you and others work together on projects from anywhere.
46+
47+
## [](#header-2)Header 2
48+
49+
> This is a blockquote following a header.
50+
>
51+
> When something is important enough, you do it even if the odds are not in your favor.
52+
53+
### [](#header-3)Header 3
54+
55+
```js
56+
// Javascript code with syntax highlighting.
57+
var fun = function lang(l) {
58+
dateformat.i18n = require('./lang/' + l)
59+
return true;
60+
}
61+
```
62+
63+
```ruby
64+
# Ruby code with syntax highlighting
65+
GitHubPages::Dependencies.gems.each do |gem, version|
66+
s.add_dependency(gem, "= #{version}")
67+
end
68+
```
69+
- level 1 item
70+
- level 2 item
71+
- level 2 item
72+
- level 3 item
73+
- level 3 item
74+
- level 1 item
75+
- level 2 item
76+
- level 2 item
77+
- level 2 item
78+
- level 1 item
79+
- level 2 item
80+
- level 2 item
81+
- level 1 item
82+
'''
83+
84+
outText = mnepdf.parseLists(inText)
85+
86+
87+
88+
89+
90+
a = 3
1691

17-
inFile = open(fullPath, mode = 'r', encoding = 'utf8')
18-
inText = inFile.read()
19-
inFile.close()
2092

21-
outText = mne_cpp.pdf_doc.parseUnorderedList(inText)
22-
outFile = open(pathLabel + filePath + fileName + '.PROCESSED' + '.' + fileExt, mode = 'w', encoding = 'utf8')
23-
outFile.write(outText)
24-
outFile.close()
2593

tools/python/list_trash_text1.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
\begin{itemize}
2+
\item level 1 item
3+
- level 2 item
4+
- level 2 item
5+
- level 3 item
6+
- level 3 item
7+
\item level 1 item
8+
- level 2 item
9+
- level 2 item
10+
- level 2 item
11+
\item level 1 item
12+
- level 2 item
13+
- level 2 item
14+
\item level 1 item
15+
\end{itemize}

tools/python/list_trash_text2.txt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
\begin{itemize}
2+
\item level 1 item
3+
\begin{itemize}
4+
\item level 2 item
5+
\item level 2 item
6+
\begin{itemize}
7+
\item level 3 item
8+
\item level 3 item
9+
\end{itemize}
10+
\end{itemize}
11+
\item level 1 item
12+
\begin{itemize}
13+
\item level 2 item
14+
\item level 2 item
15+
\item level 2 item
16+
\end{itemize}
17+
\item level 1 item
18+
\begin{itemize}
19+
\item level 2 item
20+
\item level 2 item
21+
\end{itemize}
22+
\item level 1 item
23+
\end{itemize}
24+
25+
26+
Binary file not shown.
Binary file not shown.

tools/python/mne_cpp/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,8 @@ def parseInputArguments(argsToParse, **opts):
158158
options[arg_adapted] = argsToParse[arg]
159159
return (v for k, v in options.items())
160160

161-
_suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
162161
def sizeHumanReadable(size):
162+
_suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
163163
# determine binary order in steps of size 10
164164
# (coerce to int, // still returns a float)
165165
order = int(log2(size) / 10) if size else 0

tools/python/mne_cpp/pdf_doc.py

Lines changed: 69 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,14 @@ def parseMarkDownFile(file, **inputArgs):
190190
else:
191191
with open(file.fullPath, 'r', encoding='utf8') as markDownFile, \
192192
open(texFile,'a+') as texFile:
193+
# The order here is relevant: some of the regexes only work if no conflicting pattern is still present in the text.
194+
# e.g. empty lines can sometimes interfere with some of the list patterns.
195+
# e.g. the horizontal-line pattern (\n* * *) can sometimes be mistaken for a list.
196+
# I've tried to minimize these conflicts, but I'm not 100% sure none remain, so any change to this order should be tested.
193197
inText = markDownFile.read()
198+
inText = stripEmptyLines(inText)
194199
inText = deleteJustTheDocsHeader(inText)
200+
inText = parseHorizontalLines(inText)
195201
inText = parseInlineItalicText(inText)
196202
inText = parseInlineBoldText(inText)
197203
inText = parseUnorderedList(inText)
@@ -211,25 +217,11 @@ def parseInlineItalicText(inText):
211217
def parseInlineBoldText(inText):
    r"""Convert Markdown bold spans into LaTeX \textbf{...} commands.

    A span is an opening ``**`` or ``__``, a body made of word characters and
    spaces, and the *same* delimiter again. The span must be preceded and
    followed by a non-word character, so a span at the very start or end of
    inText is left untouched.

    Parameters:
        inText: the Markdown text to process.

    Returns:
        inText with every bold span replaced by \textbf{<body>}.
    """
    # (?P=mark) forces the closing delimiter to equal the opening one, and the
    # lazy body stops at the first closing delimiter. The previous pattern let
    # the body swallow the closing '__' (underscores are word characters) and
    # accepted mismatched pairs such as '**text__'.
    boldPattern = r'(?<=\W)(?P<mark>\*\*|__)(?P<btext>[\w ]+?)(?P=mark)(?=\W)'
    return re.sub(boldPattern, r'\\textbf{\g<btext>}', inText)
213219

214-
def parseUnorderedList(inText):
215-
match = re.search(r'(\n\s?\*\s?.+)(\n\s?\*\s?(.+))*', inText)
216-
if match:
217-
outList = '\n\\begin{itemize}\n'
218-
pattern2 = re.compile(r'\n*\s*\*\s*(?P<item>.+)(?=\n)?')
219-
itemList = pattern2.finditer(match.group(0))
220-
for item in itemList:
221-
outList += '\t\\item ' + item.group('item') + '\n'
222-
outList += '\\end{itemize}'
223-
outText = inText[:match.start(0)] + outList + inText[match.end(0):]
224-
return parseUnorderedList(outText)
225-
else:
226-
return inText
227-
228220
def parseInlineImages(inText):
229221
match = re.search(r'!\[(?P<alt_text>[^]]+)\]\((?P<imgFilePath>[^)]+)\)', inText)
230222
if match:
231-
imgPath = mne_cpp.core.none_if_empty(match.group('imgFilePath'))
232-
imgAltText = mne_cpp.core.none_if_empty(match.group('alt_text'))
223+
imgPath = mne_cpp.core.noneIfEmpty(match.group('imgFilePath'))
224+
imgAltText = mne_cpp.core.noneIfEmpty(match.group('alt_text'))
233225
figText = '\n\\begin{wrapfigure}{r}{0.5\\textwidth}'
234226
figText += '\n\t\\begin{center}'
235227
figText += '\n\t\t\\includegraphics[width=0.4\\textwidth]{ ' + imgPath + '}'
@@ -244,7 +236,7 @@ def parseInlineImages(inText):
244236
def parseInlineHTMLImages(inText):
245237
match = re.search(r'<\s*img\s*src\s*=\s*"(?P<imgPath>[^"]+)".*>', inText)
246238
if match:
247-
imgPath = mne_cpp.core.none_if_empty(match.group('imgFilePath'))
239+
# The <img> pattern searched above names its capture group 'imgPath'; asking for 'imgFilePath' raises IndexError (no such group).
imgPath = mne_cpp.core.noneIfEmpty(match.group('imgPath'))
248240
figText = '\n\\begin{wrapfigure}{r}{0.5\\textwidth}'
249241
figText += '\n\t\\begin{center}'
250242
figText += '\n\t\t\\includegraphics[width=0.4\\textwidth]{ ' + imgPath + '}'
@@ -256,7 +248,7 @@ def parseInlineHTMLImages(inText):
256248
else:
257249
return inText
258250

259-
def parseTableMd(inText)
251+
def parseTableMd(inText):
260252
match = re.search(r'(?<=\n)\|([^|\n]+\|)+', inText)
261253
if match:
262254
tableText = inText[match.start(0):match.end(0)]
@@ -312,14 +304,67 @@ def parseHeaders(inText):
312304
else:
313305
return inText
314306

315-
def parseHorizontalLine(inText):
307+
def parseHorizontalLines(inText):
    r"""Replace Markdown horizontal rules with a LaTeX rule.

    A rule is a line made of star, whitespace, star, whitespace, star that has
    a newline on both sides; the surrounding newlines are kept.

    Parameters:
        inText: the Markdown text to process.

    Returns:
        inText with every rule replaced by \noindent\rule{15cm}{0.5pt}.
    """
    latexRule = '\\noindent\\rule{15cm}{0.5pt}'
    # A callable replacement is inserted verbatim. Passing latexRule as a
    # template string would make re.sub expand its '\n' and '\r' into a newline
    # and a carriage return, corrupting the LaTeX command.
    return re.sub(r'(?<=\n)\*\s\*\s\*(?=\n)', lambda _match: latexRule, inText)
317309

318-
# parse horizontal line
319-
# \n\* \* \*
310+
def stripHorizontalLines(inText):
    """Delete Markdown horizontal rules from inText.

    A rule is star, whitespace, star, whitespace, star with a newline directly
    before and after it. Only the rule itself is removed; both surrounding
    newlines stay in the text.
    """
    rulePattern = re.compile(r'(?<=\n)\*\s\*\s\*(?=\n)')
    return rulePattern.sub('', inText)
312+
313+
def stripEmptyLines(inText):
    """Remove every newline that directly follows another newline.

    The effect is that each run of consecutive newlines in inText collapses
    into a single newline, i.e. completely empty lines disappear.
    """
    repeatedNewline = re.compile(r'((?<=\n)\n)')
    return repeatedNewline.sub('', inText)
315+
316+
# def parseUnorderedList(inText):
317+
# match = re.search(r'(\n\s?\*\s?.+)(\n\s?\*\s?(.+))*', inText)
318+
# if match:
319+
# outList = '\n\\begin{itemize}\n'
320+
# pattern2 = re.compile(r'\n*\s*\*\s*(?P<item>.+)(?=\n)?')
321+
# itemList = pattern2.finditer(match.group(0))
322+
# for item in itemList:
323+
# outList += '\t\\item ' + item.group('item') + '\n'
324+
# outList += '\\end{itemize}'
325+
# outText = inText[:match.start(0)] + outList + inText[match.end(0):]
326+
# return parseUnorderedList(outText)
327+
# else:
328+
# return inText
329+
# One Markdown bullet line: leading spaces, a '-' or '*' marker, optional
# spaces, then the item text. The text may not start with '-', '*' or a space,
# so horizontal rules ('* * *', '---') and '**bold**' openers are not bullets.
_BULLET_LINE = re.compile(r'(?P<indent> *)[-*] *(?P<itemText>[^\-* ].*)')

# A run of consecutive list lines (unordered or ordered), each one introduced
# by the newline that precedes it.
_LIST_RUN = re.compile(r'(\n(( *[-*] *)|( *\d+\. *))[^\-*\n ].+)+')


def _matchBullet(line, minIndent):
    """Return the bullet match for line if it is indented by at least minIndent spaces, else None."""
    match = _BULLET_LINE.fullmatch(line)
    if match is not None and len(match.group('indent')) >= minIndent:
        return match
    return None


def parseUnorderedList(inText, i=0):
    r"""Convert one nesting level of Markdown bullet lists into LaTeX itemize.

    The text is scanned line by line. Consecutive bullet lines indented by at
    least i spaces form one run. Inside a run, every bullet indented by exactly
    i spaces becomes an \item, and the run is wrapped in \begin{itemize} /
    \end{itemize}. Deeper bullets are left as they are so that a later call
    with a larger i can convert them. The emitted LaTeX lines are indented by
    i spaces.

    Parameters:
        inText: text that may contain bullet lists.
        i: number of leading spaces of the level to convert. Defaults to 0
           (top-level bullets) so the function can be called with the text only.

    Returns:
        The text with the requested level converted; unchanged if no bullet
        has that indentation.
    """
    lines = inText.split('\n')
    pad = ' ' * i
    outLines = []
    pos = 0
    while pos < len(lines):
        if _matchBullet(lines[pos], i) is None:
            outLines.append(lines[pos])
            pos += 1
            continue

        # Collect the whole run first: \end{itemize} has to go after the last
        # line of the run, including nested children of the final item.
        run = []
        while pos < len(lines):
            match = _matchBullet(lines[pos], i)
            if match is None:
                break
            run.append((lines[pos], match))
            pos += 1

        isOpen = False
        for line, match in run:
            if len(match.group('indent')) == i:
                if not isOpen:
                    # Open the environment at the first item of this level, so
                    # deeper lines that precede it are not left inside an
                    # itemize without an \item.
                    outLines.append(pad + '\\begin{itemize}')
                    isOpen = True
                outLines.append(pad + '\\item ' + match.group('itemText'))
            else:
                outLines.append(line)
        if isOpen:
            outLines.append(pad + '\\end{itemize}')
    return '\n'.join(outLines)


def parseOneList(inList):
    """Convert one Markdown list, with all its nesting levels, to LaTeX.

    Every indentation width used by a bullet in inList is converted in turn,
    from the outermost level inwards, with parseUnorderedList. Ordered
    ('1.') items are not converted and stay in the text unchanged.

    Parameters:
        inList: the text of a single list (a run of list lines).

    Returns:
        The list text with its bullet levels replaced by nested itemize
        environments.
    """
    indents = set()
    for line in inList.split('\n'):
        match = _BULLET_LINE.fullmatch(line)
        if match is not None:
            indents.add(len(match.group('indent')))

    outList = inList
    # Outermost level first: converted lines start with a backslash, so they no
    # longer look like bullets and act as separators between the inner runs.
    for spaces in sorted(indents):
        outList = parseUnorderedList(outList, spaces)
    return outList


def parseLists(inText):
    """Convert every Markdown list found in inText to LaTeX.

    Each run of consecutive list lines is handed to parseOneList and replaced
    by its result. A list line is only recognised when a newline precedes it,
    so a list starting on the very first line of inText is not converted.

    Parameters:
        inText: the Markdown text to process.

    Returns:
        The text with its bullet lists converted; unchanged if there is none.
    """
    # A single substitution pass visits each run exactly once. Re-searching the
    # converted text would loop forever on runs that stay unchanged, such as
    # ordered lists.
    return _LIST_RUN.sub(lambda match: parseOneList(match.group()), inText)
354+
355+
356+
# for spaces in range(2:2:6):
357+
# pattern =
358+
359+
# matches4ord = re.finditer(r'(\n( {2}(\d+\.) *)([^-\n ].*))+', text[match.start(0):match.end(0)])
360+
# for match4ord in matches4ord:
361+
# outText = '\n\\begin{enumerate}\n'
362+
363+
# ((\n {2}\d+\. *)(?P<item>.*))
320364

321365
# parse all lists with (\n((\s*[-*]\s*)|(\s*\d+\.\s*)).+)+
322-
# see https://regex101.com/r/2uKqPB/1/
366+
# https://regex101.com/r/idzIo5/1/
367+
# https://regex101.com/r/Iu3hKt/1
323368

324369
# after this parse
325370
# ordered lists of level 4
@@ -335,18 +380,16 @@ def parseHorizontalLine(inText):
335380
# https://tex.stackexchange.com/questions/247681/how-to-create-checkbox-todo-list
336381

337382

338-
339-
340383
# still missing:
341384
# ordered and unordered lists parsing
342385
# inbound links vs outbound links
343386
# parse inline code
344387
# preamble and ending file
345388
# parse multiple terms description/definition
346-
389+
# header tags up to 6 #s
347390

348391
def processImage(imageFile):
349-
_, _, _, _, fileExt = mne_cpp.core.parseFilePathNameExt(imageFile)
392+
_, _, _, _, fileExt = mne_cpp.core.parseFilePathNameExt(imageFile)
350393
if fileExt == "jpg" or fileExt == "jpeg":
351394
jpg2png(imageFile)
352395
if fileExt == "svg2":

0 commit comments

Comments
 (0)