-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathreader.py
65 lines (55 loc) · 2.19 KB
/
reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
from pelican import signals
from nbconvert import HTMLExporter
from pelican.readers import BaseReader
from .preprocess import config_pres, Metadata
from bs4 import BeautifulSoup
def register():
    """Register the "ipynb" reader with Pelican.

    Connects a handler to the ``initialized`` signal. When the signal
    fires, the handler stores ``ipynbReader`` under the ``"ipynb"`` key of
    the ``READERS`` setting of the object it receives.
    """
    def add_reader(arg):
        arg.settings["READERS"]["ipynb"] = ipynbReader

    # add_reader is local to this call. With the signal's default weak
    # connection nothing else would reference it once register() returns,
    # so it would be garbage-collected and silently disconnected before the
    # signal fires. weak=False makes the signal hold a strong reference.
    signals.initialized.connect(add_reader, weak=False)
def get_file_name(full_path):
    """Return the file name of *full_path* without directory or extension.

    Only the last extension is removed (``"a.tar.gz"`` -> ``"a.tar"``) and
    surrounding whitespace is stripped from the result. A name that has no
    extension is returned unchanged instead of collapsing to an empty
    string.

    Args:
        full_path: Path to a file, using the platform's path separators.

    Returns:
        The stripped base name with its final extension removed.
    """
    # Local import keeps this block self-contained; os.path understands the
    # platform's separators, unlike a hard-coded split on '/'.
    import os

    base_name = os.path.basename(full_path)
    stem, _extension = os.path.splitext(base_name)
    return stem.strip()
class ipynbReader(BaseReader):
    '''ipynb Reader for pelican

    Converts a notebook to HTML with nbconvert, post-processes the markup
    with BeautifulSoup and derives a summary from the leading cells.

    A part of the code derived from pelican-ipynb
    '''
    enabled = True
    file_extensions = ['ipynb']
    # Fallbacks used when the CELL_PENALTY / SUMMARY_SIZE settings are absent.
    DEFAULT_CELL_PENALTY = 120
    DEFAULT_SUMMARY_SIZE = 600

    def read(self, source_path):
        '''Parse content and metadata for ipynb files.

        Args:
            source_path: Path of the notebook file to convert.

        Returns:
            A ``(html, metadata)`` tuple: the post-processed HTML as a
            string, and a dict of metadata whose values have been passed
            through ``process_metadata``.
        '''
        exporter = HTMLExporter(template_file='basic',
                                preprocessors=config_pres(self.settings))
        # Only the rendered body is used; the resources dict is discarded.
        content, _ = exporter.from_filename(source_path)

        soup = BeautifulSoup(content, 'html.parser')
        # Anchors must be rewritten first: the summary serializes cells
        # from this same tree and should contain the rewritten links.
        self._relocate_anchor_links(soup)
        summary = self._build_summary(soup)

        metadata = {'title': get_file_name(source_path), 'summary': summary}
        # presumably Metadata.data was filled in by the preprocessors during
        # the export above -- confirm in .preprocess
        metadata.update(Metadata.data)
        # The generated summary always wins over any 'summary' entry that
        # Metadata.data may have supplied.
        metadata['summary'] = summary

        # Change Metadata.data to standard pelican metadata
        metadata = {key: self.process_metadata(key, value)
                    for key, value in metadata.items()}
        return str(soup), metadata

    @staticmethod
    def _relocate_anchor_links(soup):
        '''Move each ``a.anchor-link`` to the front of its parent as "#".'''
        for anchor in soup.find_all('a', class_="anchor-link"):
            parent = anchor.parent
            anchor.extract()
            anchor.string = "#"
            # Inserted in reverse order so the result is: anchor, space, rest.
            parent.insert(0, " ")
            parent.insert(0, anchor)

    def _build_summary(self, soup):
        '''Concatenate leading top-level cells until the size budget is hit.

        Each top-level ``div`` costs the length of its text plus
        ``CELL_PENALTY`` for every direct child ``div`` carrying the class
        ``input`` or ``output_wrapper``. Collection stops before a cell
        that would push the running cost to 110% of ``SUMMARY_SIZE`` or
        more, or once the running cost already exceeds ``SUMMARY_SIZE``.
        '''
        penalty = self.settings.get('CELL_PENALTY', self.DEFAULT_CELL_PENALTY)
        summary_size = self.settings.get('SUMMARY_SIZE',
                                         self.DEFAULT_SUMMARY_SIZE)

        pieces = []
        accumulated = 0
        for cell in soup.find_all('div', recursive=False):
            # A non-dict second argument is matched against the CSS class.
            sub_blocks = cell.find_all('div', ["input", 'output_wrapper'],
                                       recursive=False)
            cost = len(cell.get_text()) + penalty * len(sub_blocks)
            # While nothing has been accumulated the first clause cannot
            # trigger, so an oversized first cell is still included.
            would_overflow = accumulated and accumulated + cost >= summary_size * 1.1
            if would_overflow or accumulated > summary_size:
                break
            accumulated += cost
            pieces.append(str(cell))
        return ''.join(pieces)