Skip to content

Commit 80082a3

Browse files
committed
fix parser
1 parent 1113dee commit 80082a3

File tree

1 file changed

+4 -6 lines changed

1 file changed

+4 -6 lines changed

site/xls_parser.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ class XLSDocument:
2424
author: str
2525
folder: str
2626
filename: str
27-
status: str # draft, candidate, released, etc.
27+
status: str # draft, final, stagnant, withdrawn, etc.
2828

2929
def to_dict(self):
3030
return asdict(self)
@@ -61,6 +61,7 @@ def extract_xls_metadata(content: str, folder_name: str) -> Optional[XLSDocument
6161
r"[dD]escription:\s*(.*?)(?:\n|$)",
6262
],
6363
"author": [r"[aA]uthor:\s*(.*?)(?:\n|$)"],
64+
"status": [r"[sS]tatus:\s*(.*?)(?:\n|$)"],
6465
}
6566

6667
for key, pattern_list in patterns.items():
@@ -69,7 +70,7 @@ def extract_xls_metadata(content: str, folder_name: str) -> Optional[XLSDocument
6970
if match:
7071
value = match.group(1).strip()
7172
# Clean HTML tags from value
72-
value = BeautifulSoup(value, "html.parser").get_text()
73+
value = BeautifulSoup(value, "html.parser").get_text().replace(" ,", ",").strip()
7374
metadata[key] = value
7475
break
7576
else:
@@ -91,11 +92,8 @@ def extract_xls_metadata(content: str, folder_name: str) -> Optional[XLSDocument
9192
xls_match = re.match(r"XLS-(\d+)([d]?)", folder_name)
9293
if xls_match:
9394
number = xls_match.group(1)
94-
is_draft = xls_match.group(2) == "d"
95-
status = "draft" if is_draft else "released"
9695
else:
9796
number = "000"
98-
status = "unknown"
9997

10098
return XLSDocument(
10199
number=number,
@@ -104,7 +102,7 @@ def extract_xls_metadata(content: str, folder_name: str) -> Optional[XLSDocument
104102
author=metadata["author"],
105103
folder=folder_name,
106104
filename="README.md",
107-
status=status,
105+
status=metadata["status"],
108106
)
109107

110108

0 commit comments

Comments
 (0)