@@ -24,7 +24,7 @@ class XLSDocument:
2424 author : str
2525 folder : str
2626 filename : str
27- status : str # draft, candidate, released , etc.
27+ status : str # draft, final, stagnant, withdrawn, etc.
2828
2929 def to_dict (self ):
3030 return asdict (self )
@@ -61,6 +61,7 @@ def extract_xls_metadata(content: str, folder_name: str) -> Optional[XLSDocument
6161 r"[dD]escription:\s*(.*?)(?:\n|$)" ,
6262 ],
6363 "author" : [r"[aA]uthor:\s*(.*?)(?:\n|$)" ],
64+ "status" : [r"[sS]tatus:\s*(.*?)(?:\n|$)" ],
6465 }
6566
6667 for key , pattern_list in patterns .items ():
@@ -69,7 +70,7 @@ def extract_xls_metadata(content: str, folder_name: str) -> Optional[XLSDocument
6970 if match :
7071 value = match .group (1 ).strip ()
7172 # Clean HTML tags from value
72- value = BeautifulSoup (value , "html.parser" ).get_text ()
73+ value = BeautifulSoup (value , "html.parser" ).get_text (). replace ( " ," , "," ). strip ()
7374 metadata [key ] = value
7475 break
7576 else :
@@ -91,11 +92,8 @@ def extract_xls_metadata(content: str, folder_name: str) -> Optional[XLSDocument
9192 xls_match = re .match (r"XLS-(\d+)([d]?)" , folder_name )
9293 if xls_match :
9394 number = xls_match .group (1 )
94- is_draft = xls_match .group (2 ) == "d"
95- status = "draft" if is_draft else "released"
9695 else :
9796 number = "000"
98- status = "unknown"
9997
10098 return XLSDocument (
10199 number = number ,
@@ -104,7 +102,7 @@ def extract_xls_metadata(content: str, folder_name: str) -> Optional[XLSDocument
104102 author = metadata ["author" ],
105103 folder = folder_name ,
106104 filename = "README.md" ,
107- status = status ,
105+ status = metadata [ " status" ] ,
108106 )
109107
110108
0 commit comments