@@ -209,14 +209,14 @@ def html_to_paper(html):
209209
210210 Returns
211211 -------
212- dict
212+ paper : dict
213213 A dictionary containing metadata about the submission.
214214
215215 Notes
216216 -----
217217 The function will look at the JSON in the HTML. If title and author are not
218- found in JSON the the metatags are examined, citation_title and
219- citation_author.
218+ found in JSON then the metatags are examined, citation_title,
219+ citation_author, citation_online_date, description and citation_pdf_url.
220220
221221 The paper is not matched to proceedings.
222222
@@ -227,6 +227,9 @@ def html_to_paper(html):
227227 >>> paper['title'].startswith('Learning to grok')
228228 True
229229
230+ >>> paper['url']
231+ 'https://openreview.net/forum?id=aVh9KRZdRk'
232+
230233 """
231234 tree = etree .HTML (html )
232235 data = {}
@@ -267,6 +270,11 @@ def html_to_paper(html):
267270 # For instance https://openreview.net/forum?id=0g0X4H8yN4I
268271 # Instead we look at the metatags
269272
273+ if 'abstract' not in data :
274+ abstract = _field_to_content (tree , 'description' )
275+ if abstract :
276+ data ['abstract' ] = abstract
277+
270278 if 'authors' not in data :
271279 authors = [
272280 author_element .attrib ['content' ]
@@ -283,12 +291,33 @@ def html_to_paper(html):
283291 if len (authors ) > 0 :
284292 data ['authors' ] = authors
285293
294+ if 'date' not in data :
295+ date = _field_to_content (tree , 'citation_online_date' )
296+ if date :
297+ data ['date' ] = date .replace ('/' , '-' )
298+
299+ if 'full_text_url' not in data :
300+ full_text_url = _field_to_content (tree , 'citation_pdf_url' )
301+ if full_text_url :
302+ data ['full_text_url' ] = full_text_url
303+
304+ if 'openreview_id' not in data :
305+ if 'full_text_url' in data :
306+ openreview_id = paper_url_to_identifier (data ['full_text_url' ])
307+ if openreview_id :
308+ data ['openreview_id' ] = openreview_id
309+
286310 if 'title' not in data :
287311 title = _fields_to_content (tree , ['citation_title' , 'DC.Title' ,
288312 'DC.Title.Alternative' ])
289313 if title :
290314 data ['title' ] = title
291315
316+ if 'url' not in data :
317+ if 'openreview_id' in data :
318+ data ['url' ] = 'https://openreview.net/forum?id=' + \
319+ data ['openreview_id' ]
320+
292321 return data
293322
294323
0 commit comments