Skip to content

Commit ae36174

Browse files
Merge pull request #536 from Luen/main
Add in PDF link in publication fill
2 parents 483b338 + 35f97d7 commit ae36174

File tree

2 files changed

+12
-2
lines changed

2 files changed

+12
-2
lines changed

scholarly/publication_parser.py

+8
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,10 @@ def _scholar_pub(self, __data, publication: Publication):
202202
if title.find('a'):
203203
publication['pub_url'] = title.find('a')['href']
204204

205+
pdf_div = __data.find('div', class_='gs_ggs gs_fl')
206+
if pdf_div and pdf_div.find('a', href=True):
207+
publication['eprint_url'] = pdf_div.find('a')['href']
208+
205209
author_div_element = databox.find('div', class_='gs_a')
206210
authorinfo = author_div_element.text
207211
authorinfo = authorinfo.replace(u'\xa0', u' ') # NBSP
@@ -286,6 +290,10 @@ def fill(self, publication: Publication)->Publication:
286290
if soup.find('a', class_='gsc_oci_title_link'):
287291
publication['pub_url'] = soup.find(
288292
'a', class_='gsc_oci_title_link')['href']
293+
if soup.find('div', class_='gsc_oci_title_ggi'):
294+
link = soup.find('a', attrs={'data-clk': True})
295+
if link:
296+
publication['eprint_url'] = link['href']
289297
for item in soup.find_all('div', class_='gs_scl'):
290298
key = item.find(class_='gsc_oci_field').text.strip().lower()
291299
val = item.find(class_='gsc_oci_value')

test_module.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ def test_search_pubs(self):
724724
pubs = list(scholarly.search_pubs('"naive physics" stability "3d shape"'))
725725
# Check that the first entry in pubs is the same as pub.
726726
# Checking for equality holds for non-dict entries only.
727-
for key in {'author_id', 'pub_url', 'num_citations'}:
727+
for key in {'author_id', 'pub_url', 'eprint_url', 'num_citations'}:
728728
self.assertEqual(pub[key], pubs[0][key])
729729
for key in {'title', 'pub_year', 'venue'}:
730730
self.assertEqual(pub['bib'][key], pubs[0]['bib'][key])
@@ -784,6 +784,7 @@ def test_search_pubs_filling_publication_contents(self):
784784
self.assertTrue(f['bib']['publisher'] == u'The Association for Research in Vision and Ophthalmology')
785785
self.assertTrue(f['bib']['title'] == u'Creating correct blur and its effect on accommodation')
786786
self.assertTrue(f['pub_url'] == u'https://jov.arvojournals.org/article.aspx?articleid=2701817')
787+
self.assertTrue(f['eprint_url'] == u'https://jov.arvojournals.org/arvo/content_public/journal/jov/937491/i1534-7362-18-9-1.pdf')
787788
self.assertTrue(f['bib']['volume'] == '18')
788789
self.assertTrue(f['bib']['pub_year'] == u'2018')
789790

@@ -800,6 +801,7 @@ def test_related_articles_from_author(self):
800801
# Typically, the same publication is returned as the most related article
801802
same_article = next(related_articles)
802803
self.assertEqual(pub["pub_url"], same_article["pub_url"])
804+
self.assertEqual(pub["eprint_url"], same_article["eprint_url"])
803805
for key in {'title', 'pub_year'}:
804806
self.assertEqual(str(pub['bib'][key]), (same_article['bib'][key]))
805807

@@ -818,7 +820,7 @@ def test_related_articles_from_publication(self):
818820
related_articles = scholarly.get_related_articles(pub)
819821
# Typically, the same publication is returned as the most related article
820822
same_article = next(related_articles)
821-
for key in {'author_id', 'pub_url', 'num_citations'}:
823+
for key in {'author_id', 'pub_url', 'eprint_url', 'num_citations'}:
822824
self.assertEqual(pub[key], same_article[key])
823825
for key in {'title', 'pub_year'}:
824826
self.assertEqual(pub['bib'][key], same_article['bib'][key])

0 commit comments

Comments
 (0)