-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpmcite.py
114 lines (99 loc) · 3.69 KB
/
pmcite.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 7 13:29:41 2020
PMCitE - PubMed Citation Extractor
A utility to query the NCBI database for citation data/bibliography.
@author: pormr
"""
import os
#import re
import requests
from tkinter import Tk
from tkinter.filedialog import asksaveasfilename
# Ask for the PMID on the console and keep asking until the text entered
# can be converted to an integer.
# TODO: Add functionality for resolving DOIs
print('PMCitE - A utility to query the NCBI database for citation data/bibliography.')
PMID = None
while PMID is None:
    raw_pmid = input('Please input the PMID:\n')
    try:
        PMID = int(raw_pmid)
    except ValueError:
        # Not an integer literal: report it and prompt again.
        print('Invalid PMID.')
# Query the NCBI database for citation data.
# citation_URI is the E-utilities ELink endpoint, asked for the
# 'pubmed_pubmed_refs' links of the PMID; query_URI and post_URI are the
# PubMed web endpoints used further down to export the result set.
citation_URI = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?db=pubmed&id=%i&cmd=neighbor&linkname=pubmed_pubmed_refs"
query_URI = "https://pubmed.ncbi.nlm.nih.gov/?linkname=pubmed_pubmed_refs&from_uid=%i"
post_URI = "https://pubmed.ncbi.nlm.nih.gov/results-export-search-data/"
retry_times = 3
for i in range(retry_times):
    try:
        citation_XML = requests.post(citation_URI % PMID, timeout=5)
    except requests.exceptions.ReadTimeout:
        # Must come before Timeout: ReadTimeout is a subclass of Timeout, so
        # the broader handler would otherwise make this one unreachable.
        print('Read timeout - Retry for %i time(s)' % (i + 1))
    except requests.exceptions.Timeout:
        # Remaining timeouts (e.g. while connecting) are retried as well.
        print('Connection timeout - Retry for %i time(s)' % (i + 1))
    except requests.exceptions.SSLError:
        # Listed before ConnectionError because SSLError derives from it.
        print('SSL error - Retry for %i time(s)' % (i + 1))
    except requests.exceptions.ConnectionError:
        # Other unhandled network errors go here; these are not retried.
        print('Connection error - please check your connection.')
        # SystemExit (unlike os._exit) flushes stdout, so the message above is
        # not lost when output is piped, and reports failure to the caller.
        raise SystemExit(1)
    else:
        break
else:
    # Reached only when every attempt failed without a 'break'.
    print('The NCBI API is not available now.')
    raise SystemExit(1)
# Check the XML content for citation data.
# This is a plain substring test on the response body: when no 'LinkSetDb'
# text is present the script treats the PMID as having no citation data.
if 'LinkSetDb' not in citation_XML.text:
    print('Unable to find citation data for the specific PMID.')
    # SystemExit (unlike os._exit) flushes stdout before exiting, so the
    # message is not lost when piped, and signals failure with status 1.
    raise SystemExit(1)
# Fetch the 'csrftoken'.
# A Session is used so the cookies received here are kept and sent again
# with the export request made later through the same session object.
s = requests.Session()
try:
    # Without a timeout this request could block forever on a stalled
    # connection; the earlier ELink request already uses one.
    s.get(query_URI % PMID, timeout=10)
except requests.exceptions.RequestException:
    print('Connection error - please check your connection.')
    raise SystemExit(1)
# .get() returns None instead of raising KeyError when the cookie is absent,
# which lets us exit with a readable message rather than a traceback.
csrftoken = s.cookies.get('pm-csrf')
if csrftoken is None:
    print('Unable to obtain the CSRF token from PubMed.')
    raise SystemExit(1)
# Prompt for the filename with a native "Save as" dialog.
Tk_root = Tk()
Tk_root.withdraw()  # hide the empty root window; only the dialog is shown
# Maps a file extension (without the dot) to the value later sent as
# 'results-format'. The '~...' entries are dummy extensions: the three text
# formats all end up as '.txt', so each file type lists a distinct dummy
# pattern first, which is swapped for 'txt' once the dialog has returned.
format_mapping = {'nbib': 'pubmed', 'csv': 'csv', 'txt': 'summary-text',
                  '~pmid': 'pmid', '~abs': 'abstract', '~sum': 'summary-text'}
file_types = [('NBIB Formatted File (PubMed) (*.nbib)', '*.nbib'),
              ('Comma Separated Values (CSV) File (*.csv)', '*.csv'),
              ('Text File (PMIDs) (*.txt)', ('*.~pmid', '*.txt')),
              ('Text File (Summary) (*.txt)', ('*.~sum', '*.txt')),
              ('Text File (Abstract) (*.txt)', ('*.~abs', '*.txt')),
              ('All Files (*.*)', '*.*')]
default_name = "pubmed-%i.nbib" % PMID
default_dir = os.path.normpath(os.path.expanduser("~/Desktop"))
# defaultextension expects a single extension string; the original passed the
# whole file_types list. '.nbib' matches the first (initially selected) type.
filename = asksaveasfilename(initialfile=default_name,
                             filetypes=file_types,
                             defaultextension='.nbib',
                             initialdir=default_dir)
# The hidden root window is no longer needed once the dialog has closed.
Tk_root.destroy()
# Derive the export format from the chosen extension and replace the dummy
# '~...' extensions with 'txt'.
if filename:
    # splitext only looks at the last path component, so dots or '~' in
    # directory names (e.g. Windows short paths such as 'JOHNDO~1') can no
    # longer be mistaken for the extension and clobber the path.
    stem, ext = os.path.splitext(filename)
    ext_key = ext[1:].lower()  # drop the leading dot; ignore letter case
    # Unknown or missing extensions fall back to a plain PMID list.
    result_format = format_mapping.get(ext_key, 'pmid')
    if ext_key.startswith('~'):
        filename = stem + '.txt'
else:
    # The dialog returns an empty value when the user cancels.
    print('Saving aborted.')
    # SystemExit flushes stdout before exiting (os._exit does not).
    raise SystemExit(0)
# Build the headers and form fields of the export request, then send it
# through the session that already holds the cookies fetched earlier.
referer = (query_URI % PMID) + '#'
term_alias = 'LINKSET|pubmed_pubmed_refs|%i' % PMID
headers = {'Referer': referer}
post_data = {
    'csrfmiddlewaretoken': csrftoken,
    'results-format': result_format,
    'term': '',
    'term_alias': term_alias,
}
# stream=True: the body is consumed chunk by chunk when it is saved below.
result = s.post(post_URI, data=post_data, headers=headers, stream=True)
# Write raw data to the file.
# requests does not raise on HTTP error statuses by itself; bail out here so
# an error page is never saved as if it were citation data.
if not result.ok:
    print('The export request failed (HTTP %i).' % result.status_code)
    raise SystemExit(1)
try:
    # 'with' closes the file on every path. The original try/finally would
    # have hit an unbound 'f' if open() itself failed, and its os._exit()
    # call skipped the 'finally' clause altogether.
    with open(filename, "wb") as f:
        for chunk in result.iter_content(chunk_size=512):
            if chunk:
                f.write(chunk)  # TODO: Add progress bar?
except IOError:
    # NOTE: requests' exceptions derive from IOError, so a connection that
    # drops while streaming is reported through this branch as well.
    print("IO Error : Unable to save result to file.")
    # Exit with a failure status and flushed output (os._exit did neither).
    raise SystemExit(1)