Skip to content

Commit ceb33c2

Browse files
committed
improved formatting
1 parent 1c13539 commit ceb33c2

File tree

5 files changed

+61 -59 lines changed

5 files changed

+61 -59 lines changed

gscholar/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
from gscholar.gscholar import * # noqa
2-
from gscholar.version import __VERSION__ as __VERSION__ # noqa
2+
from gscholar.version import __VERSION__ as __VERSION__ # noqa

gscholar/__main__.py

Lines changed: 31 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,10 @@
77

88
import gscholar as gs
99

10-
logger = logging.getLogger('gscholar')
10+
logger = logging.getLogger("gscholar")
1111
logging.basicConfig(
12-
format='%(asctime)s %(levelname)s %(name)s %(message)s',
13-
level=logging.WARNING
12+
format="%(asctime)s %(levelname)s %(name)s %(message)s",
13+
level=logging.WARNING,
1414
)
1515

1616

@@ -19,46 +19,49 @@ def main() -> None:
1919
usage = 'Usage: %prog [options] {pdf | "search terms"}'
2020
parser = argparse.ArgumentParser(usage)
2121
parser.add_argument(
22-
"-a", "--all", action="store_true",
23-
help="show all bibtex results"
22+
"-a", "--all", action="store_true", help="show all bibtex results"
2423
)
2524
parser.add_argument(
26-
"-d", "--debug", action="store_true",
27-
help="show debugging output"
25+
"-d", "--debug", action="store_true", help="show debugging output"
2826
)
2927
parser.add_argument(
30-
"-r", "--rename", action="store_true",
31-
help="rename file"
28+
"-r", "--rename", action="store_true", help="rename file"
3229
)
3330
parser.add_argument(
34-
"-f", "--outputformat", dest='output', default="bibtex",
31+
"-f",
32+
"--outputformat",
33+
dest="output",
34+
default="bibtex",
3535
help=(
3636
"Output format. Available formats are: bibtex, endnote, refman,"
37-
"wenxianwang [default: %(default)s]"))
38-
parser.add_argument(
39-
"-s", "--startpage",
40-
help="Page number to start parsing PDF file at."
37+
"wenxianwang [default: %(default)s]"
38+
),
4139
)
4240
parser.add_argument(
43-
'--version', action='version', version=gs.__VERSION__)
41+
"-s", "--startpage", help="Page number to start parsing PDF file at."
42+
)
43+
parser.add_argument("--version", action="version", version=gs.__VERSION__)
4444
parser.add_argument(
45-
'keyword', metavar='{pdf | "search terms"}',
46-
help='pdf | "search terms"')
45+
"keyword",
46+
metavar='{pdf | "search terms"}',
47+
help='pdf | "search terms"',
48+
)
4749
args = parser.parse_args()
4850
if args.debug is True:
4951
logger.setLevel(logging.DEBUG)
5052

5153
outformat = {
52-
'bibtex': gs.FORMAT_BIBTEX,
53-
'endnote': gs.FORMAT_ENDNOTE,
54-
'refman': gs.FORMAT_REFMAN,
55-
'wenxianwang': gs.FORMAT_WENXIANWANG,
54+
"bibtex": gs.FORMAT_BIBTEX,
55+
"endnote": gs.FORMAT_ENDNOTE,
56+
"refman": gs.FORMAT_REFMAN,
57+
"wenxianwang": gs.FORMAT_WENXIANWANG,
5658
}[args.output]
5759

5860
pdfmode = False
5961
if os.path.exists(args.keyword):
60-
logger.debug(f"File exist, assuming you want me to lookup the pdf: "
61-
f"{args}.")
62+
logger.debug(
63+
f"File exist, assuming you want me to lookup the pdf: {args}."
64+
)
6265
pdfmode = True
6366
biblist = gs.pdflookup(
6467
args.keyword, args.all, outformat, args.startpage
@@ -78,12 +81,14 @@ def main() -> None:
7881
print(biblist[0])
7982
if args.rename is True:
8083
if not pdfmode:
81-
print("You asked me to rename the pdf but didn't tell me which "
82-
"file to rename, aborting.")
84+
print(
85+
"You asked me to rename the pdf but didn't tell me which "
86+
"file to rename, aborting."
87+
)
8388
sys.exit(1)
8489
else:
8590
gs.rename_file(args.keyword, biblist[0])
8691

8792

88-
if __name__ == '__main__':
93+
if __name__ == "__main__":
8994
main()

gscholar/gscholar.py

Lines changed: 27 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from urllib.request import Request, urlopen
1818

1919
GOOGLE_SCHOLAR_URL = "https://scholar.google.com"
20-
HEADERS = {'User-Agent': 'Mozilla/5.0'}
20+
HEADERS = {"User-Agent": "Mozilla/5.0"}
2121

2222
FORMAT_BIBTEX = 4
2323
FORMAT_ENDNOTE = 3
@@ -29,9 +29,7 @@
2929

3030

3131
def query(
32-
searchstr: str,
33-
outformat: int = FORMAT_BIBTEX,
34-
allresults: bool = False
32+
searchstr: str, outformat: int = FORMAT_BIBTEX, allresults: bool = False
3533
) -> list[str]:
3634
"""Query google scholar.
3735
@@ -53,17 +51,17 @@ def query(
5351
5452
"""
5553
logger.debug(f"Query: {searchstr}")
56-
searchstr = '/scholar?q='+quote(searchstr)
54+
searchstr = "/scholar?q=" + quote(searchstr)
5755
url = GOOGLE_SCHOLAR_URL + searchstr
5856
header = HEADERS
59-
header['Cookie'] = f"GSP=CF={outformat}"
57+
header["Cookie"] = f"GSP=CF={outformat}"
6058
request = Request(url, headers=header)
6159
response = urlopen(request)
6260
# add set_cookie in header in request header!
63-
set_cookie = response.headers['Set-Cookie']
64-
header['Cookie'] += set_cookie
61+
set_cookie = response.headers["Set-Cookie"]
62+
header["Cookie"] += set_cookie
6563
html = response.read()
66-
html = html.decode('utf8')
64+
html = html.decode("utf8")
6765
# grab the links
6866
tmp = get_links(html, outformat)
6967

@@ -72,11 +70,11 @@ def query(
7270
if not allresults:
7371
tmp = tmp[:1]
7472
for link in tmp:
75-
url = GOOGLE_SCHOLAR_URL+link
73+
url = GOOGLE_SCHOLAR_URL + link
7674
request = Request(url, headers=header)
7775
response = urlopen(request)
7876
bib = response.read()
79-
bib = bib.decode('utf8')
77+
bib = bib.decode("utf8")
8078
result.append(bib)
8179
return result
8280

@@ -96,24 +94,25 @@ def get_links(html: str, outformat: int) -> list[str]:
9694
the links to the references
9795
9896
"""
99-
base_url = 'https://scholar.googleusercontent.com'
97+
base_url = "https://scholar.googleusercontent.com"
10098
if outformat == FORMAT_BIBTEX:
101-
refre = re.compile(fr'<a href="{base_url}(/scholar\.bib\?[^"]*)')
99+
refre = re.compile(rf'<a href="{base_url}(/scholar\.bib\?[^"]*)')
102100
elif outformat == FORMAT_ENDNOTE:
103-
refre = re.compile(fr'<a href="{base_url}(/scholar\.enw\?[^"]*)"')
101+
refre = re.compile(rf'<a href="{base_url}(/scholar\.enw\?[^"]*)"')
104102
elif outformat == FORMAT_REFMAN:
105-
refre = re.compile(fr'<a href="{base_url}(/scholar\.ris\?[^"]*)"')
103+
refre = re.compile(rf'<a href="{base_url}(/scholar\.ris\?[^"]*)"')
106104
elif outformat == FORMAT_WENXIANWANG:
107-
refre = re.compile(fr'<a href="{base_url}(/scholar\.ral\?[^"]*)"')
105+
refre = re.compile(rf'<a href="{base_url}(/scholar\.ral\?[^"]*)"')
108106
reflist = refre.findall(html)
109107
# escape html entities
110108
reflist = [
111109
re.sub(
112-
'&({});'.format('|'.join(name2codepoint)),
110+
"&({});".format("|".join(name2codepoint)),
113111
lambda m: chr(name2codepoint[m.group(1)]), # type: ignore[index]
114-
s
112+
s,
115113
)
116-
for s in reflist]
114+
for s in reflist
115+
]
117116
return reflist
118117

119118

@@ -136,20 +135,19 @@ def convert_pdf_to_txt(pdf: str, startpage: int | None = None) -> str:
136135
137136
"""
138137
if startpage is not None:
139-
startpageargs = ['-f', str(startpage)]
138+
startpageargs = ["-f", str(startpage)]
140139
else:
141140
startpageargs = []
142-
stdout = subprocess.Popen(["pdftotext", "-q"] + startpageargs + [pdf, "-"],
143-
stdout=subprocess.PIPE).communicate()[0]
141+
stdout = subprocess.Popen(
142+
["pdftotext", "-q"] + startpageargs + [pdf, "-"],
143+
stdout=subprocess.PIPE,
144+
).communicate()[0]
144145

145146
return stdout.decode()
146147

147148

148149
def pdflookup(
149-
pdf: str,
150-
allresults: bool,
151-
outformat: int,
152-
startpage: int | None = None
150+
pdf: str, allresults: bool, outformat: int, startpage: int | None = None
153151
) -> list[str]:
154152
"""Look a pdf up on google scholar and return bibtex items.
155153
@@ -197,9 +195,9 @@ def _get_bib_element(bibitem: str, element: str) -> str | None:
197195
if i.startswith(element):
198196
value = i.split("=", 1)[-1]
199197
value = value.strip()
200-
while value.endswith(','):
198+
while value.endswith(","):
201199
value = value[:-1]
202-
while value.startswith('{') or value.startswith('"'):
200+
while value.startswith("{") or value.startswith('"'):
203201
value = value[1:-1]
204202
return value
205203
return None
@@ -215,5 +213,5 @@ def rename_file(pdf: str, bibitem: str) -> None:
215213
elem = [i for i in (year, author, title) if i]
216214
filename = "-".join(elem) + ".pdf"
217215
newfile = pdf.replace(os.path.basename(pdf), filename)
218-
logger.info(f'Renaming {pdf} to {newfile}')
216+
logger.info(f"Renaming {pdf} to {newfile}")
219217
os.rename(pdf, newfile)

gscholar/version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
"""Provide gscholar's version."""
22

3-
__VERSION__ = '2.1.0'
3+
__VERSION__ = "2.1.0"

tests/test_gscholar.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
"""test gscholar."""
22

3-
43
import pytest
54

65
from gscholar import gscholar as gs
@@ -9,7 +8,7 @@
98
@pytest.mark.xfail(reason="Google's rate limiter.")
109
def test_query() -> None:
1110
"""Normal query with latin encoding should give non empty result."""
12-
result = gs.query('Albert Einstein', gs.FORMAT_BIBTEX)
11+
result = gs.query("Albert Einstein", gs.FORMAT_BIBTEX)
1312
assert len(result) > 0
1413

1514

0 commit comments

Comments
 (0)