improved formatting

venthur · venthur · commit ceb33c254539 · 2025-07-22T10:22:19.000+02:00
diff --git a/gscholar/__init__.py b/gscholar/__init__.py
@@ -1,2 +1,2 @@
 from gscholar.gscholar import *  # noqa
-from gscholar.version import __VERSION__ as __VERSION__ # noqa
+from gscholar.version import __VERSION__ as __VERSION__  # noqa
diff --git a/gscholar/__main__.py b/gscholar/__main__.py
@@ -7,10 +7,10 @@
 
 import gscholar as gs
 
-logger = logging.getLogger('gscholar')
+logger = logging.getLogger("gscholar")
 logging.basicConfig(
-    format='%(asctime)s %(levelname)s %(name)s %(message)s',
-    level=logging.WARNING
+    format="%(asctime)s %(levelname)s %(name)s %(message)s",
+    level=logging.WARNING,
 )
 
 
@@ -19,46 +19,49 @@ def main() -> None:
     usage = 'Usage: %prog [options] {pdf | "search terms"}'
     parser = argparse.ArgumentParser(usage)
     parser.add_argument(
-        "-a", "--all", action="store_true",
-        help="show all bibtex results"
+        "-a", "--all", action="store_true", help="show all bibtex results"
     )
     parser.add_argument(
-        "-d", "--debug", action="store_true",
-        help="show debugging output"
+        "-d", "--debug", action="store_true", help="show debugging output"
     )
     parser.add_argument(
-        "-r", "--rename", action="store_true",
-        help="rename file"
+        "-r", "--rename", action="store_true", help="rename file"
     )
     parser.add_argument(
-        "-f", "--outputformat", dest='output', default="bibtex",
+        "-f",
+        "--outputformat",
+        dest="output",
+        default="bibtex",
         help=(
             "Output format. Available formats are: bibtex, endnote, refman,"
-            "wenxianwang [default: %(default)s]"))
-    parser.add_argument(
-        "-s", "--startpage",
-        help="Page number to start parsing PDF file at."
+            "wenxianwang [default: %(default)s]"
+        ),
     )
     parser.add_argument(
-        '--version', action='version', version=gs.__VERSION__)
+        "-s", "--startpage", help="Page number to start parsing PDF file at."
+    )
+    parser.add_argument("--version", action="version", version=gs.__VERSION__)
     parser.add_argument(
-        'keyword', metavar='{pdf | "search terms"}',
-        help='pdf | "search terms"')
+        "keyword",
+        metavar='{pdf | "search terms"}',
+        help='pdf | "search terms"',
+    )
     args = parser.parse_args()
     if args.debug is True:
         logger.setLevel(logging.DEBUG)
 
     outformat = {
-        'bibtex': gs.FORMAT_BIBTEX,
-        'endnote': gs.FORMAT_ENDNOTE,
-        'refman': gs.FORMAT_REFMAN,
-        'wenxianwang': gs.FORMAT_WENXIANWANG,
+        "bibtex": gs.FORMAT_BIBTEX,
+        "endnote": gs.FORMAT_ENDNOTE,
+        "refman": gs.FORMAT_REFMAN,
+        "wenxianwang": gs.FORMAT_WENXIANWANG,
     }[args.output]
 
     pdfmode = False
     if os.path.exists(args.keyword):
-        logger.debug(f"File exist, assuming you want me to lookup the pdf: "
-                     f"{args}.")
+        logger.debug(
+            f"File exist, assuming you want me to lookup the pdf: {args}."
+        )
         pdfmode = True
         biblist = gs.pdflookup(
             args.keyword, args.all, outformat, args.startpage
@@ -78,12 +81,14 @@ def main() -> None:
         print(biblist[0])
     if args.rename is True:
         if not pdfmode:
-            print("You asked me to rename the pdf but didn't tell me which "
-                  "file to rename, aborting.")
+            print(
+                "You asked me to rename the pdf but didn't tell me which "
+                "file to rename, aborting."
+            )
             sys.exit(1)
         else:
             gs.rename_file(args.keyword, biblist[0])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/gscholar/gscholar.py b/gscholar/gscholar.py
@@ -17,7 +17,7 @@
 from urllib.request import Request, urlopen
 
 GOOGLE_SCHOLAR_URL = "https://scholar.google.com"
-HEADERS = {'User-Agent': 'Mozilla/5.0'}
+HEADERS = {"User-Agent": "Mozilla/5.0"}
 
 FORMAT_BIBTEX = 4
 FORMAT_ENDNOTE = 3
@@ -29,9 +29,7 @@
 
 
 def query(
-    searchstr: str,
-    outformat: int = FORMAT_BIBTEX,
-    allresults: bool = False
+    searchstr: str, outformat: int = FORMAT_BIBTEX, allresults: bool = False
 ) -> list[str]:
     """Query google scholar.
 
@@ -53,17 +51,17 @@ def query(
 
     """
     logger.debug(f"Query: {searchstr}")
-    searchstr = '/scholar?q='+quote(searchstr)
+    searchstr = "/scholar?q=" + quote(searchstr)
     url = GOOGLE_SCHOLAR_URL + searchstr
     header = HEADERS
-    header['Cookie'] = f"GSP=CF={outformat}"
+    header["Cookie"] = f"GSP=CF={outformat}"
     request = Request(url, headers=header)
     response = urlopen(request)
     # add set_cookie in header in request header!
-    set_cookie = response.headers['Set-Cookie']
-    header['Cookie'] += set_cookie
+    set_cookie = response.headers["Set-Cookie"]
+    header["Cookie"] += set_cookie
     html = response.read()
-    html = html.decode('utf8')
+    html = html.decode("utf8")
     # grab the links
     tmp = get_links(html, outformat)
 
@@ -72,11 +70,11 @@ def query(
     if not allresults:
         tmp = tmp[:1]
     for link in tmp:
-        url = GOOGLE_SCHOLAR_URL+link
+        url = GOOGLE_SCHOLAR_URL + link
         request = Request(url, headers=header)
         response = urlopen(request)
         bib = response.read()
-        bib = bib.decode('utf8')
+        bib = bib.decode("utf8")
         result.append(bib)
     return result
 
@@ -96,24 +94,25 @@ def get_links(html: str, outformat: int) -> list[str]:
         the links to the references
 
     """
-    base_url = 'https://scholar.googleusercontent.com'
+    base_url = "https://scholar.googleusercontent.com"
     if outformat == FORMAT_BIBTEX:
-        refre = re.compile(fr'<a href="{base_url}(/scholar\.bib\?[^"]*)')
+        refre = re.compile(rf'<a href="{base_url}(/scholar\.bib\?[^"]*)')
     elif outformat == FORMAT_ENDNOTE:
-        refre = re.compile(fr'<a href="{base_url}(/scholar\.enw\?[^"]*)"')
+        refre = re.compile(rf'<a href="{base_url}(/scholar\.enw\?[^"]*)"')
     elif outformat == FORMAT_REFMAN:
-        refre = re.compile(fr'<a href="{base_url}(/scholar\.ris\?[^"]*)"')
+        refre = re.compile(rf'<a href="{base_url}(/scholar\.ris\?[^"]*)"')
     elif outformat == FORMAT_WENXIANWANG:
-        refre = re.compile(fr'<a href="{base_url}(/scholar\.ral\?[^"]*)"')
+        refre = re.compile(rf'<a href="{base_url}(/scholar\.ral\?[^"]*)"')
     reflist = refre.findall(html)
     # escape html entities
     reflist = [
         re.sub(
-            '&({});'.format('|'.join(name2codepoint)),
+            "&({});".format("|".join(name2codepoint)),
             lambda m: chr(name2codepoint[m.group(1)]),  # type: ignore[index]
-            s
+            s,
         )
-        for s in reflist]
+        for s in reflist
+    ]
     return reflist
 
 
@@ -136,20 +135,19 @@ def convert_pdf_to_txt(pdf: str, startpage: int | None = None) -> str:
 
     """
     if startpage is not None:
-        startpageargs = ['-f', str(startpage)]
+        startpageargs = ["-f", str(startpage)]
     else:
         startpageargs = []
-    stdout = subprocess.Popen(["pdftotext", "-q"] + startpageargs + [pdf, "-"],
-                              stdout=subprocess.PIPE).communicate()[0]
+    stdout = subprocess.Popen(
+        ["pdftotext", "-q"] + startpageargs + [pdf, "-"],
+        stdout=subprocess.PIPE,
+    ).communicate()[0]
 
     return stdout.decode()
 
 
 def pdflookup(
-        pdf: str,
-        allresults: bool,
-        outformat: int,
-        startpage: int | None = None
+    pdf: str, allresults: bool, outformat: int, startpage: int | None = None
 ) -> list[str]:
     """Look a pdf up on google scholar and return bibtex items.
 
@@ -197,9 +195,9 @@ def _get_bib_element(bibitem: str, element: str) -> str | None:
         if i.startswith(element):
             value = i.split("=", 1)[-1]
             value = value.strip()
-            while value.endswith(','):
+            while value.endswith(","):
                 value = value[:-1]
-            while value.startswith('{') or value.startswith('"'):
+            while value.startswith("{") or value.startswith('"'):
                 value = value[1:-1]
             return value
     return None
@@ -215,5 +213,5 @@ def rename_file(pdf: str, bibitem: str) -> None:
     elem = [i for i in (year, author, title) if i]
     filename = "-".join(elem) + ".pdf"
     newfile = pdf.replace(os.path.basename(pdf), filename)
-    logger.info(f'Renaming {pdf} to {newfile}')
+    logger.info(f"Renaming {pdf} to {newfile}")
     os.rename(pdf, newfile)
diff --git a/gscholar/version.py b/gscholar/version.py
@@ -1,3 +1,3 @@
 """Provide gscholar's version."""
 
-__VERSION__ = '2.1.0'
+__VERSION__ = "2.1.0"
diff --git a/tests/test_gscholar.py b/tests/test_gscholar.py
@@ -1,6 +1,5 @@
 """test gscholar."""
 
-
 import pytest
 
 from gscholar import gscholar as gs
@@ -9,7 +8,7 @@
 @pytest.mark.xfail(reason="Google's rate limiter.")
 def test_query() -> None:
     """Normal query with latin encoding should give non empty result."""
-    result = gs.query('Albert Einstein', gs.FORMAT_BIBTEX)
+    result = gs.query("Albert Einstein", gs.FORMAT_BIBTEX)
     assert len(result) > 0
 
 

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`from gscholar.gscholar import *  # noqa`
`2`		`-from gscholar.version import __VERSION__ as __VERSION__ # noqa`
	`2`	`+from gscholar.version import __VERSION__ as __VERSION__  # noqa`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`"""Provide gscholar's version."""`
`2`	`2`
`3`		`-__VERSION__ = '2.1.0'`
	`3`	`+__VERSION__ = "2.1.0"`