Open
Description
There are many entries in docs/oscar_references.bib
which have neither a DOI nor a URL field, but for which an online version clearly exists.
Someone should go through the file and resolve this with the help of MathSciNet and zbMATH, at least where possible with reasonable effort.
This would be an ideal task for a HiWi
UPDATE: there is a script at https://tex.stackexchange.com/a/300474 which is super handy for automatically adding DOIs. It is not perfect but still helps. I've run it now, but for future reference I include its code below. Also https://www.doi2bib.org/ is useful for turning DOIs into bib entries (not perfect ones but can still be helpful)
#!/usr/bin/env python
import sys, re
from unidecode import unidecode
import bibtexparser
from bibtexparser.bwriter import BibTexWriter
import http.client as httplib
import urllib
# Search for the DOI given a title; e.g. "computation in Noisy Radio Networks"
# Credit to user13348, slight modifications
# http://tex.stackexchange.com/questions/6810/automatically-adding-doi-fields-to-a-hand-made-bibliography
def searchdoi(title, author):
    """Look up a DOI through Crossref's guest query form.

    Sends an author/title query as a form POST to
    ``https://www.crossref.org/guestquery/`` and scans the returned page for
    the first ``doi.org/...`` link.

    Args:
        title: Title of the work to search for.
        author: Author name to search for.

    Returns:
        The ``re.Match`` for the first DOI link in the response, with the DOI
        itself in group 1, or ``None`` if the response contains no such link.

    Raises:
        OSError: If the connection fails or times out.
        http.client.HTTPException: If the HTTP exchange itself fails.
    """
    params = urllib.parse.urlencode({
        "titlesearch": "titlesearch",
        "auth2": author,
        "atitle2": title,
        "multi_hit": "on",
        "article_title_search": "Search",
        "queryType": "author-title",
    })
    headers = {
        "User-Agent": "Mozilla/5.0",
        "Accept": "text/html",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "www.crossref.org",
    }
    # Plain HTTP on port 80 stopped working; HTTPS is required.
    # The timeout keeps one stalled request from hanging the whole run.
    conn = httplib.HTTPSConnection("www.crossref.org", timeout=30)
    try:
        conn.request("POST", "/guestquery/", params, headers)
        response = conn.getresponse()
        data = response.read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()
    # Decode the body instead of searching the repr() of the bytes object:
    # the repr turns newlines and non-ASCII bytes into escape sequences that
    # could end up inside the captured DOI.
    # NOTE(review): assumes the page is UTF-8; undecodable bytes are replaced.
    text = data.decode("utf-8", errors="replace")
    # A DOI link ends at a quote, caret, angle bracket or any whitespace.
    return re.search(r'doi\.org/([^"^<>\s]+)', text)
def normalize(string):
    """Return an ASCII version of *string* with LaTeX markup removed.

    Braces, backslashes, single and double quotes and carets are deleted
    first; then every ``$...$`` span is dropped; finally the remaining text
    is passed through ``unidecode``.
    """
    without_markup = re.sub(r'[{}\\\'"^]', "", string)
    # Inline math rarely helps a title search, so drop it entirely.
    without_math = re.sub(r"\$.*?\$", "", without_markup)
    return unidecode(without_math)
def get_authors(entry):
    """Get a list of authors' or editors' last names.

    Reads the entry's ``author`` field, falling back to ``editor`` when there
    is no ``author``, normalizes it with ``normalize`` and splits it into
    individual names on the word ``and``.

    Args:
        entry: Mapping of BibTeX field names to their string values.

    Returns:
        A list with one last name per listed person: the part before the
        comma for ``Last, First`` names, otherwise the final word of the name.

    Raises:
        KeyError: If the entry has neither an ``author`` nor an ``editor``
            field.
    """
    def get_last_name(authors):
        for author in authors:
            # Multi-line fields leave newlines and tabs around names, so
            # strip all whitespace rather than only spaces.
            author = author.strip()
            if not author:
                continue
            if "," in author:
                yield author.split(",")[0].strip()
            else:
                yield author.split()[-1]

    try:
        authors = entry["author"]
    except KeyError:
        authors = entry["editor"]
    # Split only on "and" standing as its own word. A plain substring split
    # would also cut names such as "Alexander" or "Brandt" in two.
    authors = re.split(r"\s+and\s+", normalize(authors).strip())
    return list(get_last_name(authors))
# Script body: read the .bib file named on the command line, try to find a
# DOI for every entry that lacks one, and write the result to
# "<input>_doi.bib".
if len(sys.argv) < 2:
    sys.exit("Usage: {prog} BIBFILE".format(prog=sys.argv[0]))

print("Reading Bibliography...")
with open(sys.argv[1], encoding="utf-8") as bibtex_file:
    bibliography = bibtexparser.load(bibtex_file)

print("Looking for Dois...")
before = 0   # entries that already had a DOI
new = 0      # entries for which a DOI was found
failed = 0   # entries whose lookup raised an error
total = len(bibliography.entries)
for i, entry in enumerate(bibliography.entries):
    print("\r{i}/{total} entries processed, please wait...".format(i=i, total=total), flush=True, end="")
    # An empty or whitespace-only field counts as "no DOI".
    if str(entry.get("doi") or "").strip():
        before += 1
        continue
    try:
        title = entry["title"]
        authors = get_authors(entry)
        for author in authors:
            doi_match = searchdoi(title, author)
            if doi_match:
                entry["doi"] = doi_match.group(1)
                new += 1
                # One hit is enough: stopping here counts each entry once
                # and avoids needless requests for the remaining authors.
                break
    except Exception as err:
        # Best effort: a missing field or a network error must not abort the
        # run, but it is reported instead of being silently dropped.
        # KeyboardInterrupt is not an Exception, so Ctrl-C still stops the script.
        failed += 1
        print("\nSkipping entry {key}: {err!r}".format(key=entry.get("ID", i), err=err), file=sys.stderr)
print("")

template="We added {new} DOIs !\nBefore: {before}/{total} entries had DOI\nNow: {after}/{total} entries have DOI"
print(template.format(new=new,before=before,after=before+new,total=total))
if failed:
    print("{failed} entries were skipped because of errors".format(failed=failed))

outfile = sys.argv[1]+"_doi.bib"
print("Writing result to ",outfile)
writer = BibTexWriter()
writer.indent = '    '  # indent entries with 4 spaces instead of one
with open(outfile, 'w', encoding="utf-8") as bibfile:
    bibfile.write(writer.write(bibliography))