diff --git a/api/dis_plots.py b/api/dis_plots.py
index ae7fade..258b5fb 100644
--- a/api/dis_plots.py
+++ b/api/dis_plots.py
@@ -1,6 +1,6 @@
-''' dis_plots.py
- Plot functions for the DIS UI
-'''
+"""dis_plots.py
+Plot functions for the DIS UI
+"""
from math import pi
import pandas as pd
@@ -13,86 +13,101 @@
SOURCE3_PALETTE = ["mediumblue", "darkorange", "wheat"]
TYPE_PALETTE = ["mediumblue", "darkorange", "wheat", "darkgray"]
+
# ******************************************************************************
# * Utility functions *
# ******************************************************************************
def _preprint_type_piechart(coll, year):
- ''' Create a preprint type pie chart
- Keyword arguments:
- coll: dois collection
- year: year or "All"
- Returns:
- Chart components
- '''
+ """Create a preprint type pie chart
+ Keyword arguments:
+ coll: dois collection
+ year: year or "All"
+ Returns:
+ Chart components
+ """
match = {"type": "posted-content"}
- if year != 'All':
- match['jrc_publishing_date'] = {"$regex": "^"+ year}
- payload = [{"$match": match},
- {"$group": {"_id": {"institution": "$institution"},"count": {"$sum": 1}}}]
+ if year != "All":
+ match["jrc_publishing_date"] = {"$regex": "^" + year}
+ payload = [
+ {"$match": match},
+ {"$group": {"_id": {"institution": "$institution"}, "count": {"$sum": 1}}},
+ ]
try:
rows = coll.aggregate(payload)
except Exception as err:
raise err
data = {}
for row in rows:
- if not row['_id']['institution']:
- data['No institution'] = row['count']
+ if not row["_id"]["institution"]:
+ data["No institution"] = row["count"]
else:
- data[row['_id']['institution'][0]['name']] = row['count']
+ data[row["_id"]["institution"][0]["name"]] = row["count"]
if not data:
return None, None
title = "Preprint DOI institutions"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- return pie_chart(dict(sorted(data.items())), title,
- "source", width=600, height=400, location='bottom_right')
+ return pie_chart(
+ dict(sorted(data.items())),
+ title,
+ "source",
+ width=600,
+ height=400,
+ location="bottom_right",
+ )
def _preprint_capture_piechart(coll, year):
- ''' Create a preprint capture pie chart
- Keyword arguments:
- coll: dois collection
- year: year or "All"
- Returns:
- Chart components
- '''
+ """Create a preprint capture pie chart
+ Keyword arguments:
+ coll: dois collection
+ year: year or "All"
+ Returns:
+ Chart components
+ """
data = {}
- payload = {"subtype": "preprint", "jrc_preprint": {"$exists": 1},
- "relation.is-preprint-of": {"$exists": 0}}
- if year != 'All':
- payload['jrc_publishing_date'] = {"$regex": "^"+ year}
+ payload = {
+ "subtype": "preprint",
+ "jrc_preprint": {"$exists": 1},
+ "relation.is-preprint-of": {"$exists": 0},
+ }
+ if year != "All":
+ payload["jrc_publishing_date"] = {"$regex": "^" + year}
try:
- data['Fuzzy matching'] = coll.count_documents(payload)
+ data["Fuzzy matching"] = coll.count_documents(payload)
except Exception as err:
raise err
- del payload['relation.is-preprint-of']
+ del payload["relation.is-preprint-of"]
try:
- data['Crossref relation'] = coll.count_documents(payload)
+ data["Crossref relation"] = coll.count_documents(payload)
except Exception as err:
raise err
- data['Crossref relation'] = data['Crossref relation'] - data['Fuzzy matching']
- if not data['Crossref relation'] and not data['Fuzzy matching']:
+ data["Crossref relation"] = data["Crossref relation"] - data["Fuzzy matching"]
+ if not data["Crossref relation"] and not data["Fuzzy matching"]:
return None, None
title = "Preprint capture method"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- return pie_chart(data, title, "source", colors=SOURCE_PALETTE, width=600, height=400)
+ return pie_chart(
+ data, title, "source", colors=SOURCE_PALETTE, width=600, height=400
+ )
def preprint_pie_charts(data, year, coll):
- ''' Create a preprint capture pie chart
- Keyword arguments:
- data: dictionary of data
- year: year or "All"
- coll: dois collection
- Returns:
- Chart components
- '''
+ """Create a preprint capture pie chart
+ Keyword arguments:
+ data: dictionary of data
+ year: year or "All"
+ coll: dois collection
+ Returns:
+ Chart components
+ """
title = "DOIs by preprint status"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- chartscript, chartdiv = pie_chart(data, title, "source",
- colors=SOURCE_PALETTE, width=600, height=400)
+ chartscript, chartdiv = pie_chart(
+ data, title, "source", colors=SOURCE_PALETTE, width=600, height=400
+ )
# Preprint types
try:
script2, div2 = _preprint_type_piechart(coll, year)
@@ -111,41 +126,60 @@ def preprint_pie_charts(data, year, coll):
raise err
return chartscript, chartdiv
+
# ******************************************************************************
# * Basic charts *
# ******************************************************************************
-def pie_chart(data, title, legend, height=300, width=400, location="right", colors=None):
- ''' Create a pie chart
- Keyword arguments:
- data: dictionary of data
- title: chart title
- legend: data key name
- height: height of the chart (optional)
- width: width of the chart (optional)
- colors: list of colors (optional)
- Returns:
- Figure components
- '''
+
+def pie_chart(
+ data, title, legend, height=300, width=400, location="right", colors=None
+):
+ """Create a pie chart
+ Keyword arguments:
+ data: dictionary of data
+ title: chart title
+ legend: data key name
+ height: height of the chart (optional)
+ width: width of the chart (optional)
+ colors: list of colors or palette name (optional)
+ Returns:
+ Figure components
+ """
if len(data) == 1:
colors = ["mediumblue"]
elif len(data) == 2:
colors = SOURCE_PALETTE
if not colors:
- colors = all_palettes['Category10'][len(data)]
+ colors = all_palettes["Category10"][len(data)]
elif isinstance(colors, str):
print(colors)
colors = all_palettes[colors][len(data)]
- pdata = pd.Series(data).reset_index(name='value').rename(columns={'index': legend})
- pdata['angle'] = pdata['value']/pdata['value'].sum() * 2*pi
- pdata['percentage'] = pdata['value']/pdata['value'].sum()*100
- pdata['color'] = colors
+ pdata = pd.Series(data).reset_index(name="value").rename(columns={"index": legend})
+ pdata["angle"] = pdata["value"] / pdata["value"].sum() * 2 * pi
+ pdata["percentage"] = pdata["value"] / pdata["value"].sum() * 100
+ pdata["color"] = colors
tooltips = f"@{legend}: @value (@percentage%)"
- plt = figure(title=title, toolbar_location=None, height=height, width=width,
- tools="hover", tooltips=tooltips, x_range=(-0.5, 1.0))
- plt.wedge(x=0, y=1, radius=0.4,
- start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
- line_color="white", fill_color='color', legend_field=legend, source=pdata)
+ plt = figure(
+ title=title,
+ toolbar_location=None,
+ height=height,
+ width=width,
+ tools="hover",
+ tooltips=tooltips,
+ x_range=(-0.5, 1.0),
+ )
+ plt.wedge(
+ x=0,
+ y=1,
+ radius=0.4,
+ start_angle=cumsum("angle", include_zero=True),
+ end_angle=cumsum("angle"),
+ line_color="white",
+ fill_color="color",
+ legend_field=legend,
+ source=pdata,
+ )
plt.axis.axis_label = None
plt.axis.visible = False
plt.grid.grid_line_color = None
@@ -154,33 +188,36 @@ def pie_chart(data, title, legend, height=300, width=400, location="right", colo
def stacked_bar_chart(data, title, xaxis, yaxis, colors=None, width=None, height=None):
- ''' Create a stacked bar chart
- Keyword arguments:
- data: dictionary of data
- title: chart title
- xaxis: x-axis column name
- yaxis: list of y-axis column names
- colors: list of colors (optional)
- width: width of chart (optional)
- height: height of chart (optional)
- Returns:
- Figure components
- '''
+ """Create a stacked bar chart
+ Keyword arguments:
+ data: dictionary of data
+ title: chart title
+ xaxis: x-axis column name
+ yaxis: list of y-axis column names
+ colors: list of colors (optional)
+ width: width of chart (optional)
+ height: height of chart (optional)
+ Returns:
+ Figure components
+ """
if not colors:
colors = plasma(len(yaxis))
- plt = figure(x_range=data[xaxis], title=title,
- toolbar_location=None, tools="hover",
- tooltips=f"$name @{xaxis}: @$name")
+ plt = figure(
+ x_range=data[xaxis],
+ title=title,
+ toolbar_location=None,
+ tools="hover",
+ tooltips=f"$name @{xaxis}: @$name",
+ )
if width and height:
plt.width = width
plt.height = height
- plt.vbar_stack(yaxis, x=xaxis, width=0.9,
- color=colors, source=data,
- legend_label=yaxis
- )
- plt.legend.location = 'top_left'
+ plt.vbar_stack(
+ yaxis, x=xaxis, width=0.9, color=colors, source=data, legend_label=yaxis
+ )
+ plt.legend.location = "top_left"
if width and height:
- plt.add_layout(plt.legend[0], 'right')
+ plt.add_layout(plt.legend[0], "right")
plt.xgrid.grid_line_color = None
plt.y_range.start = 0
plt.background_fill_color = "ghostwhite"
diff --git a/api/dis_responder.py b/api/dis_responder.py
index 03a2fb9..992915d 100644
--- a/api/dis_responder.py
+++ b/api/dis_responder.py
@@ -1,6 +1,6 @@
-''' dis_responder.py
- UI and REST API for Data and Information Services
-'''
+"""dis_responder.py
+UI and REST API for Data and Information Services
+"""
from datetime import date, datetime, timedelta
from html import escape
@@ -16,7 +16,7 @@
from time import time
from bokeh.palettes import all_palettes, plasma
import bson
-from flask import (Flask, make_response, render_template, request, jsonify, send_file)
+from flask import Flask, make_response, render_template, request, jsonify, send_file
from flask_cors import CORS
from flask_swagger import swagger
import requests
@@ -30,51 +30,60 @@
# Database
DB = {}
# Custom queries
-CUSTOM_REGEX = {"publishing_year": {"field": "jrc_publishing_date",
- "value": "^!REPLACE!"}
- }
+CUSTOM_REGEX = {
+ "publishing_year": {"field": "jrc_publishing_date", "value": "^!REPLACE!"}
+}
# Navigation
-NAV = {"Home": "",
- "DOIs": {"DOIs by insertion date": "dois_insertpicker",
- "DOIs awaiting processing": "dois_pending",
- "DOIs by publisher": "dois_publisher",
- "DOIs by source": "dois_source",
- "DOIs by year": "dois_year",
- "DOIs by month": "dois_month",
- "DOI yearly report": "dois_report"
- },
- "Authorship": {"DOIs by authorship": "dois_author",
- "DOIs with lab head first/last authors": "doiui_group"},
- "Preprints": {"DOIs by preprint status": "dois_preprint",
- "DOIs by preprint status by year": "dois_preprint_year"},
- "Journals": {"Top journals": "dois_journal"},
- "ORCID": {"Groups": "groups",
- "Entries": "orcid_entry",
- "Duplicates": "orcid_duplicates",
- },
- "Tag/affiliation": {"DOIs by tag": "dois_tag",
- "Top DOI tags by year": "dois_top",
- "Author affiliations": "orcid_tag",
- },
- "Stats" : {"Database": "stats_database"
- },
- "External systems": {"Search People system": "people",
- "Supervisory Organizations": "orgs",
- }
- }
+NAV = {
+ "Home": "",
+ "DOIs": {
+ "DOIs by insertion date": "dois_insertpicker",
+ "DOIs awaiting processing": "dois_pending",
+ "DOIs by publisher": "dois_publisher",
+ "DOIs by source": "dois_source",
+ "DOIs by year": "dois_year",
+ "DOIs by month": "dois_month",
+ "DOI yearly report": "dois_report",
+ },
+ "Authorship": {
+ "DOIs by authorship": "dois_author",
+ "DOIs with lab head first/last authors": "doiui_group",
+ },
+ "Preprints": {
+ "DOIs by preprint status": "dois_preprint",
+ "DOIs by preprint status by year": "dois_preprint_year",
+ },
+ "Journals": {"Top journals": "dois_journal"},
+ "ORCID": {
+ "Groups": "groups",
+ "Entries": "orcid_entry",
+ "Duplicates": "orcid_duplicates",
+ },
+ "Tag/affiliation": {
+ "DOIs by tag": "dois_tag",
+ "Top DOI tags by year": "dois_top",
+ "Author affiliations": "orcid_tag",
+ },
+ "Stats": {"Database": "stats_database"},
+ "External systems": {
+ "Search People system": "people",
+ "Supervisory Organizations": "orgs",
+ },
+}
# Sources
# Dates
-OPSTART = datetime.strptime('2024-05-16','%Y-%m-%d')
+OPSTART = datetime.strptime("2024-05-16", "%Y-%m-%d")
# ******************************************************************************
# * Classes *
# ******************************************************************************
+
class CustomJSONEncoder(JSONEncoder):
- ''' Define a custom JSON encoder
- '''
+ """Define a custom JSON encoder"""
+
def default(self, o):
try:
if isinstance(o, bson.objectid.ObjectId):
@@ -96,8 +105,8 @@ def default(self, o):
class InvalidUsage(Exception):
- ''' Class to populate error return for JSON.
- '''
+ """Class to populate error return for JSON."""
+
def __init__(self, message, status_code=400, payload=None):
Exception.__init__(self)
self.message = message
@@ -105,21 +114,22 @@ def __init__(self, message, status_code=400, payload=None):
self.payload = payload
def to_dict(self):
- ''' Build error response
- '''
+ """Build error response"""
retval = dict(self.payload or ())
- retval['rest'] = {'status_code': self.status_code,
- 'error': True,
- 'error_text': f"{self.message}\n" \
- + f"An exception of type {type(self).__name__} occurred. " \
- + f"Arguments:\n{self.args}"}
+ retval["rest"] = {
+ "status_code": self.status_code,
+ "error": True,
+ "error_text": f"{self.message}\n"
+ + f"An exception of type {type(self).__name__} occurred. "
+ + f"Arguments:\n{self.args}",
+ }
return retval
class CustomException(Exception):
- ''' Class to populate error return for HTML.
- '''
- def __init__(self,message, preface=""):
+ """Class to populate error return for HTML."""
+
+ def __init__(self, message, preface=""):
super().__init__(message)
self.original = type(message).__name__
self.args = message.args
@@ -141,22 +151,30 @@ def __init__(self,message, preface=""):
@app.before_request
def before_request():
- ''' Set transaction start time and increment counters.
- If needed, initilize global variables.
- '''
+ """Set transaction start time and increment counters.
+ If needed, initialize global variables.
+ """
if not DB:
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning("Config error"), message=err)
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning("Config error"),
+ message=err,
+ )
dbo = attrgetter("dis.prod.write")(dbconfig)
print(f"Connecting to {dbo.name} prod on {dbo.host} as {dbo.user}")
try:
- DB['dis'] = JRC.connect_database(dbo)
+ DB["dis"] = JRC.connect_database(dbo)
except Exception as err:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning("Database connect error"), message=err)
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning("Database connect error"),
+ message=err,
+ )
app.config["START_TIME"] = time()
app.config["COUNTER"] += 1
endpoint = request.endpoint if request.endpoint else "(Unknown)"
@@ -166,28 +184,30 @@ def before_request():
return generate_response(result)
return None
+
# ******************************************************************************
# * Error utility functions *
# ******************************************************************************
+
@app.errorhandler(InvalidUsage)
def handle_invalid_usage(error):
- ''' Error handler
- Keyword arguments:
- error: error object
- '''
+ """Error handler
+ Keyword arguments:
+ error: error object
+ """
response = jsonify(error.to_dict())
response.status_code = error.status_code
return response
def error_message(err):
- ''' Create an error message from an exception
- Keyword arguments:
- err: exception
- Returns:
- Error message
- '''
+ """Create an error message from an exception
+ Keyword arguments:
+ err: exception
+ Returns:
+ Error message
+ """
if isinstance(err, CustomException):
msg = f"{err.preface}\n" if err.preface else ""
msg += f"An exception of type {err.original} occurred. Arguments:\n{err.args}"
@@ -197,99 +217,120 @@ def error_message(err):
def inspect_error(err, errtype):
- ''' Render an error with inspection
- Keyword arguments:
- err: exception
- Returns:
- Error screen
- '''
- mess = f"In {inspect.stack()[1][3]}, An exception of type {type(err).__name__} occurred. " \
- + f"Arguments:\n{err.args}"
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning(errtype), message=mess)
-
-
-def render_warning(msg, severity='error', size='lg'):
- ''' Render warning HTML
- Keyword arguments:
- msg: message
- severity: severity (warning, error, info, or success)
- size: glyph size
- Returns:
- HTML rendered warning
- '''
- icon = 'exclamation-triangle'
- color = 'goldenrod'
- if severity == 'error':
- color = 'red'
- elif severity == 'success':
- icon = 'check-circle'
- color = 'lime'
- elif severity == 'info':
- icon = 'circle-info'
- color = 'blue'
- elif severity == 'na':
- icon = 'minus-circle'
- color = 'gray'
- elif severity == 'missing':
- icon = 'minus-circle'
- elif severity == 'no':
- icon = 'times-circle'
- color = 'red'
- elif severity == 'warning':
- icon = 'exclamation-circle'
- return f"" \
- + f" {msg}"
+ """Render an error with inspection
+ Keyword arguments:
+ err: exception
+ Returns:
+ Error screen
+ """
+ mess = (
+ f"In {inspect.stack()[1][3]}, An exception of type {type(err).__name__} occurred. "
+ + f"Arguments:\n{err.args}"
+ )
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(errtype),
+ message=mess,
+ )
+
+
+def render_warning(msg, severity="error", size="lg"):
+ """Render warning HTML
+ Keyword arguments:
+ msg: message
+ severity: severity (error, success, info, na, missing, no, or warning)
+ size: glyph size
+ Returns:
+ HTML rendered warning
+ """
+ icon = "exclamation-triangle"
+ color = "goldenrod"
+ if severity == "error":
+ color = "red"
+ elif severity == "success":
+ icon = "check-circle"
+ color = "lime"
+ elif severity == "info":
+ icon = "circle-info"
+ color = "blue"
+ elif severity == "na":
+ icon = "minus-circle"
+ color = "gray"
+ elif severity == "missing":
+ icon = "minus-circle"
+ elif severity == "no":
+ icon = "times-circle"
+ color = "red"
+ elif severity == "warning":
+ icon = "exclamation-circle"
+ return (
+ f""
+ + f" {msg}"
+ )
+
# ******************************************************************************
# * Navigation utility functions *
# ******************************************************************************
+
def generate_navbar(active):
- ''' Generate the web navigation bar
- Keyword arguments:
- Navigation bar
- '''
- nav = '''
+ """Generate the web navigation bar
+ Returns:
+ Navigation bar
+ """
+ nav = """
'
+ nav += ""
return nav
+
# ******************************************************************************
# * Payload utility functions *
# ******************************************************************************
+
def receive_payload():
- ''' Get a request payload (form or JSON).
- Keyword arguments:
- None
- Returns:
- payload dictionary
- '''
+ """Get a request payload (form or JSON).
+ Keyword arguments:
+ None
+ Returns:
+ payload dictionary
+ """
pay = {}
if not request.get_data():
return pay
@@ -305,232 +346,274 @@ def receive_payload():
def initialize_result():
- ''' Initialize the result dictionary
- Returns:
- decoded partially populated result dictionary
- '''
- result = {"rest": {"requester": request.remote_addr,
- "url": request.url,
- "endpoint": request.endpoint,
- "error": False,
- "elapsed_time": "",
- "row_count": 0,
- "pid": os.getpid()}}
+ """Initialize the result dictionary
+ Returns:
+ decoded partially populated result dictionary
+ """
+ result = {
+ "rest": {
+ "requester": request.remote_addr,
+ "url": request.url,
+ "endpoint": request.endpoint,
+ "error": False,
+ "elapsed_time": "",
+ "row_count": 0,
+ "pid": os.getpid(),
+ }
+ }
if app.config["LAST_TRANSACTION"]:
- print(f"Seconds since last transaction: {time() - app.config['LAST_TRANSACTION']}")
+ print(
+ f"Seconds since last transaction: {time() - app.config['LAST_TRANSACTION']}"
+ )
app.config["LAST_TRANSACTION"] = time()
return result
def generate_response(result):
- ''' Generate a response to a request
- Keyword arguments:
- result: result dictionary
- Returns:
- JSON response
- '''
- result["rest"]["elapsed_time"] = str(timedelta(seconds=time() - app.config["START_TIME"]))
+ """Generate a response to a request
+ Keyword arguments:
+ result: result dictionary
+ Returns:
+ JSON response
+ """
+ result["rest"]["elapsed_time"] = str(
+ timedelta(seconds=time() - app.config["START_TIME"])
+ )
return jsonify(**result)
def get_custom_payload(ipd, display_value):
- ''' Get custom payload
- Keyword arguments:
- ipd: input payload dictionary
- display_value: display value
- Returns:
- payload: payload for MongoDB find
- ptitle: page title
- '''
- if ipd['field'] in CUSTOM_REGEX:
- rex = CUSTOM_REGEX[ipd['field']]['value']
- ipd['value'] = {"$regex": rex.replace("!REPLACE!", ipd['value'])}
- ipd['field'] = CUSTOM_REGEX[ipd['field']]['field']
+ """Get custom payload
+ Keyword arguments:
+ ipd: input payload dictionary
+ display_value: display value
+ Returns:
+ payload: payload for MongoDB find
+ ptitle: page title
+ """
+ if ipd["field"] in CUSTOM_REGEX:
+ rex = CUSTOM_REGEX[ipd["field"]]["value"]
+ ipd["value"] = {"$regex": rex.replace("!REPLACE!", ipd["value"])}
+ ipd["field"] = CUSTOM_REGEX[ipd["field"]]["field"]
ptitle = f"DOIs for {ipd['field']} {display_value}"
- payload = {ipd['field']: ipd['value']}
- if 'jrc_obtained_from' in ipd and ipd['jrc_obtained_from']:
- payload['jrc_obtained_from'] = ipd['jrc_obtained_from']
+ payload = {ipd["field"]: ipd["value"]}
+ if "jrc_obtained_from" in ipd and ipd["jrc_obtained_from"]:
+ payload["jrc_obtained_from"] = ipd["jrc_obtained_from"]
ptitle += f" from {ipd['jrc_obtained_from']}"
return payload, ptitle
+
# ******************************************************************************
# * ORCID utility functions *
# ******************************************************************************
+
def get_work_publication_date(wsumm):
- ''' Get a publication date from an ORCID work summary
- Keyword arguments:
- wsumm: ORCID work summary
- Returns:
- Publication date
- '''
- pdate = ''
- if 'publication-date' in wsumm and wsumm['publication-date']:
- ppd = wsumm['publication-date']
- if 'year' in ppd and ppd['year']['value']:
- pdate = ppd['year']['value']
- if 'month' in ppd and ppd['month'] and ppd['month']['value']:
+ """Get a publication date from an ORCID work summary
+ Keyword arguments:
+ wsumm: ORCID work summary
+ Returns:
+ Publication date
+ """
+ pdate = ""
+ if "publication-date" in wsumm and wsumm["publication-date"]:
+ ppd = wsumm["publication-date"]
+ if "year" in ppd and ppd["year"]["value"]:
+ pdate = ppd["year"]["value"]
+ if "month" in ppd and ppd["month"] and ppd["month"]["value"]:
pdate += f"-{ppd['month']['value']}"
- if 'day' in ppd and ppd['day'] and ppd['day']['value']:
+ if "day" in ppd and ppd["day"] and ppd["day"]["value"]:
pdate += f"-{ppd['day']['value']}"
return pdate
def get_work_doi(work):
- ''' Get a DOI from an ORCID work
- Keyword arguments:
- work: ORCID work
- Returns:
- DOI
- '''
- if not work['external-ids']['external-id']:
- return ''
- for eid in work['external-ids']['external-id']:
- if eid['external-id-type'] != 'doi':
+ """Get a DOI from an ORCID work
+ Keyword arguments:
+ work: ORCID work
+ Returns:
+ DOI
+ """
+ if not work["external-ids"]["external-id"]:
+ return ""
+ for eid in work["external-ids"]["external-id"]:
+ if eid["external-id-type"] != "doi":
continue
- if 'external-id-normalized' in eid:
- return eid['external-id-normalized']['value']
- if 'external-id-value' in eid:
- return eid['external-id-url']['value']
- return ''
+ if "external-id-normalized" in eid:
+ return eid["external-id-normalized"]["value"]
+ if "external-id-value" in eid:
+ return eid["external-id-url"]["value"]
+ return ""
def orcid_payload(oid, orc, eid=None):
- ''' Generate a payload for searching the dois collection by ORCID or employeeId
- Keyword arguments:
- oid: ORCID or employeeId
- orc: orcid record
- eid: employeeId boolean
- Returns:
- Payload
- '''
+ """Generate a payload for searching the dois collection by ORCID or employeeId
+ Keyword arguments:
+ oid: ORCID or employeeId
+ orc: orcid record
+ eid: employeeId (optional)
+ Returns:
+ Payload
+ """
# Name only search
- payload = {"$and": [{"$or": [{"author.given": {"$in": orc['given']}},
- {"creators.givenName": {"$in": orc['given']}}]},
- {"$or": [{"author.family": {"$in": orc['family']}},
- {"creators.familyName": {"$in": orc['family']}}]}]
- }
+ payload = {
+ "$and": [
+ {
+ "$or": [
+ {"author.given": {"$in": orc["given"]}},
+ {"creators.givenName": {"$in": orc["given"]}},
+ ]
+ },
+ {
+ "$or": [
+ {"author.family": {"$in": orc["family"]}},
+ {"creators.familyName": {"$in": orc["family"]}},
+ ]
+ },
+ ]
+ }
if eid and not oid:
# Employee ID only search
payload = {"$or": [{"jrc_author": eid}, {"$and": payload["$and"]}]}
elif oid and eid:
# Search by either name or employee ID
- payload = {"$or": [{"orcid": oid}, {"jrc_author": eid}, {"$and": payload["$and"]}]}
+ payload = {
+ "$or": [{"orcid": oid}, {"jrc_author": eid}, {"$and": payload["$and"]}]
+ }
return payload
def get_dois_for_orcid(oid, orc, use_eid, both):
- ''' Generate DOIs for a single user
- Keyword arguments:
- oid: ORCID or employeeId
- orc: orcid record
- use_eid: use employeeId boolean
- both: search by both ORCID and employeeId
- Returns:
- HTML and a list of DOIs
- '''
+ """Generate DOIs for a single user
+ Keyword arguments:
+ oid: ORCID or employeeId
+ orc: orcid record
+ use_eid: use employeeId boolean
+ both: search by both ORCID and employeeId
+ Returns:
+ rows from the dois collection
+ """
try:
if use_eid:
payload = {"jrc_author": oid}
elif both:
- eid = orc['employeeId'] if 'employeeId' in orc else None
+ eid = orc["employeeId"] if "employeeId" in orc else None
payload = orcid_payload(oid, orc, eid)
else:
payload = orcid_payload(oid, orc)
- rows = DB['dis'].dois.find(payload)
+ rows = DB["dis"].dois.find(payload)
except Exception as err:
- raise CustomException(err, "Could not find in dois collection by name.") from err
+ raise CustomException(
+ err, "Could not find in dois collection by name."
+ ) from err
return rows
def generate_works_table(rows, name=None):
- ''' Generate table HTML for a person's works
- Keyword arguments:
- rows: rows from dois collection
- name: search key [optional]
- Returns:
- HTML and a list of DOIs
- '''
+ """Generate table HTML for a person's works
+ Keyword arguments:
+ rows: rows from dois collection
+ name: search key [optional]
+ Returns:
+ HTML and a list of DOIs
+ """
works = []
dois = []
authors = {}
html = ""
fileoutput = ""
for row in rows:
- doi = doi_link(row['doi']) if row['doi'] else " "
- if 'title' in row and isinstance(row['title'], str):
- title = row['title']
+ doi = doi_link(row["doi"]) if row["doi"] else " "
+ if "title" in row and isinstance(row["title"], str):
+ title = row["title"]
else:
title = DL.get_title(row)
- dois.append(row['doi'])
- payload = {"date": DL.get_publishing_date(row),
- "doi": doi,
- "title": title
- }
+ dois.append(row["doi"])
+ payload = {"date": DL.get_publishing_date(row), "doi": doi, "title": title}
works.append(payload)
fileoutput += f"{payload['date']}\t{row['doi']}\t{payload['title']}\n"
if name:
alist = DL.get_author_details(row)
if alist:
for auth in alist:
- if "family" in auth and "given" in auth \
- and auth["family"].lower() == name.lower():
+ if (
+ "family" in auth
+ and "given" in auth
+ and auth["family"].lower() == name.lower()
+ ):
authors[f"{auth['given']} {auth['family']}"] = True
else:
print(f"Could not get author details for {row['doi']}")
if not works:
return html, []
- html += "
" \
- + '
Published
DOI
Title
'
- for work in sorted(works, key=lambda row: row['date'], reverse=True):
- html += f"
{work['date']}
{work['doi'] if work['doi'] else ' '}
" \
- + f"
{work['title']}
"
+ html += (
+ "
"
+ + "
Published
DOI
Title
"
+ )
+ for work in sorted(works, key=lambda row: row["date"], reverse=True):
+ html += (
+ f"
{work['date']}
{work['doi'] if work['doi'] else ' '}
"
+ + f"
{work['title']}
"
+ )
if dois:
html += "
"
if authors:
- html = f" Authors found: {', '.join(sorted(authors.keys()))} " \
- + f"This may include non-Janelia authors {html}"
- html = create_downloadable('works', ['Published', 'DOI', 'Title'], fileoutput) + html
+ html = (
+ f" Authors found: {', '.join(sorted(authors.keys()))} "
+ + f"This may include non-Janelia authors {html}"
+ )
+ html = (
+ create_downloadable("works", ["Published", "DOI", "Title"], fileoutput) + html
+ )
html = f"DOIs: {len(works)} " + html
return html, dois
def get_orcid_from_db(oid, use_eid=False, both=False, bare=False):
- ''' Generate HTML for an ORCID or employeeId that is in the orcid collection
- Keyword arguments:
- oid: ORCID or employeeId
- use_eid: use employeeId boolean
- both: search by both ORCID and employeeId
- bare: entry has no ORCID or employeeId
- Returns:
- HTML and a list of DOIs
- '''
+ """Generate HTML for an ORCID or employeeId that is in the orcid collection
+ Keyword arguments:
+ oid: ORCID or employeeId
+ use_eid: use employeeId boolean
+ both: search by both ORCID and employeeId
+ bare: entry has no ORCID or employeeId
+ Returns:
+ HTML and a list of DOIs
+ """
try:
if bare:
- orc = DB['dis'].orcid.find_one({"_id": bson.ObjectId(oid)})
+ orc = DB["dis"].orcid.find_one({"_id": bson.ObjectId(oid)})
else:
- payload = {'userIdO365' if use_eid else 'orcid': oid}
- orc = DB['dis'].orcid.find_one(payload)
+ payload = {"userIdO365" if use_eid else "orcid": oid}
+ orc = DB["dis"].orcid.find_one(payload)
except Exception as err:
- raise CustomException(err, "Could not find_one in orcid collection by ORCID ID.") from err
+ raise CustomException(
+ err, "Could not find_one in orcid collection by ORCID ID."
+ ) from err
if not orc:
return "", []
html = "
"
try:
if use_eid:
- oid = orc['employeeId']
+ oid = orc["employeeId"]
rows = get_dois_for_orcid(oid, orc, use_eid, both)
except Exception as err:
raise err
@@ -543,88 +626,106 @@ def get_orcid_from_db(oid, use_eid=False, both=False, bare=False):
def add_orcid_works(data, dois):
- ''' Generate HTML for a list of works from ORCID
- Keyword arguments:
- data: ORCID data
- dois: list of DOIs from dois collection
- Returns:
- HTML for a list of works from ORCID
- '''
+ """Generate HTML for a list of works from ORCID
+ Keyword arguments:
+ data: ORCID data
+ dois: list of DOIs from dois collection
+ Returns:
+ HTML for a list of works from ORCID
+ """
html = inner = ""
works = 0
- for work in data['activities-summary']['works']['group']:
- wsumm = work['work-summary'][0]
+ for work in data["activities-summary"]["works"]["group"]:
+ wsumm = work["work-summary"][0]
pdate = get_work_publication_date(wsumm)
doi = get_work_doi(work)
if (not doi) or (doi in dois):
continue
works += 1
if not doi:
- inner += f"
{pdate}
" \
- + f"
{wsumm['title']['title']['value']}
"
+ inner += (
+ f"
{pdate}
"
+ + f"
{wsumm['title']['title']['value']}
"
+ )
continue
link = ""
- if work['external-ids']['external-id'][0]['external-id-url']:
- if work['external-ids']['external-id'][0]['external-id-url']:
- link = "{doi}"
+ if work["external-ids"]["external-id"][0]["external-id-url"]:
+ if work["external-ids"]["external-id"][0]["external-id-url"]:
+ link = (
+ "{doi}"
+ )
else:
link = doi_link(doi)
- inner += f"
{pdate}
{link}
" \
- + f"
{wsumm['title']['title']['value']}
"
+ inner += (
+ f"
{pdate}
{link}
"
+ + f"
{wsumm['title']['title']['value']}
"
+ )
if inner:
title = "title is" if works == 1 else f"{works} titles are"
- html += f"The additional {title} from ORCID. Note that titles below may " \
- + "be self-reported, may not have DOIs available, or may be from the author's " \
- + "employment outside of Janelia."
- html += '
' \
- + '
Published
DOI
Title
' \
- + f"
{inner}
"
+ html += (
+ f"The additional {title} from ORCID. Note that titles below may "
+ + "be self-reported, may not have DOIs available, or may be from the author's "
+ + "employment outside of Janelia."
+ )
+ html += (
+ '
'
+ + "
Published
DOI
Title
"
+ + f"
{inner}
"
+ )
return html
def generate_user_table(rows):
- ''' Generate HTML for a list of users
- Keyword arguments:
- rows: rows from orcid collection
- Returns:
- HTML for a list of authors with a count
- '''
+ """Generate HTML for a list of users
+ Keyword arguments:
+ rows: rows from orcid collection
+ Returns:
+ HTML for a list of authors with a count
+ """
count = 0
- html = '
' \
- + '
ORCID
Given name
Family name
' \
- + '
Status
'
+ html = (
+ '
'
+ + "
ORCID
Given name
Family name
"
+ + "
Status
"
+ )
for row in rows:
count += 1
- if 'orcid' in row:
+ if "orcid" in row:
link = f"{row['orcid']}"
- elif 'userIdO365' in row:
+ elif "userIdO365" in row:
link = f"No ORCID found"
else:
link = f"No ORCID found"
- auth = DL.get_single_author_details(row, DB['dis'].orcid)
+ auth = DL.get_single_author_details(row, DB["dis"].orcid)
badges = get_badges(auth)
- rclass = 'other' if (auth and auth['alumni']) else 'active'
- html += f"
{link}
{', '.join(row['given'])}
" \
- + f"
{', '.join(row['family'])}
{' '.join(badges)}
"
- html += '
'
- cbutton = ""
+ rclass = "other" if (auth and auth["alumni"]) else "active"
+ html += (
+ f"
{link}
{', '.join(row['given'])}
"
+ + f"
{', '.join(row['family'])}
{' '.join(badges)}
"
+ )
+ html += "
"
+ cbutton = (
+ '"
+ )
html = cbutton + html
return html, count
+
# ******************************************************************************
# * DOI utility functions *
# ******************************************************************************
+
def doi_link(doi):
- ''' Return a link to a DOI or DOIs
- Keyword arguments:
- doi: DOI
- Returns:
- newdoi: HTML link(s) to DOI(s) as a string
- '''
+ """Return a link to a DOI or DOIs
+ Keyword arguments:
+ doi: DOI
+ Returns:
+ newdoi: HTML link(s) to DOI(s) as a string
+ """
if not doi:
return ""
doilist = [doi] if isinstance(doi, str) else doi
@@ -639,40 +740,40 @@ def doi_link(doi):
def get_doi(doi):
- ''' Get a single DOI record
- Keyword arguments:
- doi: DOI
- Returns:
- source: data source
- data: data from response
- '''
+ """Get a single DOI record
+ Keyword arguments:
+ doi: DOI
+ Returns:
+ source: data source
+ data: data from response
+ """
if DL.is_datacite(doi):
resp = JRC.call_datacite(doi)
- source = 'datacite'
- data = resp['data']['attributes'] if 'data' in resp else {}
+ source = "datacite"
+ data = resp["data"]["attributes"] if "data" in resp else {}
else:
resp = JRC.call_crossref(doi)
- source = 'crossref'
- data = resp['message'] if 'message' in resp else {}
+ source = "crossref"
+ data = resp["message"] if "message" in resp else {}
return source, data
def add_jrc_fields(row):
- ''' Add a table of custom JRC fields
- Keyword arguments:
- row: DOI record
- Returns:
- HTML
- '''
+ """Add a table of custom JRC fields
+ Keyword arguments:
+ row: DOI record
+ Returns:
+ HTML
+ """
jrc = {}
prog = re.compile("^jrc_")
for key, val in row.items():
- if not re.match(prog, key) or key in app.config['DO_NOT_DISPLAY']:
+ if not re.match(prog, key) or key in app.config["DO_NOT_DISPLAY"]:
continue
- if isinstance(val, list) and key not in ('jrc_preprint'):
+ if isinstance(val, list) and key not in ("jrc_preprint"):
try:
if isinstance(val[0], dict):
- val = ", ".join(sorted(elem['name'] for elem in val))
+ val = ", ".join(sorted(elem["name"] for elem in val))
else:
val = ", ".join(sorted(val))
except TypeError:
@@ -683,14 +784,14 @@ def add_jrc_fields(row):
html = '
'
for key in sorted(jrc):
val = jrc[key]
- if key == 'jrc_author':
+ if key == "jrc_author":
link = []
for auth in val.split(", "):
link.append(f"{auth}")
val = ", ".join(link)
- if key == 'jrc_preprint':
+ if key == "jrc_preprint":
val = doi_link(val)
- elif 'jrc_tag' in key:
+ elif "jrc_tag" in key:
link = []
for aff in val.split(", "):
link.append(f"{aff}")
@@ -701,280 +802,341 @@ def add_jrc_fields(row):
def add_relations(row):
- ''' Create a list of relations
- Keyword arguments:
- row: DOI record
- Returns:
- HTML
- '''
+ """Create a list of relations
+ Keyword arguments:
+ row: DOI record
+ Returns:
+ HTML
+ """
html = ""
- if "relation" in row and row['relation']:
+ if "relation" in row and row["relation"]:
# Crossref relations
- for rel in row['relation']:
+ for rel in row["relation"]:
used = []
- for itm in row['relation'][rel]:
- if itm['id'] in used:
+ for itm in row["relation"][rel]:
+ if itm["id"] in used:
continue
html += f"This DOI {rel.replace('-', ' ')} {doi_link(itm['id'])} "
- used.append(itm['id'])
- elif 'relatedIdentifiers' in row and row['relatedIdentifiers']:
+ used.append(itm["id"])
+ elif "relatedIdentifiers" in row and row["relatedIdentifiers"]:
# DataCite relations
- for rel in row['relatedIdentifiers']:
- if 'relatedIdentifierType' in rel and rel['relatedIdentifierType'] == 'DOI':
- words = re.split('(?<=.)(?=[A-Z])', rel['relationType'])
- html += f"This DOI {' '.join(wrd.lower() for wrd in words)} " \
- + f"{doi_link(rel['relatedIdentifier'])} "
+ for rel in row["relatedIdentifiers"]:
+ if "relatedIdentifierType" in rel and rel["relatedIdentifierType"] == "DOI":
+ words = re.split("(?<=.)(?=[A-Z])", rel["relationType"])
+ html += (
+ f"This DOI {' '.join(wrd.lower() for wrd in words)} "
+ + f"{doi_link(rel['relatedIdentifier'])} "
+ )
return html
def get_migration_data(row):
- ''' Create a migration record for a single DOI
- Keyword arguments:
- doi: doi record
- orgs: dictionary of organizations/codes
- Returns:
- migration dictionary
- '''
+ """Create a migration record for a single DOI
+ Keyword arguments:
+ doi: doi record
+ orgs: dictionary of organizations/codes
+ Returns:
+ migration dictionary
+ """
rec = {}
# Author
tags = []
- if 'jrc_tag' in row and row['jrc_tag']:
- if isinstance(row['jrc_tag'][0], dict):
- for atag in row['jrc_tag']:
+ if "jrc_tag" in row and row["jrc_tag"]:
+ if isinstance(row["jrc_tag"][0], dict):
+ for atag in row["jrc_tag"]:
tags.append(atag)
- #else:
+ # else:
# #TAG Old style - can delete after cutover
# for atag in row['jrc_tag']:
# code = orgs[atag] if atag in orgs else None
# tags.append({"name": atag, "code": code})
- if 'jrc_author' in row:
- rec['jrc_author'] = row['jrc_author']
+ if "jrc_author" in row:
+ rec["jrc_author"] = row["jrc_author"]
if tags:
- rec['tags'] = tags
+ rec["tags"] = tags
# Additional data
- if row['jrc_obtained_from'] == 'Crossref' and 'abstract' in row:
- rec['abstract'] = row['abstract']
- rec['journal'] = DL.get_journal(row)
- if 'jrc_publishing_date' in row:
- rec['jrc_publishing_date'] = row['jrc_publishing_date']
- if 'publisher' in row:
- rec['publisher'] = row['publisher']
- rec['title'] = DL.get_title(row)
- if 'URL' in row:
- rec['url'] = row['URL']
+ if row["jrc_obtained_from"] == "Crossref" and "abstract" in row:
+ rec["abstract"] = row["abstract"]
+ rec["journal"] = DL.get_journal(row)
+ if "jrc_publishing_date" in row:
+ rec["jrc_publishing_date"] = row["jrc_publishing_date"]
+ if "publisher" in row:
+ rec["publisher"] = row["publisher"]
+ rec["title"] = DL.get_title(row)
+ if "URL" in row:
+ rec["url"] = row["URL"]
return rec
def compute_preprint_data(rows):
- ''' Create a dictionaries of preprint data
- Keyword arguments:
- rows: preprint types
- Returns:
- data: preprint data dictionary
- preprint: preprint types dictionary
- '''
- data = {'Has preprint relation': 0}
+ """Create dictionaries of preprint data
+ Keyword arguments:
+ rows: preprint types
+ Returns:
+ data: preprint data dictionary
+ preprint: preprint types dictionary
+ """
+ data = {"Has preprint relation": 0}
preprint = {}
for row in rows:
- if 'type' in row['_id']:
- preprint[row['_id']['type']] = row['count']
- data['Has preprint relation'] += row['count']
+ if "type" in row["_id"]:
+ preprint[row["_id"]["type"]] = row["count"]
+ data["Has preprint relation"] += row["count"]
else:
- preprint['DataCite'] = row['count']
- data['Has preprint relation'] += row['count']
- for key in ('journal-article', 'posted-content', 'DataCite'):
+ preprint["DataCite"] = row["count"]
+ data["Has preprint relation"] += row["count"]
+ for key in ("journal-article", "posted-content", "DataCite"):
if key not in preprint:
preprint[key] = 0
return data, preprint
def counts_by_type(rows):
- ''' Count DOIs by type
- Keyword arguments:
- rows: aggregate rows from dois collection
- Returns:
- Dictionary of type counts
- '''
+ """Count DOIs by type
+ Keyword arguments:
+ rows: aggregate rows from dois collection
+ Returns:
+ Dictionary of type counts
+ """
typed = {}
preprints = 0
for row in rows:
- typ = row['_id']['type'] if 'type' in row['_id'] else "DataCite"
- sub = row['_id']['subtype'] if 'subtype' in row['_id'] else ""
- if sub == 'preprint':
- preprints += row['count']
- typ = 'posted-content'
- elif (typ == 'DataCite' and row['_id']['DataCite'] == 'Preprint'):
- preprints += row['count']
+ typ = row["_id"]["type"] if "type" in row["_id"] else "DataCite"
+ sub = row["_id"]["subtype"] if "subtype" in row["_id"] else ""
+ if sub == "preprint":
+ preprints += row["count"]
+ typ = "posted-content"
+ elif typ == "DataCite" and row["_id"]["DataCite"] == "Preprint":
+ preprints += row["count"]
if typ not in typed:
typed[typ] = 0
- typed[typ] += row['count']
- typed['preprints'] = preprints
+ typed[typ] += row["count"]
+ typed["preprints"] = preprints
return typed
def get_first_last_authors(year):
- ''' Get first and last author counts
- Keyword arguments:
- year: year to get counts for
- Returns:
- First and last author counts
- '''
- stat = {'first': {}, 'last': {}, 'any': {}}
+ """Get first and last author counts
+ Keyword arguments:
+ year: year to get counts for
+ Returns:
+ First and last author counts
+ """
+ stat = {"first": {}, "last": {}, "any": {}}
for which in ("first", "last", "any"):
- if which == 'any':
- payload = [{"$match": {"jrc_publishing_date": {"$regex": "^"+ year},
- "jrc_author": {"$exists": True}}},
- {"$group": {"_id": {"type": "$type", "subtype": "$subtype",
- "DataCite": "$types.resourceTypeGeneral"},
- "count": {"$sum": 1}}}
- ]
+ if which == "any":
+ payload = [
+ {
+ "$match": {
+ "jrc_publishing_date": {"$regex": "^" + year},
+ "jrc_author": {"$exists": True},
+ }
+ },
+ {
+ "$group": {
+ "_id": {
+ "type": "$type",
+ "subtype": "$subtype",
+ "DataCite": "$types.resourceTypeGeneral",
+ },
+ "count": {"$sum": 1},
+ }
+ },
+ ]
else:
- payload = [{"$match": {"jrc_publishing_date": {"$regex": "^"+ year},
- f"jrc_{which}_author": {"$exists": True}}},
- {"$group": {"_id": {"type": "$type", "subtype": "$subtype",
- "DataCite": "$types.resourceTypeGeneral"},
- "count": {"$sum": 1}}}
- ]
+ payload = [
+ {
+ "$match": {
+ "jrc_publishing_date": {"$regex": "^" + year},
+ f"jrc_{which}_author": {"$exists": True},
+ }
+ },
+ {
+ "$group": {
+ "_id": {
+ "type": "$type",
+ "subtype": "$subtype",
+ "DataCite": "$types.resourceTypeGeneral",
+ },
+ "count": {"$sum": 1},
+ }
+ },
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get yearly metrics " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get yearly metrics " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
for row in rows:
- typ = row['_id']['type'] if 'type' in row['_id'] else "DataCite"
- sub = row['_id']['subtype'] if 'subtype' in row['_id'] else ""
- if sub == 'preprint':
- typ = 'posted-content'
+ typ = row["_id"]["type"] if "type" in row["_id"] else "DataCite"
+ sub = row["_id"]["subtype"] if "subtype" in row["_id"] else ""
+ if sub == "preprint":
+ typ = "posted-content"
if typ not in stat[which]:
stat[which][typ] = 0
- stat[which][typ] += row['count']
- if sub == 'preprint' or (type == 'DataCite' and row['_id']['DataCite'] == 'Preprint'):
- if 'preprints' not in stat[which]:
- stat[which]['preprints'] = 0
- stat[which]['preprints'] += row['count']
- return stat['first'], stat['last'], stat['any']
+ stat[which][typ] += row["count"]
+ if sub == "preprint" or (
+ type == "DataCite" and row["_id"]["DataCite"] == "Preprint"
+ ):
+ if "preprints" not in stat[which]:
+ stat[which]["preprints"] = 0
+ stat[which]["preprints"] += row["count"]
+ return stat["first"], stat["last"], stat["any"]
def get_no_relation(year=None):
- ''' Get DOIs with no relation
- Keyword arguments:
- year: year (optional)
- Returns:
- Dictionary of types/subtypes with no relation
- '''
+ """Get DOIs with no relation
+ Keyword arguments:
+ year: year (optional)
+ Returns:
+ Dictionary of types/subtypes with no relation
+ """
no_relation = {"Crossref": {}, "DataCite": {}}
- payload = {"Crossref_journal": {"type": "journal-article", "subtype": {"$ne": "preprint"},
- "jrc_preprint": {"$exists": False}},
- "Crossref_preprint": {"subtype": "preprint", "jrc_preprint": {"$exists": False}},
- "DataCite_journal": {"jrc_obtained_from": "DataCite",
- "types.resourceTypeGeneral": {"$ne": "Preprint"},
- "jrc_preprint": {"$exists": False}},
- "DataCite_preprint": {"types.resourceTypeGeneral": "Preprint",
- "jrc_preprint": {"$exists": False}}
- }
+ payload = {
+ "Crossref_journal": {
+ "type": "journal-article",
+ "subtype": {"$ne": "preprint"},
+ "jrc_preprint": {"$exists": False},
+ },
+ "Crossref_preprint": {
+ "subtype": "preprint",
+ "jrc_preprint": {"$exists": False},
+ },
+ "DataCite_journal": {
+ "jrc_obtained_from": "DataCite",
+ "types.resourceTypeGeneral": {"$ne": "Preprint"},
+ "jrc_preprint": {"$exists": False},
+ },
+ "DataCite_preprint": {
+ "types.resourceTypeGeneral": "Preprint",
+ "jrc_preprint": {"$exists": False},
+ },
+ }
if year:
for pay in payload.values():
- pay["jrc_publishing_date"] = {"$regex": "^"+ year}
+ pay["jrc_publishing_date"] = {"$regex": "^" + year}
for key, val in payload.items():
try:
- cnt = DB['dis'].dois.count_documents(val)
+ cnt = DB["dis"].dois.count_documents(val)
except Exception as err:
raise err
- src, typ = key.split('_')
+ src, typ = key.split("_")
no_relation[src][typ] = cnt
return no_relation
def get_preprint_stats(rows):
- ''' Create a dictionary of preprint statistics
- Keyword arguments:
- rows: types/subtypes over years
- Returns:
- Preprint statistics dictionary
- '''
+ """Create a dictionary of preprint statistics
+ Keyword arguments:
+ rows: types/subtypes over years
+ Returns:
+ Preprint statistics dictionary
+ """
stat = {}
for row in rows:
- if 'type' not in row['_id']:
+ if "type" not in row["_id"]:
continue
- if 'sub' in row['_id'] and row['_id']['sub'] == 'preprint':
- if row['_id']['year'] not in stat:
- stat[row['_id']['year']] = {}
- for sub in ('journal', 'preprint'):
- if sub not in stat[row['_id']['year']]:
- stat[row['_id']['year']][sub] = 0
- stat[row['_id']['year']]['preprint'] += row['count']
- elif row['_id']['type'] == 'journal-article':
- if row['_id']['year'] not in stat:
- stat[row['_id']['year']] = {}
- for sub in ('journal', 'preprint'):
- if sub not in stat[row['_id']['year']]:
- stat[row['_id']['year']][sub] = 0
- stat[row['_id']['year']]['journal'] += row['count']
+ if "sub" in row["_id"] and row["_id"]["sub"] == "preprint":
+ if row["_id"]["year"] not in stat:
+ stat[row["_id"]["year"]] = {}
+ for sub in ("journal", "preprint"):
+ if sub not in stat[row["_id"]["year"]]:
+ stat[row["_id"]["year"]][sub] = 0
+ stat[row["_id"]["year"]]["preprint"] += row["count"]
+ elif row["_id"]["type"] == "journal-article":
+ if row["_id"]["year"] not in stat:
+ stat[row["_id"]["year"]] = {}
+ for sub in ("journal", "preprint"):
+ if sub not in stat[row["_id"]["year"]]:
+ stat[row["_id"]["year"]][sub] = 0
+ stat[row["_id"]["year"]]["journal"] += row["count"]
return stat
def get_source_data(year):
- ''' Get DOI data by source and type/subtype or resourceTypeGeneral
- Keyword arguments:
- year: year to get data for
- Returns:
- Data dictionary and html dictionary
- '''
+ """Get DOI data by source and type/subtype or resourceTypeGeneral
+ Keyword arguments:
+ year: year to get data for
+ Returns:
+ Data dictionary and html dictionary
+ """
# Crossref
- if year != 'All':
- match = {"jrc_obtained_from": "Crossref",
- "jrc_publishing_date": {"$regex": "^"+ year}}
+ if year != "All":
+ match = {
+ "jrc_obtained_from": "Crossref",
+ "jrc_publishing_date": {"$regex": "^" + year},
+ }
else:
match = {"jrc_obtained_from": "Crossref"}
- payload = [{"$match": match},
- {"$group": {"_id": {"source": "$jrc_obtained_from", "type": "$type",
- "subtype": "$subtype"},
- "count": {"$sum": 1}}},
- ]
+ payload = [
+ {"$match": match},
+ {
+ "$group": {
+ "_id": {
+ "source": "$jrc_obtained_from",
+ "type": "$type",
+ "subtype": "$subtype",
+ },
+ "count": {"$sum": 1},
+ }
+ },
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get Crossref types from dois"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get Crossref types from dois"),
+ message=error_message(err),
+ )
data = {"Crossref": 0, "DataCite": 0}
hdict = {}
for row in rows:
- for field in ('type', 'subtype'):
- if field not in row['_id']:
- row['_id'][field] = ''
- data['Crossref'] += row['count']
- hdict["_".join([row['_id']['source'], row['_id']['type'],
- row['_id']['subtype']])] = row['count']
+ for field in ("type", "subtype"):
+ if field not in row["_id"]:
+ row["_id"][field] = ""
+ data["Crossref"] += row["count"]
+ hdict[
+ "_".join([row["_id"]["source"], row["_id"]["type"], row["_id"]["subtype"]])
+ ] = row["count"]
# DataCite
- match['jrc_obtained_from'] = "DataCite"
- payload = [{"$match": match},
- {"$group": {"_id": "$types.resourceTypeGeneral","count": {"$sum": 1}}}
- ]
+ match["jrc_obtained_from"] = "DataCite"
+ payload = [
+ {"$match": match},
+ {"$group": {"_id": "$types.resourceTypeGeneral", "count": {"$sum": 1}}},
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get DataCite types from dois"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get DataCite types from dois"),
+ message=error_message(err),
+ )
for row in rows:
- data['DataCite'] += row['count']
- hdict["_".join(['DataCite', row['_id'], ""])] = row['count']
+ data["DataCite"] += row["count"]
+ hdict["_".join(["DataCite", row["_id"], ""])] = row["count"]
return data, hdict
-def s2_citation_count(doi, fmt='plain'):
- ''' Get citation count from Semantic Scholar
- Keyword arguments:
- doi: DOI
- fmt: format (plain or html)
- Returns:
- Citation count
- '''
+def s2_citation_count(doi, fmt="plain"):
+ """Get citation count from Semantic Scholar
+ Keyword arguments:
+ doi: DOI
+ fmt: format (plain or html)
+ Returns:
+ Citation count
+ """
url = f"{app.config['S2_GRAPH']}paper/DOI:{doi}?fields=citationCount"
- headers = {'x-api-key': app.config['S2_API_KEY']}
+ headers = {"x-api-key": app.config["S2_API_KEY"]}
try:
resp = requests.get(url, headers=headers, timeout=10)
if resp.status_code == 429:
@@ -982,11 +1144,13 @@ def s2_citation_count(doi, fmt='plain'):
if resp.status_code != 200:
return 0
data = resp.json()
- if fmt == 'html':
- cnt = f"" \
- + f"{data['citationCount']}"
+ if fmt == "html":
+ cnt = (
+ f""
+ + f"{data['citationCount']}"
+ )
else:
- cnt = data['citationCount']
+ cnt = data["citationCount"]
return cnt
except Exception:
return 0
@@ -996,15 +1160,16 @@ def s2_citation_count(doi, fmt='plain'):
# * Badge utility functions *
# ******************************************************************************
+
def tiny_badge(btype, msg, link=None):
- ''' Create HTML for a [very] small badge
- Keyword arguments:
- btype: badge type (success, danger, etc.)
- msg: message to show on badge
- link: link to other web page
- Returns:
- HTML
- '''
+ """Create HTML for a [very] small badge
+ Keyword arguments:
+ btype: badge type (success, danger, etc.)
+ msg: message to show on badge
+ link: link to other web page
+ Returns:
+ HTML
+ """
html = f"{msg}"
if link:
html = f"{html}"
@@ -1012,126 +1177,133 @@ def tiny_badge(btype, msg, link=None):
def get_badges(auth):
- ''' Create a list of badges for an author
- Keyword arguments:
- auth: detailed author record
- Returns:
- List of HTML badges
- '''
+ """Create a list of badges for an author
+ Keyword arguments:
+ auth: detailed author record
+ Returns:
+ List of HTML badges
+ """
badges = []
- if 'in_database' in auth and auth['in_database']:
+ if "in_database" in auth and auth["in_database"]:
badges.append(f"{tiny_badge('success', 'In database')}")
- if auth['alumni']:
+ if auth["alumni"]:
badges.append(f"{tiny_badge('danger', 'Alumni')}")
- elif 'validated' not in auth or not auth['validated']:
+ elif "validated" not in auth or not auth["validated"]:
badges.append(f"{tiny_badge('warning', 'Not validated')}")
- if 'orcid' not in auth or not auth['orcid']:
+ if "orcid" not in auth or not auth["orcid"]:
badges.append(f"{tiny_badge('urgent', 'No ORCID')}")
- if auth['asserted']:
+ if auth["asserted"]:
badges.append(f"{tiny_badge('info', 'Janelia affiliation')}")
- if 'duplicate_name' in auth:
+ if "duplicate_name" in auth:
badges.append(f"{tiny_badge('warning', 'Duplicate name')}")
else:
badges.append(f"{tiny_badge('danger', 'Not in database')}")
- if 'asserted' in auth and auth['asserted']:
+ if "asserted" in auth and auth["asserted"]:
badges.append(f"{tiny_badge('info', 'Janelia affiliation')}")
return badges
def show_tagged_authors(authors):
- ''' Create a list of Janelian authors (with badges and tags)
- Keyword arguments:
- authors: list of detailed authors from a publication
- Returns:
- List of HTML authors
- '''
+ """Create a list of Janelian authors (with badges and tags)
+ Keyword arguments:
+ authors: list of detailed authors from a publication
+ Returns:
+ List of HTML authors
+ """
alist = []
count = 0
for auth in authors:
- if (not auth['janelian']) and (not auth['asserted']) and (not auth['alumni']):
+ if (not auth["janelian"]) and (not auth["asserted"]) and (not auth["alumni"]):
continue
- if auth['janelian'] or auth['asserted']:
+ if auth["janelian"] or auth["asserted"]:
count += 1
who = f"{auth['given']} {auth['family']}"
- if 'orcid' in auth and auth['orcid']:
+ if "orcid" in auth and auth["orcid"]:
who = f"{who}"
- elif 'userIdO365' in auth and auth['userIdO365']:
+ elif "userIdO365" in auth and auth["userIdO365"]:
who = f"{who}"
badges = get_badges(auth)
tags = []
- if 'group' in auth:
- tags.append(auth['group'])
- if 'tags' in auth:
- for tag in auth['tags']:
+ if "group" in auth:
+ tags.append(auth["group"])
+ if "tags" in auth:
+ for tag in auth["tags"]:
if tag not in tags:
tags.append(tag)
tags.sort()
row = f"
{who}
{' '.join(badges)}
{', '.join(tags)}
"
alist.append(row)
- return f"
{'
'.join(alist)}
", count
+ return (
+ f"
{'
'.join(alist)}
",
+ count,
+ )
def add_orcid_badges(orc):
- ''' Generate badges for an ORCID ID that is in the orcid collection
- Keyword arguments:
- orc: row from orcid collection
- Returns:
- List of badges
- '''
+ """Generate badges for an ORCID ID that is in the orcid collection
+ Keyword arguments:
+ orc: row from orcid collection
+ Returns:
+ List of badges
+ """
badges = []
- badges.append(tiny_badge('success', 'In database'))
- if 'duplicate_name' in orc:
- badges.append(tiny_badge('warning', 'Duplicate name'))
- if 'orcid' not in orc or not orc['orcid']:
+ badges.append(tiny_badge("success", "In database"))
+ if "duplicate_name" in orc:
+ badges.append(tiny_badge("warning", "Duplicate name"))
+ if "orcid" not in orc or not orc["orcid"]:
badges.append(f"{tiny_badge('urgent', 'No ORCID')}")
- if 'alumni' in orc:
- badges.append(tiny_badge('danger', 'Alumni'))
- if 'employeeId' not in orc:
- badges.append(tiny_badge('warning', 'Not validated'))
+ if "alumni" in orc:
+ badges.append(tiny_badge("danger", "Alumni"))
+ if "employeeId" not in orc:
+ badges.append(tiny_badge("warning", "Not validated"))
return badges
+
# ******************************************************************************
# * General utility functions *
# ******************************************************************************
+
def random_string(strlen=8):
- ''' Generate a random string of letters and digits
- Keyword arguments:
- strlen: length of generated string
- '''
+ """Generate a random string of letters and digits
+ Keyword arguments:
+ strlen: length of generated string
+ """
cmps = string.ascii_letters + string.digits
- return ''.join(random.choice(cmps) for i in range(strlen))
+ return "".join(random.choice(cmps) for i in range(strlen))
def create_downloadable(name, header, content):
- ''' Generate a downloadable content file
- Keyword arguments:
- name: base file name
- header: table header
- content: table content
- Returns:
- File name
- '''
+ """Generate a downloadable content file
+ Keyword arguments:
+ name: base file name
+ header: table header
+ content: table content
+ Returns:
+ File name
+ """
fname = f"{name}_{random_string()}_{datetime.today().strftime('%Y%m%d%H%M%S')}.tsv"
with open(f"/tmp/{fname}", "w", encoding="utf8") as text_file:
if header:
content = "\t".join(header) + "\n" + content
text_file.write(content)
- return f'Download tab-delimited file'
-
-
-def humansize(num, suffix='B', places=2, space='disk'):
- ''' Return a human-readable storage size
- Keyword arguments:
- num: size
- suffix: default suffix
- space: "disk" or "mem"
- Returns:
- string
- '''
- limit = 1024.0 if space == 'disk' else 1000.0
- for unit in ['', 'K', 'M', 'G', 'T']:
+ return (
+ f'Download tab-delimited file'
+ )
+
+
+def humansize(num, suffix="B", places=2, space="disk"):
+ """Return a human-readable storage size
+ Keyword arguments:
+ num: size
+ suffix: default suffix
+ space: "disk" or "mem"
+ Returns:
+ string
+ """
+ limit = 1024.0 if space == "disk" else 1000.0
+ for unit in ["", "K", "M", "G", "T"]:
if abs(num) < limit:
return f"{num:.{places}f}{unit}{suffix}"
num /= limit
@@ -1139,24 +1311,24 @@ def humansize(num, suffix='B', places=2, space='disk'):
def dloop(row, keys, sep="\t"):
- ''' Generate a string of joined velues from a dictionary
- Keyword arguments:
- row: dictionary
- keys: list of keys
- sep: separator
- Returns:
- Joined values from a dictionary
- '''
+ """Generate a string of joined values from a dictionary
+ Keyword arguments:
+ row: dictionary
+ keys: list of keys
+ sep: separator
+ Returns:
+ Joined values from a dictionary
+ """
return sep.join([str(row[fld]) for fld in keys])
def last_thursday():
- ''' Calculate the date of the most recent Thursday
- Keyword arguments:
- None
- Returns:
- Date of the most recent Thursday
- '''
+ """Calculate the date of the most recent Thursday
+ Keyword arguments:
+ None
+ Returns:
+ Date of the most recent Thursday
+ """
today = date.today()
offset = (today.weekday() - 3) % 7
if offset:
@@ -1165,29 +1337,31 @@ def last_thursday():
def weeks_ago(weeks):
- ''' Calculate the date of a number of weeks ago
- Keyword arguments:
- weeks: number of weeks
- Returns:
- Date of a number of weeks ago
- '''
+ """Calculate the date of a number of weeks ago
+ Keyword arguments:
+ weeks: number of weeks
+ Returns:
+ Date of a number of weeks ago
+ """
today = date.today()
return today - timedelta(weeks=weeks)
def year_pulldown(prefix, all_years=True):
- ''' Generate a year pulldown
- Keyword arguments:
- prefic: navigation prefix
- Returns:
- Pulldown HTML
- '''
- years = ['All'] if all_years else []
+ """Generate a year pulldown
+ Keyword arguments:
+ prefix: navigation prefix
+ Returns:
+ Pulldown HTML
+ """
+ years = ["All"] if all_years else []
for year in range(datetime.now().year, 2005, -1):
years.append(str(year))
- html = "
"
+ html = (
+ "
"
+ )
for year in years:
html += f"{year}"
html += "
"
@@ -1198,32 +1372,33 @@ def year_pulldown(prefix, all_years=True):
# * Documentation *
# *****************************************************************************
-@app.route('/doc')
+
+@app.route("/doc")
def get_doc_json():
- ''' Show documentation
- '''
+ """Show documentation"""
try:
swag = swagger(app)
except Exception as err:
- return inspect_error(err, 'Could not parse swag')
- swag['info']['version'] = __version__
- swag['info']['title'] = "Data and Information Services"
+ return inspect_error(err, "Could not parse swag")
+ swag["info"]["version"] = __version__
+ swag["info"]["title"] = "Data and Information Services"
return jsonify(swag)
-@app.route('/help')
+@app.route("/help")
def show_swagger():
- ''' Show Swagger docs
- '''
- return render_template('swagger_ui.html')
+ """Show Swagger docs"""
+ return render_template("swagger_ui.html")
+
# *****************************************************************************
# * Admin endpoints *
# *****************************************************************************
+
@app.route("/stats")
def stats():
- '''
+ """
Show stats
Show uptime/requests statistics
---
@@ -1234,29 +1409,32 @@ def stats():
description: Stats
400:
description: Stats could not be calculated
- '''
- tbt = time() - app.config['LAST_TRANSACTION']
+ """
+ tbt = time() - app.config["LAST_TRANSACTION"]
result = initialize_result()
- start = datetime.fromtimestamp(app.config['START_TIME']).strftime('%Y-%m-%d %H:%M:%S')
- up_time = datetime.now() - app.config['STARTDT']
- result['stats'] = {"version": __version__,
- "requests": app.config['COUNTER'],
- "start_time": start,
- "uptime": str(up_time),
- "python": sys.version,
- "pid": os.getpid(),
- "endpoint_counts": app.config['ENDPOINTS'],
- "time_since_last_transaction": tbt,
- }
+ start = datetime.fromtimestamp(app.config["START_TIME"]).strftime(
+ "%Y-%m-%d %H:%M:%S"
+ )
+ up_time = datetime.now() - app.config["STARTDT"]
+ result["stats"] = {
+ "version": __version__,
+ "requests": app.config["COUNTER"],
+ "start_time": start,
+ "uptime": str(up_time),
+ "python": sys.version,
+ "pid": os.getpid(),
+ "endpoint_counts": app.config["ENDPOINTS"],
+ "time_since_last_transaction": tbt,
+ }
return generate_response(result)
# ******************************************************************************
# * API endpoints (DOI) *
# ******************************************************************************
-@app.route('/doi/authors/')
+@app.route("/doi/authors/")
def show_doi_authors(doi):
- '''
+ """
Return a DOI's authors
Return information on authors for a given DOI.
---
@@ -1274,46 +1452,46 @@ def show_doi_authors(doi):
description: DOI data
500:
description: MongoDB error
- '''
- doi = doi.lstrip('/').rstrip('/').lower()
+ """
+ doi = doi.lstrip("/").rstrip("/").lower()
result = initialize_result()
try:
- row = DB['dis'].dois.find_one({"doi": doi}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not row:
- result['data'] = []
+ result["data"] = []
return generate_response(result)
try:
- authors = DL.get_author_details(row, DB['dis'].orcid)
+ authors = DL.get_author_details(row, DB["dis"].orcid)
except Exception as err:
raise InvalidUsage(str(err), 500) from err
tagname = []
tags = []
try:
- orgs = DL.get_supervisory_orgs(DB['dis'].suporg)
+ orgs = DL.get_supervisory_orgs(DB["dis"].suporg)
except Exception as err:
raise InvalidUsage("Could not get supervisory orgs: " + str(err), 500) from err
- if 'jrc_tag' in row:
- for atag in row['jrc_tag']:
- if atag['name'] not in tagname:
- if atag['name'] in orgs:
- code = atag['code']
- tagtype = atag['type']
+ if "jrc_tag" in row:
+ for atag in row["jrc_tag"]:
+ if atag["name"] not in tagname:
+ if atag["name"] in orgs:
+ code = atag["code"]
+ tagtype = atag["type"]
else:
code = None
tagtype = None
- tagname.append(atag['name'])
- tags.append({"name": atag['name'], "code": code, "type": tagtype})
+ tagname.append(atag["name"])
+ tags.append({"name": atag["name"], "code": code, "type": tagtype})
if tags:
- result['tags'] = tags
- result['data'] = authors
+ result["tags"] = tags
+ result["data"] = authors
return generate_response(result)
-@app.route('/doi/janelians/')
+@app.route("/doi/janelians/")
def show_doi_janelians(doi):
- '''
+ """
Return a DOI's Janelia authors
Return information on Janelia authors for a given DOI.
---
@@ -1331,28 +1509,28 @@ def show_doi_janelians(doi):
description: DOI data
500:
description: MongoDB error
- '''
+ """
result = initialize_result()
resp = show_doi_authors(doi)
data = resp.json
- result['data'] = []
+ result["data"] = []
tags = []
- for auth in data['data']:
- if auth['janelian']:
- result['data'].append(auth)
- if 'tags' in auth:
- for atag in auth['tags']:
+ for auth in data["data"]:
+ if auth["janelian"]:
+ result["data"].append(auth)
+ if "tags" in auth:
+ for atag in auth["tags"]:
if atag not in tags:
tags.append(atag)
if tags:
tags.sort()
- result['tags'] = tags
+ result["tags"] = tags
return generate_response(result)
-@app.route('/doi/migration/')
+@app.route("/doi/migration/")
def show_doi_migration(doi):
- '''
+ """
Return a DOI's migration record
Return migration information for a given DOI.
---
@@ -1370,11 +1548,11 @@ def show_doi_migration(doi):
description: DOI data
500:
description: MongoDB error
- '''
- doi = doi.lstrip('/').rstrip('/').lower()
+ """
+ doi = doi.lstrip("/").rstrip("/").lower()
result = initialize_result()
try:
- row = DB['dis'].dois.find_one({"doi": doi}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not row:
@@ -1384,16 +1562,16 @@ def show_doi_migration(doi):
rec = get_migration_data(row)
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- rec['doi'] = doi
- result['data'] = rec
- result['rest']['source'] = 'mongo'
- result['rest']['row_count'] = len(result['data'])
+ rec["doi"] = doi
+ result["data"] = rec
+ result["rest"]["source"] = "mongo"
+ result["rest"]["row_count"] = 1
return generate_response(result)
-@app.route('/doi/migrations/')
+@app.route("/doi/migrations/")
def show_doi_migrations(idate):
- '''
+ """
Return migration records for DOIs inserted since a specified date
Return migration records for DOIs inserted since a specified date.
---
@@ -1411,35 +1589,37 @@ def show_doi_migrations(idate):
description: DOI data
500:
description: MongoDB error
- '''
+ """
result = initialize_result()
try:
- isodate = datetime.strptime(idate,'%Y-%m-%d')
+ isodate = datetime.strptime(idate, "%Y-%m-%d")
except Exception as err:
raise InvalidUsage(str(err), 400) from err
try:
- rows = DB['dis'].dois.find({"jrc_author": {"$exists": True},
- "jrc_inserted": {"$gte" : isodate}}, {'_id': 0})
+ rows = DB["dis"].dois.find(
+ {"jrc_author": {"$exists": True}, "jrc_inserted": {"$gte": isodate}},
+ {"_id": 0},
+ )
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- result['rest']['row_count'] = 0
- result['rest']['source'] = 'mongo'
- result['data'] = []
+ result["rest"]["row_count"] = 0
+ result["rest"]["source"] = "mongo"
+ result["data"] = []
for row in rows:
try:
- doi = row['doi']
+ doi = row["doi"]
rec = get_migration_data(row)
- rec['doi'] = doi
- result['data'].append(rec)
+ rec["doi"] = doi
+ result["data"].append(rec)
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- result['rest']['row_count'] = len(result['data'])
+ result["rest"]["row_count"] = len(result["data"])
return generate_response(result)
-@app.route('/doi/')
+@app.route("/doi/")
def show_doi(doi):
- '''
+ """
Return a DOI
Return Crossref or DataCite information for a given DOI.
If it's not in the dois collection, it will be retrieved from Crossref or Datacite.
@@ -1458,27 +1638,27 @@ def show_doi(doi):
description: DOI data
500:
description: MongoDB error
- '''
- doi = doi.lstrip('/').rstrip('/').lower()
+ """
+ doi = doi.lstrip("/").rstrip("/").lower()
result = initialize_result()
try:
- row = DB['dis'].dois.find_one({"doi": doi}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if row:
- result['rest']['row_count'] = 1
- result['rest']['source'] = 'mongo'
- result['data'] = row
+ result["rest"]["row_count"] = 1
+ result["rest"]["source"] = "mongo"
+ result["data"] = row
return generate_response(result)
- result['rest']['source'], result['data'] = get_doi(doi)
- if result['data']:
- result['rest']['row_count'] = 1
+ result["rest"]["source"], result["data"] = get_doi(doi)
+ if result["data"]:
+ result["rest"]["row_count"] = 1
return generate_response(result)
-@app.route('/doi/inserted/')
+@app.route("/doi/inserted/")
def show_inserted(idate):
- '''
+ """
Return DOIs inserted since a specified date
Return all DOIs that have been inserted since midnight on a specified date.
---
@@ -1498,29 +1678,29 @@ def show_inserted(idate):
description: bad input data
500:
description: MongoDB error
- '''
+ """
result = initialize_result()
try:
- isodate = datetime.strptime(idate,'%Y-%m-%d')
+ isodate = datetime.strptime(idate, "%Y-%m-%d")
except Exception as err:
raise InvalidUsage(str(err), 400) from err
try:
- rows = DB['dis'].dois.find({"jrc_inserted": {"$gte" : isodate}}, {'_id': 0})
+ rows = DB["dis"].dois.find({"jrc_inserted": {"$gte": isodate}}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- result['rest']['row_count'] = 0
- result['rest']['source'] = 'mongo'
- result['data'] = []
+ result["rest"]["row_count"] = 0
+ result["rest"]["source"] = "mongo"
+ result["data"] = []
for row in rows:
- result['data'].append(row)
- result['rest']['row_count'] += 1
+ result["data"].append(row)
+ result["rest"]["row_count"] += 1
return generate_response(result)
-@app.route('/citation/')
-@app.route('/citation/dis/')
+@app.route("/citation/")
+@app.route("/citation/dis/")
def show_citation(doi):
- '''
+ """
Return a DIS-style citation
Return a DIS-style citation for a given DOI.
---
@@ -1540,29 +1720,29 @@ def show_citation(doi):
description: DOI not found
500:
description: MongoDB or formatting error
- '''
- doi = doi.lstrip('/').rstrip('/').lower()
+ """
+ doi = doi.lstrip("/").rstrip("/").lower()
result = initialize_result()
try:
- row = DB['dis'].dois.find_one({"doi": doi}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not row:
raise InvalidUsage(f"DOI {doi} is not in the database", 404)
- result['rest']['row_count'] = 1
- result['rest']['source'] = 'mongo'
+ result["rest"]["row_count"] = 1
+ result["rest"]["source"] = "mongo"
authors = DL.get_author_list(row)
title = DL.get_title(row)
- result['data'] = f"{authors} {title}. https://doi.org/{doi}."
- if 'jrc_preprint' in row:
- result['jrc_preprint'] = row['jrc_preprint']
+ result["data"] = f"{authors} {title}. https://doi.org/{doi}."
+ if "jrc_preprint" in row:
+ result["jrc_preprint"] = row["jrc_preprint"]
return generate_response(result)
-@app.route('/citations', defaults={'ctype': 'dis'}, methods=['OPTIONS', 'POST'])
-@app.route('/citations/', methods=['OPTIONS', 'POST'])
-def show_multiple_citations(ctype='dis'):
- '''
+@app.route("/citations", defaults={"ctype": "dis"}, methods=["OPTIONS", "POST"])
+@app.route("/citations/", methods=["OPTIONS", "POST"])
+def show_multiple_citations(ctype="dis"):
+ """
Return citations
Return a dictionary of citations for a list of given DOIs.
---
@@ -1586,36 +1766,36 @@ def show_multiple_citations(ctype='dis'):
description: DOI data
500:
description: MongoDB or formatting error
- '''
+ """
result = initialize_result()
ipd = receive_payload()
- if "dois" not in ipd or not (ipd['dois']) or not isinstance(ipd['dois'], list):
+ if "dois" not in ipd or not (ipd["dois"]) or not isinstance(ipd["dois"], list):
raise InvalidUsage("You must specify a list of DOIs")
- result['rest']['source'] = 'mongo'
- result['data'] = {}
- for doi in ipd['dois']:
+ result["rest"]["source"] = "mongo"
+ result["data"] = {}
+ for doi in ipd["dois"]:
try:
- row = DB['dis'].dois.find_one({"doi": doi.tolower()}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi.lower()}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not row:
- result['data'][doi] = ''
+ result["data"][doi] = ""
continue
- result['rest']['row_count'] += 1
+ result["rest"]["row_count"] += 1
authors = DL.get_author_list(row, style=ctype)
title = DL.get_title(row)
journal = DL.get_journal(row)
- result['data'][doi] = f"{authors} {title}."
- if ctype == 'dis':
- result['data'][doi] = f"{result['data'][doi]}. https://doi.org/{doi}."
+ result["data"][doi] = f"{authors} {title}"
+ if ctype == "dis":
+ result["data"][doi] = f"{result['data'][doi]}. https://doi.org/{doi}."
else:
- result['data'][doi] = f"{result['data'][doi]}. {journal}."
+ result["data"][doi] = f"{result['data'][doi]}. {journal}."
return generate_response(result)
-@app.route('/citation/flylight/')
+@app.route("/citation/flylight/")
def show_flylight_citation(doi):
- '''
+ """
Return a FlyLight-style citation
Return a FlyLight-style citation for a given DOI.
---
@@ -1635,29 +1815,29 @@ def show_flylight_citation(doi):
description: DOI not found
500:
description: MongoDB or formatting error
- '''
- doi = doi.lstrip('/').rstrip('/').lower()
+ """
+ doi = doi.lstrip("/").rstrip("/").lower()
result = initialize_result()
try:
- row = DB['dis'].dois.find_one({"doi": doi}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not row:
raise InvalidUsage(f"DOI {doi} is not in the database", 404)
- result['rest']['row_count'] = 1
- result['rest']['source'] = 'mongo'
- authors = DL.get_author_list(row, style='flylight')
+ result["rest"]["row_count"] = 1
+ result["rest"]["source"] = "mongo"
+ authors = DL.get_author_list(row, style="flylight")
title = DL.get_title(row)
journal = DL.get_journal(row)
- result['data'] = f"{authors} {title}. {journal}."
- if 'jrc_preprint' in row:
- result['jrc_preprint'] = row['jrc_preprint']
+ result["data"] = f"{authors} {title}. {journal}."
+ if "jrc_preprint" in row:
+ result["jrc_preprint"] = row["jrc_preprint"]
return generate_response(result)
-@app.route('/citation/full/')
+@app.route("/citation/full/")
def show_full_citation(doi):
- '''
+ """
Return a full citation
Return a full citation (DIS+journal) for a given DOI.
---
@@ -1677,29 +1857,29 @@ def show_full_citation(doi):
description: DOI not found
500:
description: MongoDB or formatting error
- '''
- doi = doi.lstrip('/').rstrip('/').lower()
+ """
+ doi = doi.lstrip("/").rstrip("/").lower()
result = initialize_result()
try:
- row = DB['dis'].dois.find_one({"doi": doi}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not row:
raise InvalidUsage(f"DOI {doi} is not in the database", 404)
- result['rest']['row_count'] = 1
- result['rest']['source'] = 'mongo'
+ result["rest"]["row_count"] = 1
+ result["rest"]["source"] = "mongo"
authors = DL.get_author_list(row)
title = DL.get_title(row)
journal = DL.get_journal(row)
- result['data'] = f"{authors} {title}. {journal}."
- if 'jrc_preprint' in row:
- result['jrc_preprint'] = row['jrc_preprint']
+ result["data"] = f"{authors} {title}. {journal}."
+ if "jrc_preprint" in row:
+ result["jrc_preprint"] = row["jrc_preprint"]
return generate_response(result)
-@app.route('/components/')
+@app.route("/components/")
def show_components(doi):
- '''
+ """
Return components of a DIS-style citation
Return components of a DIS-style citation for a given DOI.
---
@@ -1719,30 +1899,31 @@ def show_components(doi):
description: DOI not found
500:
description: MongoDB or formatting error
- '''
- doi = doi.lstrip('/').rstrip('/').lower()
+ """
+ doi = doi.lstrip("/").rstrip("/").lower()
result = initialize_result()
try:
- row = DB['dis'].dois.find_one({"doi": doi}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not row:
raise InvalidUsage(f"DOI {doi} is not in the database", 404)
- result['rest']['row_count'] = 1
- result['rest']['source'] = 'mongo'
- result['data'] = {"authors": DL.get_author_list(row, returntype="list"),
- "journal": DL.get_journal(row),
- "publishing_date": DL.get_publishing_date(row),
- "title": DL.get_title(row)
- }
- if row['jrc_obtained_from'] == 'Crossref' and 'abstract' in row:
- result['data']['abstract'] = row['abstract']
+ result["rest"]["row_count"] = 1
+ result["rest"]["source"] = "mongo"
+ result["data"] = {
+ "authors": DL.get_author_list(row, returntype="list"),
+ "journal": DL.get_journal(row),
+ "publishing_date": DL.get_publishing_date(row),
+ "title": DL.get_title(row),
+ }
+ if row["jrc_obtained_from"] == "Crossref" and "abstract" in row:
+ result["data"]["abstract"] = row["abstract"]
return generate_response(result)
-@app.route('/doi/custom', methods=['OPTIONS', 'POST'])
+@app.route("/doi/custom", methods=["OPTIONS", "POST"])
def show_dois_custom():
- '''
+ """
Return DOIs for a given find query
Return a list of DOI records for a given query.
---
@@ -1760,31 +1941,31 @@ def show_dois_custom():
description: DOI data
500:
description: MongoDB or formatting error
- '''
+ """
result = initialize_result()
ipd = receive_payload()
- if "query" not in ipd or not ipd['query']:
+ if "query" not in ipd or not ipd["query"]:
raise InvalidUsage("You must specify a custom query")
- result['rest']['source'] = 'mongo'
- result['rest']['query'] = ipd['query']
- result['data'] = []
- print(ipd['query'])
+ result["rest"]["source"] = "mongo"
+ result["rest"]["query"] = ipd["query"]
+ result["data"] = []
+ print(ipd["query"])
try:
- rows = DB['dis'].dois.find(ipd['query'], {'_id': 0})
+ rows = DB["dis"].dois.find(ipd["query"], {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not rows:
generate_response(result)
for row in rows:
- result['data'].append(row)
- result['rest']['row_count'] += 1
+ result["data"].append(row)
+ result["rest"]["row_count"] += 1
return generate_response(result)
-@app.route('/components', defaults={'ctype': 'dis'}, methods=['OPTIONS', 'POST'])
-@app.route('/components/', methods=['OPTIONS', 'POST'])
-def show_multiple_components(ctype='dis'):
- '''
+@app.route("/components", defaults={"ctype": "dis"}, methods=["OPTIONS", "POST"])
+@app.route("/components/", methods=["OPTIONS", "POST"])
+def show_multiple_components(ctype="dis"):
+ """
Return DOI components for a given tag
Return a list of citation components for a given tag.
---
@@ -1808,36 +1989,37 @@ def show_multiple_components(ctype='dis'):
description: Component data
500:
description: MongoDB or formatting error
- '''
+ """
result = initialize_result()
ipd = receive_payload()
- if "tag" not in ipd or not (ipd['tag']) or not isinstance(ipd['tag'], str):
+ if "tag" not in ipd or not (ipd["tag"]) or not isinstance(ipd["tag"], str):
raise InvalidUsage("You must specify a tag")
- result['rest']['source'] = 'mongo'
- result['data'] = []
+ result["rest"]["source"] = "mongo"
+ result["data"] = []
try:
- rows = DB['dis'].dois.find({"jrc_tag.name": ipd['tag']}, {'_id': 0})
+ rows = DB["dis"].dois.find({"jrc_tag.name": ipd["tag"]}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not rows:
generate_response(result)
for row in rows:
- record = {"doi": row['doi'],
- "authors": DL.get_author_list(row, style=ctype, returntype="list"),
- "title": DL.get_title(row),
- "journal": DL.get_journal(row),
- "publishing_date": DL.get_publishing_date(row)
- }
- if row['jrc_obtained_from'] == 'Crossref' and 'abstract' in row:
- record['abstract'] = row['abstract']
- result['data'].append(record)
- result['rest']['row_count'] += 1
+ record = {
+ "doi": row["doi"],
+ "authors": DL.get_author_list(row, style=ctype, returntype="list"),
+ "title": DL.get_title(row),
+ "journal": DL.get_journal(row),
+ "publishing_date": DL.get_publishing_date(row),
+ }
+ if row["jrc_obtained_from"] == "Crossref" and "abstract" in row:
+ record["abstract"] = row["abstract"]
+ result["data"].append(record)
+ result["rest"]["row_count"] += 1
return generate_response(result)
-@app.route('/types')
+@app.route("/types")
def show_types():
- '''
+ """
Show data types
Return DOI data types, subtypes, and counts
---
@@ -1848,30 +2030,38 @@ def show_types():
description: types
500:
description: MongoDB error
- '''
+ """
result = initialize_result()
- payload = [{"$group": {"_id": {"type": "$type", "subtype": "$subtype"},"count": {"$sum": 1}}}]
+ payload = [
+ {
+ "$group": {
+ "_id": {"type": "$type", "subtype": "$subtype"},
+ "count": {"$sum": 1},
+ }
+ }
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- result['rest']['source'] = 'mongo'
- result['data'] = {}
+ result["rest"]["source"] = "mongo"
+ result["data"] = {}
for row in rows:
- if 'type' not in row['_id']:
- result['data']['datacite'] = {"count": row['count'], "subtype": None}
+ if "type" not in row["_id"]:
+ result["data"]["datacite"] = {"count": row["count"], "subtype": None}
else:
- typ = row['_id']['type']
- result['data'][typ] = {"count": row['count']}
- result['data'][typ]['subtype'] = row['_id']['subtype'] if 'subtype' in row['_id'] \
- else None
- result['rest']['row_count'] = len(result['data'])
+ typ = row["_id"]["type"]
+ result["data"][typ] = {"count": row["count"]}
+ result["data"][typ]["subtype"] = (
+ row["_id"]["subtype"] if "subtype" in row["_id"] else None
+ )
+ result["rest"]["row_count"] = len(result["data"])
return generate_response(result)
-@app.route('/doi/jrc_author/', methods=['OPTIONS', 'POST'])
+@app.route("/doi/jrc_author/", methods=["OPTIONS", "POST"])
def set_jrc_author(doi):
- '''
+ """
Update Janelia authors for a given DOI
Update Janelia authors (as employee IDs) in "jrc_author" for a given DOI.
---
@@ -1889,45 +2079,47 @@ def set_jrc_author(doi):
description: Success
500:
description: MongoDB or formatting error
- '''
- doi = doi.lstrip('/').rstrip('/').lower()
+ """
+ doi = doi.lstrip("/").rstrip("/").lower()
result = initialize_result()
- result['data'] = []
+ result["data"] = []
try:
- row = DB['dis'].dois.find_one({"doi": doi}, {'_id': 0})
+ row = DB["dis"].dois.find_one({"doi": doi}, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
if not row:
raise InvalidUsage(f"Could not find DOI {doi}", 400)
- result['rest']['row_count'] = 1
+ result["rest"]["row_count"] = 1
try:
- authors = DL.get_author_details(row, DB['dis'].orcid)
+ authors = DL.get_author_details(row, DB["dis"].orcid)
except Exception as err:
raise InvalidUsage(str(err), 500) from err
jrc_author = []
for auth in authors:
- if auth['janelian'] and 'employeeId' in auth and auth['employeeId']:
- jrc_author.append(auth['employeeId'])
+ if auth["janelian"] and "employeeId" in auth and auth["employeeId"]:
+ jrc_author.append(auth["employeeId"])
if not jrc_author:
return generate_response(result)
payload = {"$set": {"jrc_author": jrc_author}}
try:
- res = DB['dis'].dois.update_one({"doi": doi}, payload)
+ res = DB["dis"].dois.update_one({"doi": doi}, payload)
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- if hasattr(res, 'matched_count') and res.matched_count:
- if hasattr(res, 'modified_count') and res.modified_count:
- result['rest']['rows_updated'] = res.modified_count
- result['data'] = jrc_author
+ if hasattr(res, "matched_count") and res.matched_count:
+ if hasattr(res, "modified_count") and res.modified_count:
+ result["rest"]["rows_updated"] = res.modified_count
+ result["data"] = jrc_author
return generate_response(result)
+
# ******************************************************************************
# * API endpoints (ORCID) *
# ******************************************************************************
-@app.route('/orcid')
+
+@app.route("/orcid")
def show_oids():
- '''
+ """
Show saved ORCID IDs
Return information for saved ORCID IDs
---
@@ -1938,23 +2130,28 @@ def show_oids():
description: ORCID data
500:
description: MongoDB error
- '''
+ """
result = initialize_result()
try:
- rows = DB['dis'].orcid.find({}, {'_id': 0}).collation({"locale": "en"}).sort("family", 1)
+ rows = (
+ DB["dis"]
+ .orcid.find({}, {"_id": 0})
+ .collation({"locale": "en"})
+ .sort("family", 1)
+ )
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- result['rest']['source'] = 'mongo'
- result['data'] = []
+ result["rest"]["source"] = "mongo"
+ result["data"] = []
for row in rows:
- result['data'].append(row)
- result['rest']['row_count'] = len(result['data'])
+ result["data"].append(row)
+ result["rest"]["row_count"] = len(result["data"])
return generate_response(result)
-@app.route('/orcid/')
+@app.route("/orcid/")
def show_oid(oid):
- '''
+ """
Show an ORCID ID
Return information for an ORCID ID or name
---
@@ -1972,28 +2169,31 @@ def show_oid(oid):
description: ORCID data
500:
description: MongoDB error
- '''
+ """
result = initialize_result()
- if re.match(r'([0-9A-Z]{4}-){3}[0-9A-Z]+', oid):
+ if re.match(r"([0-9A-Z]{4}-){3}[0-9A-Z]+", oid):
payload = {"orcid": oid}
else:
- payload = {"$or": [{"family": {"$regex": oid, "$options" : "i"}},
- {"given": {"$regex": oid, "$options" : "i"}}]
- }
+ payload = {
+ "$or": [
+ {"family": {"$regex": oid, "$options": "i"}},
+ {"given": {"$regex": oid, "$options": "i"}},
+ ]
+ }
try:
- rows = DB['dis'].orcid.find(payload, {'_id': 0})
+ rows = DB["dis"].orcid.find(payload, {"_id": 0})
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- result['rest']['source'] = 'mongo'
- result['data'] = []
+ result["rest"]["source"] = "mongo"
+ result["data"] = []
for row in rows:
- result['data'].append(row)
+ result["data"].append(row)
return generate_response(result)
-@app.route('/orcidapi/')
+@app.route("/orcidapi/")
def show_oidapi(oid):
- '''
+ """
Show an ORCID ID (using the ORCID API)
Return information for an ORCID ID (using the ORCID API)
---
@@ -2009,87 +2209,105 @@ def show_oidapi(oid):
responses:
200:
description: ORCID data
- '''
+ """
result = initialize_result()
url = f"{app.config['ORCID']}{oid}"
try:
resp = requests.get(url, headers={"Accept": "application/json"}, timeout=10)
- result['data'] = resp.json()
+ result["data"] = resp.json()
except Exception as err:
raise InvalidUsage(str(err), 500) from err
- if 'error-code' not in result['data']:
- result['rest']['source'] = 'orcid'
- result['rest']['row_count'] = 1
+ if "error-code" not in result["data"]:
+ result["rest"]["source"] = "orcid"
+ result["rest"]["row_count"] = 1
return generate_response(result)
# ******************************************************************************
# * UI endpoints (general) *
# ******************************************************************************
-@app.route('/download/')
+@app.route("/download/")
def download(fname):
- ''' Downloadable content
- '''
+ """Downloadable content"""
try:
- return send_file('/tmp/' + fname, download_name=fname) # pylint: disable=E1123
+ return send_file("/tmp/" + fname, download_name=fname) # pylint: disable=E1123
except Exception as err:
- return render_template("error.html", urlroot=request.url_root,
- title='Download error', message=err)
+ return render_template(
+ "error.html", urlroot=request.url_root, title="Download error", message=err
+ )
-@app.route('/')
-@app.route('/home')
+@app.route("/")
+@app.route("/home")
def show_home():
- ''' Home
- '''
- jlist = get_top_journals('All').keys()
- journals = ''
- return make_response(render_template('home.html', urlroot=request.url_root,
- journals=journals,
- navbar=generate_navbar('Home')))
+ """Home"""
+ jlist = get_top_journals("All").keys()
+ journals = ""
+ return make_response(
+ render_template(
+ "home.html",
+ urlroot=request.url_root,
+ journals=journals,
+ navbar=generate_navbar("Home"),
+ )
+ )
+
# ******************************************************************************
# * UI endpoints (DOI) *
# ******************************************************************************
-@app.route('/doiui/')
+@app.route("/doiui/")
def show_doi_ui(doi):
- ''' Show DOI
- '''
+ """Show DOI"""
# pylint: disable=too-many-return-statements
- doi = doi.lstrip('/').rstrip('/').lower()
+ doi = doi.lstrip("/").rstrip("/").lower()
try:
- row = DB['dis'].dois.find_one({"doi": doi})
+ row = DB["dis"].dois.find_one({"doi": doi})
except Exception as err:
- return inspect_error(err, 'Could not get DOI')
+ return inspect_error(err, "Could not get DOI")
if row:
html = '
This DOI is saved locally in the Janelia database
'
html += add_jrc_fields(row)
else:
- html = '
This DOI is not saved locally in the ' \
- + 'Janelia database
'
+ html = (
+ '
This DOI is not saved locally in the '
+ + "Janelia database
"
+ )
_, data = get_doi(doi)
if not data:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning("Could not find DOI", 'warning'),
- message=f"Could not find DOI {doi}")
- authors = DL.get_author_list(data, orcid=True, project_map=DB['dis'].project_map)
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not find DOI", "warning"),
+ message=f"Could not find DOI {doi}",
+ )
+ authors = DL.get_author_list(data, orcid=True, project_map=DB["dis"].project_map)
if not authors:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not generate author list"),
- message=f"Could not generate author list for {doi}")
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not generate author list"),
+ message=f"Could not generate author list for {doi}",
+ )
title = DL.get_title(data)
if not title:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not find title"),
- message=f"Could not find title for {doi}")
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not find title"),
+ message=f"Could not find title for {doi}",
+ )
citation = f"{authors} {title}."
journal = DL.get_journal(data)
if not journal:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not find journal"),
- message=f"Could not find journal for {doi}")
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not find journal"),
+ message=f"Could not find journal for {doi}",
+ )
link = f"{doi}"
rlink = f"/doi/{doi}"
mlink = f"/doi/migration/{doi}"
@@ -2098,331 +2316,454 @@ def show_doi_ui(doi):
if oresp:
olink = f"{app.config['OA']}{doi}"
obutton = f" {tiny_badge('primary', 'OA data', olink)}"
- chead = 'Citation'
- if 'type' in data:
+ chead = "Citation"
+ if "type" in data:
chead += f" for {data['type'].replace('-', ' ')}"
- if 'subtype' in data:
+ if "subtype" in data:
chead += f" {data['subtype'].replace('-', ' ')}"
- elif 'types' in data and 'resourceTypeGeneral' in data['types']:
+ elif "types" in data and "resourceTypeGeneral" in data["types"]:
chead += f" for {data['types']['resourceTypeGeneral']}"
- html += f"
"
+ )
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=doi,
+ html=html,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route("/doisui_name/")
def show_doi_by_name_ui(name):
- ''' Show DOIs for a family name
- '''
- payload = {'$or': [{"author.family": {"$regex": f"^{name}$", "$options" : "i"}},
- {"creators.familyName": {"$regex": f"^{name}$", "$options" : "i"}},
- {"creators.name": {"$regex": f"{name}$", "$options" : "i"}},
- ]}
- try:
- rows = DB['dis'].dois.find(payload).collation({"locale": "en"}).sort("doi", 1)
- except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get DOIs from dois collection"),
- message=error_message(err))
+ """Show DOIs for a family name"""
+ payload = {
+ "$or": [
+ {"author.family": {"$regex": f"^{name}$", "$options": "i"}},
+ {"creators.familyName": {"$regex": f"^{name}$", "$options": "i"}},
+ {"creators.name": {"$regex": f"{name}$", "$options": "i"}},
+ ]
+ }
+ try:
+ rows = DB["dis"].dois.find(payload).collation({"locale": "en"}).sort("doi", 1)
+ except Exception as err:
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get DOIs from dois collection"),
+ message=error_message(err),
+ )
html, _ = generate_works_table(rows, name)
if not html:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning("Could not find DOIs", 'warning'),
- message=f"Could not find any DOIs with author name matching {name}")
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=f"DOIs for {name}", html=html,
- navbar=generate_navbar('DOIs')))
-
-
-@app.route('/doisui_type///', defaults={'year': 'All'})
-@app.route('/doisui_type////')
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not find DOIs", "warning"),
+ message=f"Could not find any DOIs with author name matching {name}",
+ )
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"DOIs for {name}",
+ html=html,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route(
+ "/doisui_type///", defaults={"year": "All"}
+)
+@app.route("/doisui_type////")
def show_doi_by_type_ui(src, typ, sub, year):
- ''' Show DOIs for a given type/subtype
- '''
- payload = {"jrc_obtained_from": src,
- ("type" if src == 'Crossref' else 'types.resourceTypeGeneral'): typ}
- if sub != 'None':
+ """Show DOIs for a given type/subtype"""
+ payload = {
+ "jrc_obtained_from": src,
+ ("type" if src == "Crossref" else "types.resourceTypeGeneral"): typ,
+ }
+ if sub != "None":
payload["subtype"] = sub
- if year != 'All':
- payload['jrc_publishing_date'] = {"$regex": "^" + year}
+ if year != "All":
+ payload["jrc_publishing_date"] = {"$regex": "^" + year}
try:
- rows = DB['dis'].dois.find(payload).collation({"locale": "en"}).sort("doi", 1)
+ rows = DB["dis"].dois.find(payload).collation({"locale": "en"}).sort("doi", 1)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get DOIs from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get DOIs from dois collection"),
+ message=error_message(err),
+ )
html, _ = generate_works_table(rows)
desc = f"{src} {typ}"
- if sub != 'None':
+ if sub != "None":
desc += f"/{sub}"
- if year != 'All':
+ if year != "All":
desc += f" ({year})"
if not html:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning("Could not find DOIs", 'warning'),
- message="Could not find any DOIs with type/subtype matching " \
- + desc)
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=f"DOIs for {desc}", html=html,
- navbar=generate_navbar('DOIs')))
-
-
-@app.route('/titlesui/')
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not find DOIs", "warning"),
+ message="Could not find any DOIs with type/subtype matching " + desc,
+ )
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"DOIs for {desc}",
+ html=html,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route("/titlesui/")
def show_doi_by_title_ui(title):
- ''' Show DOIs for a given title
- '''
- payload = ([{"$unwind" : "$title"},
- {"$match": {"title": {"$regex": title, "$options" : "i"},
- }}
- ])
- try:
- rows = DB['dis'].dois.aggregate(payload)
- except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get DOIs from dois collection"),
- message=error_message(err))
+ """Show DOIs for a given title"""
+ payload = [
+ {"$unwind": "$title"},
+ {
+ "$match": {
+ "title": {"$regex": title, "$options": "i"},
+ }
+ },
+ ]
+ try:
+ rows = DB["dis"].dois.aggregate(payload)
+ except Exception as err:
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get DOIs from dois collection"),
+ message=error_message(err),
+ )
union = []
for row in rows:
union.append(row)
- payload = {"titles.title": {"$regex": title, "$options" : "i"}}
+ payload = {"titles.title": {"$regex": title, "$options": "i"}}
try:
- rows = DB['dis'].dois.find(payload)
+ rows = DB["dis"].dois.find(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get DOIs from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get DOIs from dois collection"),
+ message=error_message(err),
+ )
for row in rows:
union.append(row)
html, _ = generate_works_table(union, title)
if not html:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning("Could not find DOIs", 'warning'),
- message=f"Could not find any DOIs with title matching {title}")
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=f"DOIs for {title}", html=html,
- navbar=generate_navbar('DOIs')))
-
-
-@app.route('/dois_author/')
-@app.route('/dois_author')
-def dois_author(year='All'):
- ''' Show first/last authors
- '''
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not find DOIs", "warning"),
+ message=f"Could not find any DOIs with title matching {title}",
+ )
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"DOIs for {title}",
+ html=html,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route("/dois_author/")
+@app.route("/dois_author")
+def dois_author(year="All"):
+ """Show first/last authors"""
source = {}
- for src in ('Crossref', 'DataCite', 'Crossref-all', 'DataCite-all', 'Crossref-jrc',
- 'DataCite-jrc'):
- payload = {"jrc_obtained_from": src,
- "$or": [{"jrc_first_author": {"$exists": True}},
- {"jrc_last_author": {"$exists": True}}]}
- if '-all' in src:
- payload = {"jrc_obtained_from": src.replace('-all', '')}
- elif '-jrc' in src:
- payload = {"jrc_obtained_from": src.replace('-jrc', ''),
- "$or": [{"jrc_first_author": {"$exists": True}},
- {"jrc_last_author": {"$exists": True}},
- {"jrc_author": {"$exists": True}}]}
- if year != 'All':
- payload['jrc_publishing_date'] = {"$regex": "^"+ year}
+ for src in (
+ "Crossref",
+ "DataCite",
+ "Crossref-all",
+ "DataCite-all",
+ "Crossref-jrc",
+ "DataCite-jrc",
+ ):
+ payload = {
+ "jrc_obtained_from": src,
+ "$or": [
+ {"jrc_first_author": {"$exists": True}},
+ {"jrc_last_author": {"$exists": True}},
+ ],
+ }
+ if "-all" in src:
+ payload = {"jrc_obtained_from": src.replace("-all", "")}
+ elif "-jrc" in src:
+ payload = {
+ "jrc_obtained_from": src.replace("-jrc", ""),
+ "$or": [
+ {"jrc_first_author": {"$exists": True}},
+ {"jrc_last_author": {"$exists": True}},
+ {"jrc_author": {"$exists": True}},
+ ],
+ }
+ if year != "All":
+ payload["jrc_publishing_date"] = {"$regex": "^" + year}
try:
- cnt = DB['dis'].dois.count_documents(payload)
+ cnt = DB["dis"].dois.count_documents(payload)
source[src] = cnt
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get authorship " \
- + "from dois collection"),
- message=error_message(err))
- html = '
' \
- + '
Authorship
Crossref
DataCite
' \
- + '
'
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get authorship " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
+ html = (
+ '
'
+ + "
Authorship
Crossref
DataCite
"
+ + "
"
+ )
data = {}
- for src in app.config['SOURCES']:
+ for src in app.config["SOURCES"]:
data[src] = source[src]
- html += f"
All authors
{source['Crossref-all']:,}
" \
- + f"
{source['DataCite-all']:,}
"
- html += f"
Any Janelia author
{source['Crossref-jrc']:,}
" \
- + f"
{source['DataCite-jrc']:,}
"
- html += f"
First and/or last
{source['Crossref']:,}
" \
- + f"
{source['DataCite']:,}
"
- html += f"
Additional only
{source['Crossref-jrc']-source['Crossref']:,}
" \
- + f"
{source['DataCite-jrc']-source['DataCite']:,}
"
- html += '
' + year_pulldown('dois_author')
- data = {"Crossref": source['Crossref-jrc'],
- "DataCite": source['DataCite-jrc']}
+ html += (
+ f"
All authors
{source['Crossref-all']:,}
"
+ + f"
{source['DataCite-all']:,}
"
+ )
+ html += (
+ f"
Any Janelia author
{source['Crossref-jrc']:,}
"
+ + f"
{source['DataCite-jrc']:,}
"
+ )
+ html += (
+ f"
First and/or last
{source['Crossref']:,}
"
+ + f"
{source['DataCite']:,}
"
+ )
+ html += (
+ f"
Additional only
{source['Crossref-jrc']-source['Crossref']:,}
"
+ + f"
{source['DataCite-jrc']-source['DataCite']:,}
"
+ )
+ html += "
" + year_pulldown("dois_author")
+ data = {"Crossref": source["Crossref-jrc"], "DataCite": source["DataCite-jrc"]}
title = "DOIs by authorship, any Janelia author"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- chartscript, chartdiv = DP.pie_chart(data, title, "source",
- colors=DP.SOURCE_PALETTE)
- data = {"First and/or last": source['Crossref'],
- "Additional": source['Crossref-jrc']-source['Crossref']}
+ chartscript, chartdiv = DP.pie_chart(
+ data, title, "source", colors=DP.SOURCE_PALETTE
+ )
+ data = {
+ "First and/or last": source["Crossref"],
+ "Additional": source["Crossref-jrc"] - source["Crossref"],
+ }
title = "Crossref DOIs by authorship"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- script2, div2 = DP.pie_chart(data, title, "source",
- colors=DP.SOURCE_PALETTE)
+ script2, div2 = DP.pie_chart(data, title, "source", colors=DP.SOURCE_PALETTE)
chartscript += script2
chartdiv += div2
- if source['DataCite'] or source['DataCite-jrc']:
- data = {"First and/or last": source['DataCite'],
- "Additional": source['DataCite-jrc']-source['DataCite']}
+ if source["DataCite"] or source["DataCite-jrc"]:
+ data = {
+ "First and/or last": source["DataCite"],
+ "Additional": source["DataCite-jrc"] - source["DataCite"],
+ }
title = "DataCite DOIs by authorship"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- script2, div2 = DP.pie_chart(data, title, "source",
- colors=DP.SOURCE_PALETTE)
+ script2, div2 = DP.pie_chart(data, title, "source", colors=DP.SOURCE_PALETTE)
chartscript += script2
chartdiv += div2
title = "DOI authorship"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- return make_response(render_template('bokeh.html', urlroot=request.url_root,
- title=title, html=html,
- chartscript=chartscript, chartdiv=chartdiv,
- navbar=generate_navbar('Authorship')))
-
-
-@app.route('/doiui_group/')
-@app.route('/doiui_group')
-def doiui_group(year='All'):
- ''' Show group leader first/last authorship
- '''
+ return make_response(
+ render_template(
+ "bokeh.html",
+ urlroot=request.url_root,
+ title=title,
+ html=html,
+ chartscript=chartscript,
+ chartdiv=chartdiv,
+ navbar=generate_navbar("Authorship"),
+ )
+ )
+
+
+@app.route("/doiui_group/")
+@app.route("/doiui_group")
+def doiui_group(year="All"):
+ """Show group leader first/last authorship"""
payload = {"group_code": {"$exists": True}}
try:
- rows = DB['dis'].orcid.find(payload, {"employeeId": 1})
+ rows = DB["dis"].orcid.find(payload, {"employeeId": 1})
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get group leads " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get group leads " + "from dois collection"),
+ message=error_message(err),
+ )
leads = []
for row in rows:
- leads.append(row['employeeId'])
+ leads.append(row["employeeId"])
payload = {"jrc_first_id": {"$in": leads}}
- if year != 'All':
- payload['jrc_publishing_date'] = {"$regex": "^"+ year}
+ if year != "All":
+ payload["jrc_publishing_date"] = {"$regex": "^" + year}
cnt = {}
try:
- cnt['first'] = DB['dis'].dois.count_documents(payload)
+ cnt["first"] = DB["dis"].dois.count_documents(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get first authors " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get first authors " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
payload = {"jrc_last_id": {"$in": leads}}
- if year != 'All':
- payload['jrc_publishing_date'] = {"$regex": "^"+ year}
+ if year != "All":
+ payload["jrc_publishing_date"] = {"$regex": "^" + year}
try:
- cnt['last'] = DB['dis'].dois.count_documents(payload)
+ cnt["last"] = DB["dis"].dois.count_documents(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get last authors " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get last authors " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
payload = {"jrc_author": {"$exists": True}}
- if year != 'All':
- payload['jrc_publishing_date'] = {"$regex": "^"+ year}
+ if year != "All":
+ payload["jrc_publishing_date"] = {"$regex": "^" + year}
try:
- cnt['total'] = DB['dis'].dois.count_documents(payload)
+ cnt["total"] = DB["dis"].dois.count_documents(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get last authors " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get last authors " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
html = "
"
html += f"
Lab head first author
{cnt['first']:,}
"
html += f"
Lab head last author
{cnt['last']:,}
"
- html += "
" + year_pulldown('doiui_group')
- data = {'Lab head first author': cnt['first'],
- 'Non-lab head first author': cnt['total'] - cnt['first']}
+ html += "
" + year_pulldown("doiui_group")
+ data = {
+ "Lab head first author": cnt["first"],
+ "Non-lab head first author": cnt["total"] - cnt["first"],
+ }
title = "DOIs with lab head first author"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- chartscript, chartdiv = DP.pie_chart(data, title, "source",
- width=520, height=350,
- colors=DP.SOURCE_PALETTE)
- data = {'Lab head last author': cnt['last'],
- 'Non-lab head last author': cnt['total'] - cnt['last']}
+ chartscript, chartdiv = DP.pie_chart(
+ data, title, "source", width=520, height=350, colors=DP.SOURCE_PALETTE
+ )
+ data = {
+ "Lab head last author": cnt["last"],
+ "Non-lab head last author": cnt["total"] - cnt["last"],
+ }
title = "DOIs with lab head last author"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- script2, div2 = DP.pie_chart(data, title, "source",
- width=520, height=350,
- colors=DP.SOURCE_PALETTE)
+ script2, div2 = DP.pie_chart(
+ data, title, "source", width=520, height=350, colors=DP.SOURCE_PALETTE
+ )
chartscript += script2
chartdiv += div2
title = "DOIs with lab head first/last authors"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- return make_response(render_template('bokeh.html', urlroot=request.url_root,
- title=title, html=html,
- chartscript=chartscript, chartdiv=chartdiv,
- navbar=generate_navbar('Authorship')))
+ return make_response(
+ render_template(
+ "bokeh.html",
+ urlroot=request.url_root,
+ title=title,
+ html=html,
+ chartscript=chartscript,
+ chartdiv=chartdiv,
+ navbar=generate_navbar("Authorship"),
+ )
+ )
def get_top_journals(year):
- ''' Get top journals
- '''
- match = {"container-title": {"$exists": True, "$ne" : ""}}
- if year != 'All':
- match["jrc_publishing_date"] = {"$regex": "^"+ year}
- payload = [{"$unwind" : "$container-title"},
- {"$match": match},
- {"$group": {"_id": "$container-title", "count":{"$sum": 1}}},
- ]
- try:
- rows = DB['dis'].dois.aggregate(payload)
+ """Get top journals"""
+ match = {"container-title": {"$exists": True, "$ne": ""}}
+ if year != "All":
+ match["jrc_publishing_date"] = {"$regex": "^" + year}
+ payload = [
+ {"$unwind": "$container-title"},
+ {"$match": match},
+ {"$group": {"_id": "$container-title", "count": {"$sum": 1}}},
+ ]
+ try:
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
raise err
journal = {}
for row in rows:
- journal[row['_id']] = row['count']
- payload = [{"$unwind" : "$institution"},
- {"$match": match},
- {"$group": {"_id": "$institution.name", "count":{"$sum": 1}}},
- ]
+ journal[row["_id"]] = row["count"]
+ payload = [
+ {"$unwind": "$institution"},
+ {"$match": match},
+ {"$group": {"_id": "$institution.name", "count": {"$sum": 1}}},
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
raise err
for row in rows:
- journal[row['_id']] = row['count']
+ journal[row["_id"]] = row["count"]
return journal
-@app.route('/dois_journal//')
-@app.route('/dois_journal/')
-@app.route('/dois_journal')
-def dois_journal(year='All', top=10):
- ''' Show journals
- '''
+@app.route("/dois_journal//")
+@app.route("/dois_journal/")
+@app.route("/dois_journal")
+def dois_journal(year="All", top=10):
+ """Show journals"""
top = min(top, 20)
try:
journal = get_top_journals(year)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get journal data from dois"),
- message=error_message(err))
- html = '
' \
- + '
Journal
Count
'
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get journal data from dois"),
+ message=error_message(err),
+ )
+ html = (
+ '
'
+ + "
Journal
Count
"
+ )
data = {}
for key in sorted(journal, key=journal.get, reverse=True):
val = journal[key]
@@ -2430,425 +2771,609 @@ def dois_journal(year='All', top=10):
continue
data[key] = val
html += f"
" + year_pulldown("dois_journal")
title = "DOIs by journal"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- chartscript, chartdiv = DP.pie_chart(data, title, "source", width=875, height=550,
- colors='Category20')
+ chartscript, chartdiv = DP.pie_chart(
+ data, title, "source", width=875, height=550, colors="Category20"
+ )
title = f"Top {top} DOI journals"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- return make_response(render_template('bokeh.html', urlroot=request.url_root,
- title=title, html=html,
- chartscript=chartscript, chartdiv=chartdiv,
- navbar=generate_navbar('DOIs')))
-
-
-@app.route('/dois_source/')
-@app.route('/dois_source')
-def dois_source(year='All'):
- ''' Show data sources
- '''
+ return make_response(
+ render_template(
+ "bokeh.html",
+ urlroot=request.url_root,
+ title=title,
+ html=html,
+ chartscript=chartscript,
+ chartdiv=chartdiv,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route("/dois_source/")
+@app.route("/dois_source")
+def dois_source(year="All"):
+ """Show data sources"""
try:
data, hdict = get_source_data(year)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get source data from dois"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get source data from dois"),
+ message=error_message(err),
+ )
# HTML and charts
- html = '
' \
- + '
Source
Type
Subtype
Count
' \
- + '
'
+ html = (
+ '
'
+ + "
Source
Type
Subtype
Count
"
+ + "
"
+ )
for key, val in sorted(hdict.items(), key=itemgetter(1), reverse=True):
- src, typ, sub = key.split('_')
+ src, typ, sub = key.split("_")
if not sub:
- sub = 'None'
- if year == 'All':
+ sub = "None"
+ if year == "All":
val = f"{val}"
else:
val = f"{val}"
- html += f"
{src}
{typ}
{sub if sub != 'None' else ''}
" \
- + f"
{val}
"
- html += '
' + year_pulldown('dois_source')
+ html += (
+ f"
{src}
{typ}
{sub if sub != 'None' else ''}
"
+ + f"
{val}
"
+ )
+ html += "
" + year_pulldown("dois_source")
title = "DOIs by source"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- chartscript, chartdiv = DP.pie_chart(data, title, "source", width=500,
- colors=DP.SOURCE_PALETTE)
- if year == 'All' or year >= '2024':
- payload = [{"$match": {"jrc_inserted" : { "$gte" : OPSTART}}},
- {"$group": {"_id": "$jrc_load_source", "count": {"$sum": 1}}},
- {"$sort" : {"count": -1}}
- ]
+ chartscript, chartdiv = DP.pie_chart(
+ data, title, "source", width=500, colors=DP.SOURCE_PALETTE
+ )
+ if year == "All" or year >= "2024":
+ payload = [
+ {"$match": {"jrc_inserted": {"$gte": OPSTART}}},
+ {"$group": {"_id": "$jrc_load_source", "count": {"$sum": 1}}},
+ {"$sort": {"count": -1}},
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get load methods " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get load methods " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
data = {}
for row in rows:
- data[row['_id']] = row['count']
+ data[row["_id"]] = row["count"]
title = "DOIs by load method"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- script2, div2 = DP.pie_chart(data, title, "source", width=500,
- colors=DP.SOURCE_PALETTE)
+ script2, div2 = DP.pie_chart(
+ data, title, "source", width=500, colors=DP.SOURCE_PALETTE
+ )
chartscript += script2
chartdiv += div2
title = "DOI sources"
- if year != 'All':
+ if year != "All":
title += f" ({year})"
- return make_response(render_template('bokeh.html', urlroot=request.url_root,
- title=title, html=html,
- chartscript=chartscript, chartdiv=chartdiv,
- navbar=generate_navbar('DOIs')))
-
-
-@app.route('/dois_preprint/')
-@app.route('/dois_preprint')
-def dois_preprint(year='All'):
- ''' Show preprints
- '''
+ return make_response(
+ render_template(
+ "bokeh.html",
+ urlroot=request.url_root,
+ title=title,
+ html=html,
+ chartscript=chartscript,
+ chartdiv=chartdiv,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route("/dois_preprint/")
+@app.route("/dois_preprint")
+def dois_preprint(year="All"):
+ """Show preprints"""
source = {}
- for src in app.config['SOURCES']:
+ for src in app.config["SOURCES"]:
payload = {"jrc_obtained_from": src, "jrc_preprint": {"$exists": False}}
- if year != 'All':
- payload['jrc_publishing_date'] = {"$regex": "^"+ year}
- if src == 'Crossref':
- payload['type'] = {"$in": ["journal-article", "posted-content"]}
+ if year != "All":
+ payload["jrc_publishing_date"] = {"$regex": "^" + year}
+ if src == "Crossref":
+ payload["type"] = {"$in": ["journal-article", "posted-content"]}
else:
- payload['type'] = {"types.resourceTypeGeneral": "Preprint"}
+ payload["type"] = {"types.resourceTypeGeneral": "Preprint"}
try:
- cnt = DB['dis'].dois.count_documents(payload)
+ cnt = DB["dis"].dois.count_documents(payload)
source[src] = cnt
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get source counts " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get source counts " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
match = {"jrc_preprint": {"$exists": True}}
- if year != 'All':
- match['jrc_publishing_date'] = {"$regex": "^"+ year}
- payload = [{"$match": match},
- {"$group": {"_id": {"type": "$type", "preprint": "$preprint"},"count": {"$sum": 1}}}]
+ if year != "All":
+ match["jrc_publishing_date"] = {"$regex": "^" + year}
+ payload = [
+ {"$match": match},
+ {
+ "$group": {
+ "_id": {"type": "$type", "preprint": "$preprint"},
+ "count": {"$sum": 1},
+ }
+ },
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get preprint counts " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get preprint counts " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
data, preprint = compute_preprint_data(rows)
no_relation = get_no_relation()
- html = '
"
+ )
pubs = {}
for row in rows:
- if row['_id']['publisher'] not in pubs:
- pubs[row['_id']['publisher']] = {}
- if row['_id']['source'] not in pubs[row['_id']['publisher']]:
- pubs[row['_id']['publisher']][row['_id']['source']] = row['count']
+ if row["_id"]["publisher"] not in pubs:
+ pubs[row["_id"]["publisher"]] = {}
+ if row["_id"]["source"] not in pubs[row["_id"]["publisher"]]:
+ pubs[row["_id"]["publisher"]][row["_id"]["source"]] = row["count"]
for pub, val in pubs.items():
- onclick = "onclick='nav_post(\"publisher\",\"" + pub + "\")'"
+ onclick = 'onclick=\'nav_post("publisher","' + pub + "\")'"
link = f"{pub}"
html += f"
{link}
"
- for source in app.config['SOURCES']:
+ for source in app.config["SOURCES"]:
if source in val:
- onclick = "onclick='nav_post(\"publisher\",\"" + pub \
- + "\",\"" + source + "\")'"
+ onclick = (
+ 'onclick=\'nav_post("publisher","' + pub + '","' + source + "\")'"
+ )
link = f"{val[source]:,}"
else:
link = ""
html += f"
"
+ )
tags = {}
for row in rows:
- if row['_id']['tag'] not in tags:
- tags[row['_id']['tag']] = {}
- if row['_id']['source'] not in tags[row['_id']['tag']]:
- tags[row['_id']['tag']][row['_id']['source']] = row['count']
+ if row["_id"]["tag"] not in tags:
+ tags[row["_id"]["tag"]] = {}
+ if row["_id"]["source"] not in tags[row["_id"]["tag"]]:
+ tags[row["_id"]["tag"]][row["_id"]["source"]] = row["count"]
for tag, val in tags.items():
link = f"{tag}"
- rclass = 'other'
+ rclass = "other"
if tag in orgs:
- if 'active' in orgs[tag]:
+ if "active" in orgs[tag]:
org = "Yes"
- rclass = 'active'
+ rclass = "active"
else:
org = "Inactive"
else:
org = "No"
html += f"
{link}
{org}
"
- for source in app.config['SOURCES']:
+ for source in app.config["SOURCES"]:
if source in val:
- onclick = "onclick='nav_post(\"jrc_tag.name\",\"" + tag \
- + "\",\"" + source + "\")'"
+ onclick = (
+ 'onclick=\'nav_post("jrc_tag.name","'
+ + tag
+ + '","'
+ + source
+ + "\")'"
+ )
link = f"{val[source]:,}"
else:
link = ""
html += f"
{link}
"
html += "
"
- html += '
'
- cbutton = ""
+ html += "
"
+ cbutton = (
+ '"
+ )
html = cbutton + html
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=f"DOI tags ({len(tags):,})", html=html,
- navbar=generate_navbar('Tag/affiliation')))
-
-
-@app.route('/dois_top', defaults={'num': 10})
-@app.route('/dois_top/')
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"DOI tags ({len(tags):,})",
+ html=html,
+ navbar=generate_navbar("Tag/affiliation"),
+ )
+ )
+
+
+@app.route("/dois_top", defaults={"num": 10})
+@app.route("/dois_top/")
def dois_top(num):
- ''' Show a chart of DOIs by top tags
- '''
- payload = [{"$unwind" : "$jrc_tag"},
- {"$project": {"_id": 0, "jrc_tag.name": 1, "jrc_publishing_date": 1}},
- {"$group": {"_id": {"tag": "$jrc_tag.name",
- "year": {"$substrBytes": ["$jrc_publishing_date", 0, 4]}},
- "count": {"$sum": 1}},
+ """Show a chart of DOIs by top tags"""
+ payload = [
+ {"$unwind": "$jrc_tag"},
+ {"$project": {"_id": 0, "jrc_tag.name": 1, "jrc_publishing_date": 1}},
+ {
+ "$group": {
+ "_id": {
+ "tag": "$jrc_tag.name",
+ "year": {"$substrBytes": ["$jrc_publishing_date", 0, 4]},
},
- {"$sort": {"_id.year": 1, "_id.tag": 1}}
- ]
+ "count": {"$sum": 1},
+ },
+ },
+ {"$sort": {"_id.year": 1, "_id.tag": 1}},
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get tags from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get tags from dois collection"),
+ message=error_message(err),
+ )
html = ""
ytags = {}
tags = {}
data = {"years": []}
for row in rows:
- if row['_id']['tag'] not in tags:
- tags[row['_id']['tag']] = 0
- tags[row['_id']['tag']] += row['count']
- if row['_id']['year'] not in ytags:
- ytags[row['_id']['year']] = {}
- data['years'].append(row['_id']['year'])
- if row['_id']['tag'] not in ytags[row['_id']['year']]:
- ytags[row['_id']['year']][row['_id']['tag']] = row['count']
+ if row["_id"]["tag"] not in tags:
+ tags[row["_id"]["tag"]] = 0
+ tags[row["_id"]["tag"]] += row["count"]
+ if row["_id"]["year"] not in ytags:
+ ytags[row["_id"]["year"]] = {}
+ data["years"].append(row["_id"]["year"])
+ if row["_id"]["tag"] not in ytags[row["_id"]["year"]]:
+ ytags[row["_id"]["year"]][row["_id"]["tag"]] = row["count"]
top = sorted(tags, key=tags.get, reverse=True)[:num]
- for year in data['years']:
+ for year in data["years"]:
for tag in sorted(tags):
if tag not in top:
continue
@@ -2863,65 +3388,107 @@ def dois_top(num):
height += 22 * (num - 23)
colors = plasma(len(top))
if len(top) <= 10:
- colors = all_palettes['Category10'][len(top)]
+ colors = all_palettes["Category10"][len(top)]
elif len(top) <= 20:
- colors = all_palettes['Category20'][len(top)]
- chartscript, chartdiv = DP.stacked_bar_chart(data, f"DOIs published by year for top {num} tags",
- xaxis="years", yaxis=top, width=900, height=height,
- colors=colors)
- return make_response(render_template('bokeh.html', urlroot=request.url_root,
- title="DOI tags by year/tag", html=html,
- chartscript=chartscript, chartdiv=chartdiv,
- navbar=generate_navbar('Tag/affiliation')))
-
-
-@app.route('/dois_report/')
-@app.route('/dois_report')
+ colors = all_palettes["Category20"][len(top)]
+ chartscript, chartdiv = DP.stacked_bar_chart(
+ data,
+ f"DOIs published by year for top {num} tags",
+ xaxis="years",
+ yaxis=top,
+ width=900,
+ height=height,
+ colors=colors,
+ )
+ return make_response(
+ render_template(
+ "bokeh.html",
+ urlroot=request.url_root,
+ title="DOI tags by year/tag",
+ html=html,
+ chartscript=chartscript,
+ chartdiv=chartdiv,
+ navbar=generate_navbar("Tag/affiliation"),
+ )
+ )
+
+
+@app.route("/dois_report/")
+@app.route("/dois_report")
def dois_report(year=str(datetime.now().year)):
- ''' Show year in review
- '''
- pmap = {"journal-article": "Journal articles", "posted-content": "Posted content",
- "preprints": "Preprints", "proceedings-article": "Proceedings articles",
- "book-chapter": "Book chapters", "datasets": "Datasets",
- "peer-review": "Peer reviews", "grant": "Grants", "other": "Other"}
- payload = [{"$match": {"jrc_publishing_date": {"$regex": "^"+ year}}},
- {"$group": {"_id": {"type": "$type", "subtype": "$subtype",
- "DataCite": "$types.resourceTypeGeneral"}, "count": {"$sum": 1}}}
- ]
- coll = DB['dis'].dois
+ """Show year in review"""
+ pmap = {
+ "journal-article": "Journal articles",
+ "posted-content": "Posted content",
+ "preprints": "Preprints",
+ "proceedings-article": "Proceedings articles",
+ "book-chapter": "Book chapters",
+ "datasets": "Datasets",
+ "peer-review": "Peer reviews",
+ "grant": "Grants",
+ "other": "Other",
+ }
+ payload = [
+ {"$match": {"jrc_publishing_date": {"$regex": "^" + year}}},
+ {
+ "$group": {
+ "_id": {
+ "type": "$type",
+ "subtype": "$subtype",
+ "DataCite": "$types.resourceTypeGeneral",
+ },
+ "count": {"$sum": 1},
+ }
+ },
+ ]
+ coll = DB["dis"].dois
try:
rows = coll.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get yearly metrics " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get yearly metrics " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
typed = counts_by_type(rows)
first, last, anyauth = get_first_last_authors(year)
stat = {}
# Journal count
- payload = [{"$unwind" : "$container-title"},
- {"$match": {"container-title": {"$exists": True}, "type": "journal-article",
- "jrc_publishing_date": {"$regex": "^"+ year}}},
- {"$group": {"_id": "$container-title", "count":{"$sum": 1}}}
- ]
+ payload = [
+ {"$unwind": "$container-title"},
+ {
+ "$match": {
+ "container-title": {"$exists": True},
+ "type": "journal-article",
+ "jrc_publishing_date": {"$regex": "^" + year},
+ }
+ },
+ {"$group": {"_id": "$container-title", "count": {"$sum": 1}}},
+ ]
try:
rows = coll.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get journal metrics " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get journal metrics " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
cnt = 0
for row in rows:
- if row['_id']:
+ if row["_id"]:
cnt += 1
- typed['Crossref'] = 0
+ typed["Crossref"] = 0
sheet = []
for key, val in pmap.items():
if key in typed:
- if key not in ('DataCite', 'preprints'):
- typed['Crossref'] += typed[key]
+ if key not in ("DataCite", "preprints"):
+ typed["Crossref"] += typed[key]
additional = []
if key in first:
additional.append(f"{first[key]:,} with Janelian first author")
@@ -2930,11 +3497,15 @@ def dois_report(year=str(datetime.now().year)):
if key in anyauth:
additional.append(f"{anyauth[key]:,} with any Janelian author")
additional = f" ({', '.join(additional)})" if additional else ""
- stat[val] = f"{typed[key]:,} {val.lower()}"
- if val in ('Journal articles', 'Preprints'):
+ stat[val] = (
+ f"{typed[key]:,} {val.lower()}"
+ )
+ if val in ("Journal articles", "Preprints"):
sheet.append(f"{val}\t{typed[key]}")
- if val == 'Journal articles':
- stat[val] += f" in {cnt:,} journals"
+ if val == "Journal articles":
+ stat[val] += (
+ f" in {cnt:,} journals"
+ )
sheet.append(f"\tJournals\t{cnt}")
if key in first:
sheet.append(f"\tFirst authors\t{first[key]}")
@@ -2945,295 +3516,444 @@ def dois_report(year=str(datetime.now().year)):
stat[val] += additional
stat[val] += " "
# figshare (unversioned only)
- payload = [{"$match": {"doi": {"$regex": "janelia.[0-9]+$"},
- "jrc_publishing_date": {"$regex": "^"+ year}}},
- {"$unwind": "$jrc_author"},
- {"$group": {"_id": "$jrc_author", "count": {"$sum": 1}}}]
- try:
- cnt = coll.count_documents(payload[0]['$match'])
- stat['figshare'] = f"{cnt:,} " \
- + "figshare (unversioned) articles"
+ payload = [
+ {
+ "$match": {
+ "doi": {"$regex": "janelia.[0-9]+$"},
+ "jrc_publishing_date": {"$regex": "^" + year},
+ }
+ },
+ {"$unwind": "$jrc_author"},
+ {"$group": {"_id": "$jrc_author", "count": {"$sum": 1}}},
+ ]
+ try:
+ cnt = coll.count_documents(payload[0]["$match"])
+ stat["figshare"] = (
+ f"{cnt:,} "
+ + "figshare (unversioned) articles"
+ )
sheet.append(f"figshare (unversioned) articles\t{cnt}")
rows = coll.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get journal figshare stats"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get journal figshare stats"),
+ message=error_message(err),
+ )
if cnt:
cnt = 0
for row in rows:
cnt += 1
- stat['figshare'] += f" with {cnt:,} " \
- + "Janelia authors "
+ stat["figshare"] += (
+ f" with {cnt:,} "
+ + "Janelia authors "
+ )
sheet.append(f"\tJanelia authors\t{cnt}")
# ORCID stats
orcs = {}
try:
- ocoll = DB['dis'].orcid
+ ocoll = DB["dis"].orcid
rows = ocoll.find({})
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get orcid collection entries"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get orcid collection entries"),
+ message=error_message(err),
+ )
for row in rows:
- if 'employeeId' in row and 'orcid' in row:
- orcs[row['employeeId']] = True
- payload = [{"$match": {"jrc_publishing_date": {"$regex": "^"+ year}}},
- {"$unwind": "$jrc_author"},
- {"$group": {"_id": "$jrc_author", "count": {"$sum": 1}}}
- ]
+ if "employeeId" in row and "orcid" in row:
+ orcs[row["employeeId"]] = True
+ payload = [
+ {"$match": {"jrc_publishing_date": {"$regex": "^" + year}}},
+ {"$unwind": "$jrc_author"},
+ {"$group": {"_id": "$jrc_author", "count": {"$sum": 1}}},
+ ]
try:
rows = coll.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get jrc_authors"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get jrc_authors"),
+ message=error_message(err),
+ )
cnt = orc = 0
for row in rows:
cnt += 1
- if row['_id'] in orcs:
+ if row["_id"] in orcs:
orc += 1
- stat['ORCID'] = f"{cnt:,} " \
- + "distinct Janelia authors for all entries, " \
- + f"{orc:,} " \
- + f"({orc/cnt*100:.2f}%) with ORCIDs"
- sheet.extend([f"Distinct Janelia authors\t{cnt}", f"Janelia authors with ORCIDs\t{orc}"])
+ stat["ORCID"] = (
+ f"{cnt:,} "
+ + "distinct Janelia authors for all entries, "
+ + f"{orc:,} "
+ + f"({orc/cnt*100:.2f}%) with ORCIDs"
+ )
+ sheet.extend(
+ [f"Distinct Janelia authors\t{cnt}", f"Janelia authors with ORCIDs\t{orc}"]
+ )
# Entries
- if 'DataCite' not in typed:
- typed['DataCite'] = 0
- for key in ('DataCite', 'Crossref'):
+ if "DataCite" not in typed:
+ typed["DataCite"] = 0
+ for key in ("DataCite", "Crossref"):
sheet.insert(0, f"{key} entries\t{typed[key]}")
- stat['Entries'] = f"{typed['Crossref']:,}" \
- + " Crossref entries " \
- + f"{typed['DataCite']:,}" \
- + " DataCite entries"
- if 'Journal articles' not in stat:
- stat['Journal articles'] = "0 journal articles "
- if 'Preprints' not in stat:
- stat['Preprints'] = "0 preprints "
+ stat["Entries"] = (
+ f"{typed['Crossref']:,}"
+ + " Crossref entries "
+ + f"{typed['DataCite']:,}"
+ + " DataCite entries"
+ )
+ if "Journal articles" not in stat:
+ stat["Journal articles"] = (
+ "0 journal articles "
+ )
+ if "Preprints" not in stat:
+ stat["Preprints"] = "0 preprints "
# Authors
try:
- rows = coll.find({"jrc_publishing_date": {"$regex": "^"+ year}})
+ rows = coll.find({"jrc_publishing_date": {"$regex": "^" + year}})
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get frc_author metrics " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get frc_author metrics " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
total = cnt = middle = 0
for row in rows:
total += 1
- field = 'creators' if 'creators' in row else 'author'
- if 'jrc_author' in row and len(row['jrc_author']) == len(row[field]):
+ field = "creators" if "creators" in row else "author"
+ if "jrc_author" in row and len(row["jrc_author"]) == len(row[field]):
cnt += 1
- elif 'jrc_author' not in row:
+ elif "jrc_author" not in row:
middle += 1
- stat['Author'] = f"{cnt:,} " \
- + "entries with all Janelia authors "
- stat['Author'] += f"{total-cnt:,} " \
- + "entries with at least one external collaborator "
- stat['Author'] += f"{middle:,} " \
- + "entries with no Janelia first or last authors "
+ stat["Author"] = (
+ f"{cnt:,} "
+ + "entries with all Janelia authors "
+ )
+ stat["Author"] += (
+ f"{total-cnt:,} "
+ + "entries with at least one external collaborator "
+ )
+ stat["Author"] += (
+ f"{middle:,} "
+ + "entries with no Janelia first or last authors "
+ )
sheet.append(f"Entries with all Janelia authors\t{cnt}")
sheet.append(f"Entries with external collaborators\t{total-cnt}")
sheet.append(f"Entries with no Janelia first or last authors\t{middle}")
# Preprints
no_relation = get_no_relation(year)
- cnt = {'journal': 0, 'preprint': 0}
- for atype in ['journal', 'preprint']:
- for src in ['Crossref', 'DataCite']:
+ cnt = {"journal": 0, "preprint": 0}
+ for atype in ["journal", "preprint"]:
+ for src in ["Crossref", "DataCite"]:
if src in no_relation and atype in no_relation[src]:
cnt[atype] += no_relation[src][atype]
- stat['Preprints'] += f"{cnt['journal']:,}" \
- + " journal articles without preprints "
- stat['Preprints'] += f"{cnt['preprint']:,}" \
- + " preprints without journal articles "
+ stat["Preprints"] += (
+ f"{cnt['journal']:,}"
+ + " journal articles without preprints "
+ )
+ stat["Preprints"] += (
+ f"{cnt['preprint']:,}"
+ + " preprints without journal articles "
+ )
# Journals
journal = get_top_journals(year)
cnt = 0
- stat['Topjournals'] = ""
+ stat["Topjournals"] = ""
sheet.append("Top journals")
for key in sorted(journal, key=journal.get, reverse=True):
- stat['Topjournals'] += f" {key}: {journal[key]} "
+ stat["Topjournals"] += f" {key}: {journal[key]} "
sheet.append(f"\t{key}\t{journal[key]}")
cnt += 1
if cnt >= 10:
break
# Tags
- payload = [{"$match": {"jrc_tag": {"$exists": True}, "jrc_obtained_from": "Crossref",
- "jrc_publishing_date": {"$regex": "^"+ year}}},
- {"$project": {"doi": 1, "type": "$type", "numtags": {"$size": "$jrc_tag"}}}
- ]
+ payload = [
+ {
+ "$match": {
+ "jrc_tag": {"$exists": True},
+ "jrc_obtained_from": "Crossref",
+ "jrc_publishing_date": {"$regex": "^" + year},
+ }
+ },
+ {"$project": {"doi": 1, "type": "$type", "numtags": {"$size": "$jrc_tag"}}},
+ ]
try:
rows = coll.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get frc_author metrics " \
- + "from dois collection"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ "Could not get frc_author metrics " + "from dois collection"
+ ),
+ message=error_message(err),
+ )
cnt = total = 0
for row in rows:
- if 'type' not in row or row['type'] not in ('journal-article', 'posted-content'):
+ if "type" not in row or row["type"] not in (
+ "journal-article",
+ "posted-content",
+ ):
continue
cnt += 1
- total += row['numtags']
- stat['Tags'] = f"{total/cnt:.1f} " \
- + "average tags per tagged entry"
+ total += row["numtags"]
+ stat["Tags"] = (
+ f"{total/cnt:.1f} "
+ + "average tags per tagged entry"
+ )
sheet.append(f"Average tags per tagged entry\t{total/cnt:.1f}")
sheet = create_downloadable(f"{year}_in_review", None, "\n".join(sheet))
- html = f"
"
+ )
+ html += " " + year_pulldown("dois_report", all_years=False)
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"{year}",
+ html=html,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route("/dois_year")
def dois_year():
- ''' Show publishing years with counts
- '''
- payload = [{"$group": {"_id": {"year": {"$substrBytes": ["$jrc_publishing_date", 0, 4]},
- "source": "$jrc_obtained_from"
- },
- "count": {"$sum": 1}}},
- {"$sort": {"_id.pdate": -1}}
- ]
- try:
- rows = DB['dis'].dois.aggregate(payload)
- except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get tags from dois collection"),
- message=error_message(err))
- html = '
' \
- + '
Year
Crossref
DataCite
' \
- + '
'
+ """Show publishing years with counts"""
+ payload = [
+ {
+ "$group": {
+ "_id": {
+ "year": {"$substrBytes": ["$jrc_publishing_date", 0, 4]},
+ "source": "$jrc_obtained_from",
+ },
+ "count": {"$sum": 1},
+ }
+ },
+ {"$sort": {"_id.pdate": -1}},
+ ]
+ try:
+ rows = DB["dis"].dois.aggregate(payload)
+ except Exception as err:
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get tags from dois collection"),
+ message=error_message(err),
+ )
+ html = (
+ '
'
+ + "
Year
Crossref
DataCite
"
+ + "
"
+ )
years = {}
for row in rows:
- if row['_id']['year'] not in years:
- years[row['_id']['year']] = {}
- if row['_id']['source'] not in years[row['_id']['year']]:
- years[row['_id']['year']][row['_id']['source']] = row['count']
+ if row["_id"]["year"] not in years:
+ years[row["_id"]["year"]] = {}
+ if row["_id"]["source"] not in years[row["_id"]["year"]]:
+ years[row["_id"]["year"]][row["_id"]["source"]] = row["count"]
data = {"years": [], "Crossref": [], "DataCite": []}
for year in sorted(years, reverse=True):
- if year < '2006':
+ if year < "2006":
continue
- data['years'].insert(0, str(year))
- onclick = "onclick='nav_post(\"publishing_year\",\"" + year + "\")'"
+ data["years"].insert(0, str(year))
+ onclick = 'onclick=\'nav_post("publishing_year","' + year + "\")'"
link = f"{year}"
html += f"
{link}
"
- for source in app.config['SOURCES']:
+ for source in app.config["SOURCES"]:
if source in years[year]:
data[source].insert(0, years[year][source])
- onclick = "onclick='nav_post(\"publishing_year\",\"" + year \
- + "\",\"" + source + "\")'"
+ onclick = (
+ 'onclick=\'nav_post("publishing_year","'
+ + year
+ + '","'
+ + source
+ + "\")'"
+ )
link = f"{years[year][source]:,}"
else:
data[source].insert(0, 0)
link = ""
html += f"
{link}
"
html += "
"
- html += '
'
- chartscript, chartdiv = DP.stacked_bar_chart(data, "DOIs published by year/source",
- xaxis="years", yaxis=app.config['SOURCES'],
- colors=DP.SOURCE_PALETTE)
- return make_response(render_template('bokeh.html', urlroot=request.url_root,
- title="DOIs published by year", html=html,
- chartscript=chartscript, chartdiv=chartdiv,
- navbar=generate_navbar('DOIs')))
-
-
-@app.route('/dois_insertpicker')
+ html += "
"
+ chartscript, chartdiv = DP.stacked_bar_chart(
+ data,
+ "DOIs published by year/source",
+ xaxis="years",
+ yaxis=app.config["SOURCES"],
+ colors=DP.SOURCE_PALETTE,
+ )
+ return make_response(
+ render_template(
+ "bokeh.html",
+ urlroot=request.url_root,
+ title="DOIs published by year",
+ html=html,
+ chartscript=chartscript,
+ chartdiv=chartdiv,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route("/dois_insertpicker")
def show_insert_picker():
- '''
+ """
Show a datepicker for selecting DOIs inserted since a specified date
- '''
+ """
before = "Select a minimum DOI insertion date"
start = last_thursday()
- after = '' \
- + 'Look up DOIs'
- return make_response(render_template('picker.html', urlroot=request.url_root,
- title="DOI lookup by insertion date", before=before,
- start=start, stop=str(date.today()),
- after=after, navbar=generate_navbar('DOIs')))
-
-
-@app.route('/doiui/insert/')
+ after = (
+ ''
+ + "Look up DOIs"
+ )
+ return make_response(
+ render_template(
+ "picker.html",
+ urlroot=request.url_root,
+ title="DOI lookup by insertion date",
+ before=before,
+ start=start,
+ stop=str(date.today()),
+ after=after,
+ navbar=generate_navbar("DOIs"),
+ )
+ )
+
+
+@app.route("/doiui/insert/")
def show_insert(idate):
- '''
+ """
Return DOIs that have been inserted since a specified date
- '''
+ """
try:
- isodate = datetime.strptime(idate,'%Y-%m-%d')
+ isodate = datetime.strptime(idate, "%Y-%m-%d")
except Exception as err:
raise InvalidUsage(str(err), 400) from err
try:
- rows = DB['dis'].dois.find({"jrc_inserted": {"$gte" : isodate}},
- {'_id': 0}).sort([("jrc_obtained_from", 1), ("jrc_inserted", 1)])
+ rows = (
+ DB["dis"]
+ .dois.find({"jrc_inserted": {"$gte": isodate}}, {"_id": 0})
+ .sort([("jrc_obtained_from", 1), ("jrc_inserted", 1)])
+ )
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get DOIs"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get DOIs"),
+ message=error_message(err),
+ )
if not rows:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("DOIs not found"),
- message=f"No DOIs were inserted on or after {idate}")
- html = '
' \
- + '
DOI
Source
Type
Published
Load source
' \
- + '
Inserted
Is version of
Newsletter
'
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("DOIs not found"),
+ message=f"No DOIs were inserted on or after {idate}",
+ )
+ html = (
+ '
'
+ + "
DOI
Source
Type
Published
Load source
"
+ + "
Inserted
Is version of
Newsletter
"
+ )
fileoutput = ""
limit = weeks_ago(2)
for row in rows:
- source = row['jrc_load_source'] if row['jrc_load_source'] else ""
+ source = row["jrc_load_source"] if row["jrc_load_source"] else ""
typ = subtype = ""
- if 'type' in row:
- typ = row['type']
- if 'subtype' in row:
- subtype = row['subtype']
+ if "type" in row:
+ typ = row["type"]
+ if "subtype" in row:
+ subtype = row["subtype"]
typ += f" {subtype}"
- elif 'types' in row and 'resourceTypeGeneral' in row['types']:
- typ = row['types']['resourceTypeGeneral']
+ elif "types" in row and "resourceTypeGeneral" in row["types"]:
+ typ = row["types"]["resourceTypeGeneral"]
version = []
- if 'relation' in row and 'is-version-of' in row['relation']:
- for ver in row['relation']['is-version-of']:
- if ver['id-type'] == 'doi':
- version.append(ver['id'])
+ if "relation" in row and "is-version-of" in row["relation"]:
+ for ver in row["relation"]["is-version-of"]:
+ if ver["id-type"] == "doi":
+ version.append(ver["id"])
version = doi_link(version) if version else ""
- news = row['jrc_newsletter'] if 'jrc_newsletter' in row else ""
- if (not news) and (row['jrc_obtained_from'] == 'Crossref') and \
- (row['jrc_publishing_date'] >= str(limit)) \
- and (typ == 'journal-article' or subtype == 'preprint'):
- rclass = 'candidate'
+ news = row["jrc_newsletter"] if "jrc_newsletter" in row else ""
+ if (
+ (not news)
+ and (row["jrc_obtained_from"] == "Crossref")
+ and (row["jrc_publishing_date"] >= str(limit))
+ and (typ == "journal-article" or subtype == "preprint")
+ ):
+ rclass = "candidate"
else:
- rclass = 'other'
- html += f"
'
- html = create_downloadable(ipd['field'], header, fileoutput) + html
+ for row in sorted(works, key=lambda row: row["published"], reverse=True):
+ html += (
+ "
"
@@ -3334,173 +4085,261 @@ def show_journal_ui(jname, year='All'):
fileoutput = ""
for row in rows:
cnt += 1
- html += f"
{row['jrc_publishing_date']}
{doi_link(row['doi'])}
" \
- + f"
{row['title'][0]}
"
+ html += (
+ f"
{row['jrc_publishing_date']}
{doi_link(row['doi'])}
"
+ + f"
{row['title'][0]}
"
+ )
fileoutput += f"{row['jrc_publishing_date']}\t{row['doi']}\t{row['title'][0]}\n"
- html += '
'
- fname = 'journals'
- if year != 'All':
+ html += "
"
+ fname = "journals"
+ if year != "All":
fname += f"_{year}"
print(fname)
html = create_downloadable(fname, header, fileoutput) + html
title = f"DOIs for {jname} ({cnt})"
- if year != 'All':
+ if year != "All":
title += f" (year={year})"
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=title, html=html,
- navbar=generate_navbar('Journals')))
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=title,
+ html=html,
+ navbar=generate_navbar("Journals"),
+ )
+ )
+
# ******************************************************************************
# * UI endpoints (ORCID) *
# ******************************************************************************
-@app.route('/orcidui/')
+@app.route("/orcidui/")
def show_oid_ui(oid):
- ''' Show ORCID user
- '''
+ """Show ORCID user"""
try:
- resp = requests.get(f"{app.config['ORCID']}{oid}",
- headers={"Accept": "application/json"}, timeout=10)
+ resp = requests.get(
+ f"{app.config['ORCID']}{oid}",
+ headers={"Accept": "application/json"},
+ timeout=10,
+ )
data = resp.json()
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not retrieve ORCID ID"),
- message=error_message(err))
- if 'person' not in data:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning(f"Could not find ORCID ID {oid}", 'warning'),
- message=data['user-message'])
- name = data['person']['name']
- if name['credit-name']:
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not retrieve ORCID ID"),
+ message=error_message(err),
+ )
+ if "person" not in data:
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not find ORCID ID {oid}", "warning"),
+ message=data["user-message"],
+ )
+ name = data["person"]["name"]
+ if name["credit-name"]:
who = f"{name['credit-name']['value']}"
- elif 'family-name' not in name or not name['family-name']:
- who = f"{name['given-names']['value']} " \
- + "(Family name is missing in ORCID)"
+ elif "family-name" not in name or not name["family-name"]:
+ who = (
+ f"{name['given-names']['value']} "
+ + "(Family name is missing in ORCID)"
+ )
else:
who = f"{name['given-names']['value']} {name['family-name']['value']}"
try:
- orciddata, dois = get_orcid_from_db(oid, use_eid=bool('userIdO365' in oid), both=True)
+ orciddata, dois = get_orcid_from_db(
+ oid, use_eid=bool("userIdO365" in oid), both=True
+ )
except CustomException as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning(f"Could not find ORCID ID {oid}", 'error'),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not find ORCID ID {oid}", "error"),
+ message=error_message(err),
+ )
if not orciddata:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning(f"Could not find ORCID ID {oid}", 'warning'),
- message="Could not find any information for this ORCID ID")
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not find ORCID ID {oid}", "warning"),
+ message="Could not find any information for this ORCID ID",
+ )
html = f"
{who}
{orciddata}"
# Works
- if 'works' in data['activities-summary'] and data['activities-summary']['works']['group']:
+ if (
+ "works" in data["activities-summary"]
+ and data["activities-summary"]["works"]["group"]
+ ):
html += add_orcid_works(data, dois)
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=f"{oid}", html=html,
- navbar=generate_navbar('ORCID')))
-
-
-@app.route('/userui/')
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"{oid}",
+ html=html,
+ navbar=generate_navbar("ORCID"),
+ )
+ )
+
+
+@app.route("/userui/")
def show_user_ui(eid):
- ''' Show user record by employeeId
- '''
+ """Show user record by employeeId"""
try:
orciddata, _ = get_orcid_from_db(eid, use_eid=True)
except CustomException as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning(f"Could not find user ID {eid}",
- 'warning'),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not find user ID {eid}", "warning"),
+ message=error_message(err),
+ )
if not orciddata:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning(f"Could not find user ID {eid}", 'warning'),
- message="Could not find any information for this employee ID")
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=f"User ID {eid}", html=orciddata,
- navbar=generate_navbar('ORCID')))
-
-
-@app.route('/unvaluserui/')
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not find user ID {eid}", "warning"),
+ message="Could not find any information for this employee ID",
+ )
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"User ID {eid}",
+ html=orciddata,
+ navbar=generate_navbar("ORCID"),
+ )
+ )
+
+
+@app.route("/unvaluserui/")
def show_unvaluser_ui(iid):
- ''' Show user record by orcid collection ID
- '''
+ """Show user record by orcid collection ID"""
try:
orciddata, _ = get_orcid_from_db(iid, bare=True)
except CustomException as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning(f"Could not find orcid collection ID {iid}",
- 'warning'),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(
+ f"Could not find orcid collection ID {iid}", "warning"
+ ),
+ message=error_message(err),
+ )
if not orciddata:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning(f"Could not find ID {iid}", 'warning'),
- message="Could not find any information for this orcid " \
- + "collection ID")
- return make_response(render_template('general.html', urlroot=request.url_root,
- title="User has no ORCID or employee ID",
- html=orciddata, navbar=generate_navbar('ORCID')))
-
-
-@app.route('/namesui/')
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not find ID {iid}", "warning"),
+ message="Could not find any information for this orcid " + "collection ID",
+ )
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title="User has no ORCID or employee ID",
+ html=orciddata,
+ navbar=generate_navbar("ORCID"),
+ )
+ )
+
+
+@app.route("/namesui/")
def show_names_ui(name):
- ''' Show user names
- '''
- payload = {"$or": [{"family": {"$regex": name, "$options" : "i"}},
- {"given": {"$regex": name, "$options" : "i"}},
- ]}
- try:
- if not DB['dis'].orcid.count_documents(payload):
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning("Could not find name", 'warning'),
- message=f"Could not find any names matching {name}")
- rows = DB['dis'].orcid.find(payload).collation({"locale": "en"}).sort("family", 1)
- except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not count names in dois collection"),
- message=error_message(err))
+ """Show user names"""
+ payload = {
+ "$or": [
+ {"family": {"$regex": name, "$options": "i"}},
+ {"given": {"$regex": name, "$options": "i"}},
+ ]
+ }
+ try:
+ if not DB["dis"].orcid.count_documents(payload):
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not find name", "warning"),
+ message=f"Could not find any names matching {name}",
+ )
+ rows = (
+ DB["dis"].orcid.find(payload).collation({"locale": "en"}).sort("family", 1)
+ )
+ except Exception as err:
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not count names in dois collection"),
+ message=error_message(err),
+ )
html, count = generate_user_table(rows)
html = f"Search term: {name} " + html
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=f"Authors: {count:,}", html=html,
- navbar=generate_navbar('ORCID')))
-
-
-@app.route('/orcid_tag')
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"Authors: {count:,}",
+ html=html,
+ navbar=generate_navbar("ORCID"),
+ )
+ )
+
+
+@app.route("/orcid_tag")
def orcid_tag():
- ''' Show ORCID tags (affiliations) with counts
- '''
- payload = [{"$unwind" : "$affiliations"},
- {"$project": {"_id": 0, "affiliations": 1, "orcid": 1}},
- {"$group": {"_id": "$affiliations", "count":{"$sum": 1},
- "orcid": {"$push": "$orcid"}}},
- {"$sort": {"_id": 1}}
- ]
- try:
- orgs = DL.get_supervisory_orgs(DB['dis'].suporg)
- except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get supervisory orgs"),
- message=error_message(err))
- try:
- rows = DB['dis'].orcid.aggregate(payload)
- except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get affiliations " \
- + "from orcid collection"),
- message=error_message(err))
- html = ""
- html += '
',
+ navbar=generate_navbar("ORCID"),
+ )
+ )
html = "
Select a name for details:
"
- html += "
Name
" \
- + "
Title
Location
"
+ html += (
+ "
Name
"
+ + "
Title
Location
"
+ )
for rec in response:
pname = f"{rec['nameFirstPreferred']} {rec['nameLastPreferred']}"
link = f"{pname}"
- loc = rec['locationName'] if 'locationName' in rec else ""
+ loc = rec["locationName"] if "locationName" in rec else ""
if "Janelia" in loc:
loc = f"{loc}"
html += f"
{link}
{rec['businessTitle']}
{loc}
"
html += "
"
- return make_response(render_template('people.html', urlroot=request.url_root,
- title="Search People system", content=html,
- navbar=generate_navbar('External systems')))
-
-
-@app.route('/peoplerec/')
+ return make_response(
+ render_template(
+ "people.html",
+ urlroot=request.url_root,
+ title="Search People system",
+ content=html,
+ navbar=generate_navbar("External systems"),
+ )
+ )
+
+
+@app.route("/peoplerec/")
def peoplerec(eid):
- ''' Show a single People record
- '''
+ """Show a single People record"""
try:
rec = JRC.call_people_by_id(eid)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning(f"Could not get People data for {eid}"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not get People data for {eid}"),
+ message=error_message(err),
+ )
if not rec:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning(f"Could not find People record for {eid}"),
- message="No record found")
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not find People record for {eid}"),
+ message="No record found",
+ )
title = f"{rec['nameFirstPreferred']} {rec['nameLastPreferred']}"
- for field in ['employeeId', 'managerId']:
+ for field in ["employeeId", "managerId"]:
if field in rec:
del rec[field]
- if 'photoURL' in rec:
- title += f" "
+ if "photoURL" in rec:
+ title += (
+ f" "
+ )
html = f"
{json.dumps(rec, indent=2)}
"
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=title, html=html,
- navbar=generate_navbar('External systems')))
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=title,
+ html=html,
+ navbar=generate_navbar("External systems"),
+ )
+ )
+
# ******************************************************************************
# * UI endpoints (stats) *
# ******************************************************************************
-@app.route('/stats_database')
+@app.route("/stats_database")
def stats_database():
- ''' Show database stats
- '''
+ """Show database stats"""
collection = {}
try:
- cnames = DB['dis'].list_collection_names()
+ cnames = DB["dis"].list_collection_names()
for cname in cnames:
- stat = DB['dis'].command('collStats', cname)
+ stat = DB["dis"].command("collStats", cname)
indices = []
- for key, val in stat['indexSizes'].items():
+ for key, val in stat["indexSizes"].items():
indices.append(f"{key} ({humansize(val, space='mem')})")
- free = stat['freeStorageSize'] / stat['storageSize'] * 100
- if 'avgObjSize' not in stat:
- stat['avgObjSize'] = 0
- collection[cname] = {"docs": f"{stat['count']:,}",
- "docsize": humansize(stat['avgObjSize'], space='mem'),
- "size": humansize(stat['storageSize'], space='mem'),
- "free": f"{free:.2f}%",
- "idx": ", ".join(indices)
- }
- except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get collection stats"),
- message=error_message(err))
- html = '
"
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title="Database statistics",
+ html=html,
+ navbar=generate_navbar("Stats"),
+ )
+ )
+
# ******************************************************************************
# * UI endpoints (tags) *
# ******************************************************************************
-@app.route('/tag/')
+@app.route("/tag/")
def tagrec(tag):
- ''' Show a single tag
- '''
+ """Show a single tag"""
payload = {"affiliations": tag}
try:
- acnt = DB['dis'].orcid.count_documents(payload)
+ acnt = DB["dis"].orcid.count_documents(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get users for tag"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get users for tag"),
+ message=error_message(err),
+ )
tagtype = "Affiliation" if acnt else ""
try:
- orgs = DL.get_supervisory_orgs(DB['dis'].suporg)
+ orgs = DL.get_supervisory_orgs(DB["dis"].suporg)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get supervisory orgs"),
- message=error_message(err))
- payload = [{"$match": {"jrc_tag.name": tag}},
- {"$unwind": "$jrc_tag"},
- {"$match": {"jrc_tag.name": tag}},
- {"$group": {"_id": "$jrc_tag.type", "count": {"$sum": 1}}},
- {"$sort": {"_id": 1}}
- ]
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get supervisory orgs"),
+ message=error_message(err),
+ )
+ payload = [
+ {"$match": {"jrc_tag.name": tag}},
+ {"$unwind": "$jrc_tag"},
+ {"$match": {"jrc_tag.name": tag}},
+ {"$group": {"_id": "$jrc_tag.type", "count": {"$sum": 1}}},
+ {"$sort": {"_id": 1}},
+ ]
try:
- rows = DB['dis'].dois.aggregate(payload)
+ rows = DB["dis"].dois.aggregate(payload)
except Exception as err:
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get DOIs for tag"),
- message=error_message(err))
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get DOIs for tag"),
+ message=error_message(err),
+ )
html = "
"
pdict = {}
for row in rows:
- pdict[row['_id']] = row['count']
+ pdict[row["_id"]] = row["count"]
if not pdict and not acnt:
- return render_template('warning.html', urlroot=request.url_root,
- title=render_warning(f"Could not find tag {tag}", 'warning'),
- message="No DOI tags or user affiliations found")
+ return render_template(
+ "warning.html",
+ urlroot=request.url_root,
+ title=render_warning(f"Could not find tag {tag}", "warning"),
+ message="No DOI tags or user affiliations found",
+ )
parr = []
for key, val in pdict.items():
parr.append(f"{key}: {val}")
if tag in orgs:
- tagtype = 'Supervisory org'
+ tagtype = "Supervisory org"
html += f"
Tag type
{tagtype}
"
html += f"
Code
{orgs[tag]['code']}
"
html += "
Status
"
- if 'active' in orgs[tag]:
+ if "active" in orgs[tag]:
html += "Active
"
else:
html += "Inactive"
else:
html += f"
Tag type
{tagtype}
"
if pdict:
- onclick = "onclick='nav_post(\"jrc_tag.name\",\"" + tag + "\")'"
+ onclick = 'onclick=\'nav_post("jrc_tag.name","' + tag + "\")'"
link = f"Show DOIs"
html += f"
"
- return make_response(render_template('general.html', urlroot=request.url_root,
- title=f"Tag {tag}", html=html,
- navbar=generate_navbar('Tag/affiliation')))
+ return make_response(
+ render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"Tag {tag}",
+ html=html,
+ navbar=generate_navbar("Tag/affiliation"),
+ )
+ )
+
# ******************************************************************************
# * Multi-role endpoints (ORCID) *
# ******************************************************************************
-@app.route('/groups')
+
+@app.route("/groups")
def show_groups():
- '''
+ """
Show group owners from ORCID
Return records whose ORCIDs have a group
---
@@ -3905,45 +4924,65 @@ def show_groups():
description: groups
500:
description: MongoDB error
- '''
+ """
result = initialize_result()
- expected = 'html' if 'Accept' in request.headers \
- and 'html' in request.headers['Accept'] else 'json'
+ expected = (
+ "html"
+ if "Accept" in request.headers and "html" in request.headers["Accept"]
+ else "json"
+ )
payload = {"group": {"$exists": True}}
try:
- rows = DB['dis'].orcid.find(payload, {'_id': 0}).sort("group", 1)
+ rows = DB["dis"].orcid.find(payload, {"_id": 0}).sort("group", 1)
except Exception as err:
- if expected == 'html':
- return render_template('error.html', urlroot=request.url_root,
- title=render_warning("Could not get groups from MongoDB"),
- message=error_message(err))
+ if expected == "html":
+ return render_template(
+ "error.html",
+ urlroot=request.url_root,
+ title=render_warning("Could not get groups from MongoDB"),
+ message=error_message(err),
+ )
raise InvalidUsage(str(err), 500) from err
- if expected == 'json':
- result['rest']['source'] = 'mongo'
- result['data'] = []
+ if expected == "json":
+ result["rest"]["source"] = "mongo"
+ result["data"] = []
for row in rows:
- result['data'].append(row)
- result['rest']['row_count'] = len(result['data'])
+ result["data"].append(row)
+ result["rest"]["row_count"] = len(result["data"])
return generate_response(result)
- html = '
Name
ORCID
Group
' \
- + '
Affiliations
'
+ html = (
+ '
Name
ORCID
Group
'
+ + "
Affiliations
"
+ )
count = 0
for row in rows:
count += 1
- if 'affiliations' not in row:
- row['affiliations'] = ''
- link = f"{row['orcid']}" if 'orcid' in row else ''
- html += f"
{row['given'][0]} {row['family'][0]}
" \
- + f"
{link}
{row['group']}
" \
- + f"
{', '.join(row['affiliations'])}
"
- html += '
'
- return render_template('general.html', urlroot=request.url_root, title=f"Groups ({count:,})",
- html=html, navbar=generate_navbar('ORCID'))
+ if "affiliations" not in row:
+ row["affiliations"] = ""
+ link = (
+ f"{row['orcid']}"
+ if "orcid" in row
+ else ""
+ )
+ html += (
+ f"
{row['given'][0]} {row['family'][0]}
"
+ + f"
{link}
{row['group']}
"
+ + f"
{', '.join(row['affiliations'])}
"
+ )
+ html += "
"
+ return render_template(
+ "general.html",
+ urlroot=request.url_root,
+ title=f"Groups ({count:,})",
+ html=html,
+ navbar=generate_navbar("ORCID"),
+ )
+
# *****************************************************************************
-if __name__ == '__main__':
- if app.config["RUN_MODE"] == 'dev':
+if __name__ == "__main__":
+ if app.config["RUN_MODE"] == "dev":
app.run(debug=app.config["DEBUG"])
else:
app.run(debug=app.config["DEBUG"])
diff --git a/etl/bin/fix_jrc_author.py b/etl/bin/fix_jrc_author.py
index 06ecfed..b232379 100644
--- a/etl/bin/fix_jrc_author.py
+++ b/etl/bin/fix_jrc_author.py
@@ -1,9 +1,9 @@
-""" fix_jrc_author.py
- Add jrc_author field to DOIs. DOIs are selected by employee ID or by
- the absence of the jrc_author field.
+"""fix_jrc_author.py
+Add jrc_author field to DOIs. DOIs are selected by employee ID or by
+the absence of the jrc_author field.
"""
-__version__ = '1.1.0'
+__version__ = "1.1.0"
import argparse
import collections
@@ -20,13 +20,14 @@
# Counters
COUNT = collections.defaultdict(lambda: 0, {})
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -35,21 +36,27 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -57,74 +64,108 @@ def initialize_program():
def get_dois():
- ''' Get a list of DOIs to process
- Keyword arguments:
- None
- Returns:
- cnt: row count
- rows: rows object
- '''
+ """Get a list of DOIs to process
+ Keyword arguments:
+ None
+ Returns:
+ cnt: row count
+ rows: rows object
+ """
if ARG.EMPLOYEE:
try:
- orc = DB['dis'].orcid.find_one({"employeeId": ARG.EMPLOYEE})
+ orc = DB["dis"].orcid.find_one({"employeeId": ARG.EMPLOYEE})
except Exception as err:
terminate_program(err)
if not orc:
terminate_program(f"Employee ID {ARG.EMPLOYEE} not found")
- payload = {"$and": [{"$or": [{"author.given": {"$in": orc['given']}},
- {"creators.givenName": {"$in": orc['given']}}]},
- {"$or": [{"author.family": {"$in": orc['family']}},
- {"creators.familyName": {"$in": orc['family']}}]}]
- }
+ payload = {
+ "$and": [
+ {
+ "$or": [
+ {"author.given": {"$in": orc["given"]}},
+ {"creators.givenName": {"$in": orc["given"]}},
+ ]
+ },
+ {
+ "$or": [
+ {"author.family": {"$in": orc["family"]}},
+ {"creators.familyName": {"$in": orc["family"]}},
+ ]
+ },
+ ]
+ }
else:
- payload = {"$or": [{"author": {"$exists": True}},
- {"creators": {"$exists": True}}],
- "jrc_author": {"$exists": False}
- }
+ payload = {
+ "$or": [{"author": {"$exists": True}}, {"creators": {"$exists": True}}],
+ "jrc_author": {"$exists": False},
+ }
try:
- cnt = DB['dis'].dois.count_documents(payload)
- rows = DB['dis'].dois.find(payload)
+ cnt = DB["dis"].dois.count_documents(payload)
+ rows = DB["dis"].dois.find(payload)
except Exception as err:
terminate_program(err)
return cnt, rows
def add_jrc_author():
- """ Update tags for specified DOIs
- Keyword arguments:
- None
- Returns:
- None
+ """Update tags for specified DOIs
+ Keyword arguments:
+ None
+ Returns:
+ None
"""
LOGGER.info(f"Started run (version {__version__})")
cnt, rows = get_dois()
for row in tqdm(rows, total=cnt):
- COUNT['read'] += 1
- auth = DL.update_jrc_author(row['doi'], DB['dis'].dois, DB['dis'].orcid, write=ARG.WRITE)
+ COUNT["read"] += 1
+ auth = DL.update_jrc_author(
+ row["doi"], DB["dis"].dois, DB["dis"].orcid, write=ARG.WRITE
+ )
if auth:
- COUNT['updated'] += 1
+ COUNT["updated"] += 1
LOGGER.debug(f"{row['doi']} {auth}")
print(f"DOIs read: {COUNT['read']:,}")
print(f"DOIs updated: {COUNT['updated']:,}")
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Add jrc_author")
- PARSER.add_argument('--employee', dest='EMPLOYEE', action='store',
- help='Employee ID')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Add jrc_author")
+ PARSER.add_argument(
+ "--employee", dest="EMPLOYEE", action="store", help="Employee ID"
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/etl/bin/fix_middle_names.py b/etl/bin/fix_middle_names.py
index 039802d..204810e 100644
--- a/etl/bin/fix_middle_names.py
+++ b/etl/bin/fix_middle_names.py
@@ -1,13 +1,14 @@
-''' fix_middle_names.py
- Expand the number of given names in the orcid collection.
- There are two different modes:
- (default): Add a given name without a period following a middle initial
- (--period): Add a period to the end of the given name if it is a space
- followed by a middle initial
- Both modes will also look for the opportunity to add just a first name.
- Be sure to first run this without --write - there may be some strange given names
- that will generate equally strage results!
-'''
+"""fix_middle_names.py
+Expand the number of given names in the orcid collection.
+There are two different modes:
+(default): Add a given name without a period following a middle initial
+(--period): Add a period to the end of the given name if it is a space
+ followed by a middle initial
+Both modes will also look for the opportunity to add just a first name.
+Be sure to first run this without --write - there may be some strange given names
+that will generate equally strange results!
+"""
+
import argparse
from operator import attrgetter
import re
@@ -21,34 +22,41 @@
# Counters
COUNT = {"read": 0, "found": 0}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
LOGGER.critical(msg)
sys.exit(-1 if msg else 0)
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -56,91 +64,93 @@ def initialize_program():
def process_single_add_period(row):
- ''' Add a given name with a period after the middle initial
- Keyword arguments:
- row: a single row from the orcid collection
- Returns:
- None
- '''
- COUNT['read'] += 1
+ """Add a given name with a period after the middle initial
+ Keyword arguments:
+ row: a single row from the orcid collection
+ Returns:
+ None
+ """
+ COUNT["read"] += 1
found = False
- for given in row['given']:
- if given[-1] == '.':
+ for given in row["given"]:
+ if given[-1] == ".":
found = True
- COUNT['found'] += 1
+ COUNT["found"] += 1
break
if not found:
LOGGER.warning(f"Adding given name(s) {row['given']}")
- payload = {"given": row['given']}
- for given in row['given']:
- if re.search(r' [A-Z]$', given):
- payload['given'].append(given + '.')
+ payload = {"given": row["given"]}
+ for given in row["given"]:
+ if re.search(r" [A-Z]$", given):
+ payload["given"].append(given + ".")
break
found = False
- for given in row['given']:
- if re.search(r'^[A-Za-z]+$', given):
+ for given in row["given"]:
+ if re.search(r"^[A-Za-z]+$", given):
found = True
break
if not found:
- given = row['given'][0].split(' ')[0]
- payload['given'].append(given)
+ given = row["given"][0].split(" ")[0]
+ payload["given"].append(given)
print(payload)
if ARG.WRITE:
try:
- DB['dis'].orcid.update_one({"_id": row['_id']}, {"$set": payload})
+ DB["dis"].orcid.update_one({"_id": row["_id"]}, {"$set": payload})
except Exception as err:
terminate_program(err)
def process_single_add_no_period(row):
- ''' Add a given name with no period after the middle initial
- Keyword arguments:
- row: a single row from the orcid collection
- Returns:
- None
- '''
- COUNT['read'] += 1
+ """Add a given name with no period after the middle initial
+ Keyword arguments:
+ row: a single row from the orcid collection
+ Returns:
+ None
+ """
+ COUNT["read"] += 1
found = False
- for given in row['given']:
- if re.search(r'^[A-Z]\. [A-Z]\.$', given) \
- or re.search(r' [A-Z]$', given):
+ for given in row["given"]:
+ if re.search(r"^[A-Z]\. [A-Z]\.$", given) or re.search(r" [A-Z]$", given):
found = True
- COUNT['found'] += 1
+ COUNT["found"] += 1
break
if not found:
LOGGER.warning(f"Adding given name(s) to {row['given']}")
- payload = {"given": row['given']}
- for given in row['given']:
- if re.search(r' [A-Z].$', given):
- payload['given'].append(given.replace('.', ''))
+ payload = {"given": row["given"]}
+ for given in row["given"]:
+ if re.search(r" [A-Z].$", given):
+ payload["given"].append(given.replace(".", ""))
break
found = False
- for given in row['given']:
- if re.search(r'^[A-Za-z]+$', given):
+ for given in row["given"]:
+ if re.search(r"^[A-Za-z]+$", given):
found = True
break
if not found:
- given = row['given'][0].split(' ')[0]
- payload['given'].append(given)
+ given = row["given"][0].split(" ")[0]
+ payload["given"].append(given)
print(payload)
if ARG.WRITE:
try:
- DB['dis'].orcid.update_one({"_id": row['_id']}, {"$set": payload})
+ DB["dis"].orcid.update_one({"_id": row["_id"]}, {"$set": payload})
except Exception as err:
terminate_program(err)
def process_orcid():
- ''' Find and process given names in the orcid collection
- Keyword arguments:
- None
- Returns:
- None
- '''
- payload = {"given": {"$regex": " [A-Z]$"}} if ARG.PERIOD \
- else {"given": {"$regex": r" [A-Z]\.$"}}
+ """Find and process given names in the orcid collection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
+ payload = (
+ {"given": {"$regex": " [A-Z]$"}}
+ if ARG.PERIOD
+ else {"given": {"$regex": r" [A-Z]\.$"}}
+ )
try:
- rows = DB['dis'].orcid.find(payload)
+ rows = DB["dis"].orcid.find(payload)
except Exception as err:
terminate_program(err)
for row in rows:
@@ -153,22 +163,48 @@ def process_orcid():
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Update orcid collection with additional given names")
- PARSER.add_argument('--period', dest='PERIOD', action='store_true',
- help='Add a period to middle initials')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ description="Update orcid collection with additional given names"
+ )
+ PARSER.add_argument(
+ "--period",
+ dest="PERIOD",
+ action="store_true",
+ help="Add a period to middle initials",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/etl/bin/group_search.py b/etl/bin/group_search.py
index 5d9b912..dc35261 100644
--- a/etl/bin/group_search.py
+++ b/etl/bin/group_search.py
@@ -1,5 +1,5 @@
-""" group_search.py
- Find resources authored by groups (non-individuals) and write to a file
+"""group_search.py
+Find resources authored by groups (non-individuals) and write to a file
"""
import argparse
@@ -13,13 +13,14 @@
DB = {}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -28,22 +29,24 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
- manifold = ARG.MANIFOLD if source == 'dis' else 'prod'
+ manifold = ARG.MANIFOLD if source == "dis" else "prod"
dbo = attrgetter(f"{source}.{manifold}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -51,45 +54,47 @@ def initialize_program():
def search_single_group(group):
- ''' Find DOIs that are authored by a given group
- Keyword arguments:
- group: group to check
- Returns:
- List of DOI records
- '''
- suffix = CONFIG['crossref']['name']
+ """Find DOIs that are authored by a given group
+ Keyword arguments:
+ group: group to check
+ Returns:
+ List of DOI records
+ """
+ suffix = CONFIG["crossref"]["name"]
complete = False
parts = 0
- records= []
- coll = DB['dis'].dois
+ records = []
+ coll = DB["dis"].dois
while not complete:
try:
if parts:
- resp = JRC.call_crossref(f"{suffix}{group}&offset={parts*1000}", timeout=20)
+ resp = JRC.call_crossref(
+ f"{suffix}{group}&offset={parts*1000}", timeout=20
+ )
else:
resp = JRC.call_crossref(f"{suffix}{group}", timeout=20)
except Exception as err:
terminate_program(err)
- recs = resp['message']['items']
+ recs = resp["message"]["items"]
if not recs:
break
parts += 1
for rec in recs:
- row = coll.find_one({"doi": rec['DOI']})
+ row = coll.find_one({"doi": rec["DOI"]})
if not row:
records.append(rec)
return records
def perform_search():
- ''' Find DOIs that are authored by groups
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Find DOIs that are authored by groups
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
new_doi = {}
- for group in ('COSEM', 'CellMap', 'FlyLight', 'FlyEM', 'GENIE'):
+ for group in ("COSEM", "CellMap", "FlyLight", "FlyEM", "GENIE"):
print(f"Getting group-authored resources for {group}")
rows = search_single_group(group)
LOGGER.info(f"{group}: {len(rows)}")
@@ -104,33 +109,50 @@ def perform_search():
except Exception as _:
LOGGER.warning(f"Could not find authors for {row['DOI']}")
continue
- if 'Project' not in authors:
+ if "Project" not in authors:
continue
- if group == 'GENIE' and 'The GENIE Project' not in authors:
+ if group == "GENIE" and "The GENIE Project" not in authors:
continue
- new_doi[row['DOI']] = authors
+ new_doi[row["DOI"]] = authors
if new_doi:
with open("new_group_dois.txt", "w", encoding="ascii") as outstream:
for doi in new_doi:
outstream.write(f"{doi}\n")
LOGGER.warning("Wrote DOI file new_group_dois.txt")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Search Crossref for named (group) authors")
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ description="Search Crossref for named (group) authors"
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
CONFIG = configparser.ConfigParser()
- CONFIG.read('config.ini')
+ CONFIG.read("config.ini")
initialize_program()
perform_search()
terminate_program()
diff --git a/sync/bin/email_authors.py b/sync/bin/email_authors.py
index 19b31e1..8a94510 100644
--- a/sync/bin/email_authors.py
+++ b/sync/bin/email_authors.py
@@ -1,6 +1,6 @@
-''' email_authors.py
- Email information on newly-added DOIs to authors
-'''
+"""email_authors.py
+Email information on newly-added DOIs to authors
+"""
import argparse
from datetime import datetime, timedelta
@@ -17,13 +17,14 @@
AUTHORLIST = {}
TAGLIST = {}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -32,20 +33,26 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Initialize program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -53,12 +60,12 @@ def initialize_program():
def get_citation(row):
- ''' Create a citation for a DOI
- Keyword arguments:
- row: row from the dois collection
- Returns:
- DIS-style citation
- '''
+ """Create a citation for a DOI
+ Keyword arguments:
+ row: row from the dois collection
+ Returns:
+ DIS-style citation
+ """
authors = DL.get_author_list(row)
title = DL.get_title(row)
@@ -66,110 +73,132 @@ def get_citation(row):
def create_doilists(row):
- ''' Create an authorlist for a DOI
- Keyword arguments:
- row: row from the dois collection
- Returns:
- None
- '''
- if 'jrc_tag' in row:
+ """Create an authorlist for a DOI
+ Keyword arguments:
+ row: row from the dois collection
+ Returns:
+ None
+ """
+ if "jrc_tag" in row:
rtags = []
- for tag in row['jrc_tag']:
- rtags.append(tag['name'])
- TAGLIST[row['doi']] = ", ".join(rtags)
- if 'jrc_author' not in row:
+ for tag in row["jrc_tag"]:
+ rtags.append(tag["name"])
+ TAGLIST[row["doi"]] = ", ".join(rtags)
+ if "jrc_author" not in row:
return
names = []
- for auth in row['jrc_author']:
+ for auth in row["jrc_author"]:
try:
resp = JRC.call_people_by_id(auth)
except Exception as err:
print(type(err).__name__)
LOGGER.warning(f"Error calling people by ID: {err}")
terminate_program(err)
- if not resp or 'employeeId' not in resp or not resp['employeeId']:
+ if not resp or "employeeId" not in resp or not resp["employeeId"]:
LOGGER.warning(f"No People information found for {auth}")
continue
try:
- names.append(' '.join([resp['nameFirstPreferred'], resp['nameLastPreferred']]))
+ names.append(
+ " ".join([resp["nameFirstPreferred"], resp["nameLastPreferred"]])
+ )
except Exception as err:
LOGGER.warning(f"Error getting author name: {err}")
terminate_program(err)
- AUTHORLIST[row['doi']] = ", ".join(names)
+ AUTHORLIST[row["doi"]] = ", ".join(names)
def valid_author(authid):
- ''' Check if an author is valid
- Keyword arguments:
- authid: author ID
- Returns:
- True if valid, False otherwise
- '''
- orc = DL.single_orcid_lookup(authid, DB['dis'].orcid, 'employeeId')
- if not orc or 'alumni' in orc:
+ """Check if an author is valid
+ Keyword arguments:
+ authid: author ID
+ Returns:
+ True if valid, False otherwise
+ """
+ orc = DL.single_orcid_lookup(authid, DB["dis"].orcid, "employeeId")
+ if not orc or "alumni" in orc:
return False
- return orc['orcid'] if 'orcid' in orc else True
+ return orc["orcid"] if "orcid" in orc else True
def process_authors(authors, publications, cnt):
- ''' Create and send emails to each author with their resources
- Keyword arguments:
- authors: dictionary of authors and their citations
- publications: list of citations
- cnt: DOI count
- Returns:
- None
- '''
+ """Create and send emails to each author with their resources
+ Keyword arguments:
+ authors: dictionary of authors and their citations
+ publications: list of citations
+ cnt: DOI count
+ Returns:
+ None
+ """
# Individual author emails
summary = ""
for auth, val in authors.items():
resp = JRC.call_people_by_id(auth)
- if not resp or 'employeeId' not in resp or not resp['employeeId']:
+ if not resp or "employeeId" not in resp or not resp["employeeId"]:
LOGGER.warning(f"No People information found for {auth}")
continue
- name = ' '.join([resp['nameFirstPreferred'], resp['nameLastPreferred']])
+ name = " ".join([resp["nameFirstPreferred"], resp["nameLastPreferred"]])
author_valid = valid_author(auth)
if not author_valid:
LOGGER.warning(f"Skipping author {name}")
continue
- email = DISCONFIG['developer'] if ARG.TEST else resp['email']
- subject = "Your recent publication" if len(val['citations']) == 1 \
- else "Your recent publications"
- text1 = "publication has been added" if len(val['citations']) == 1 \
- else "publications have been added"
- text = f"Hello {resp['nameFirstPreferred']},
" \
- + "This is an automated email from Janelia’s Data and Information Services " \
- + f"department (DIS). Your recent {text1} to our database. " \
- + "Please review that the metadata below are correct. " \
- + "No action is required from you, but if you " \
- + "see an error, please let us know.
"
- text += "Tags: There may be multiple redundant " \
- + "tags for the same lab, project team, or support team. This is fine. Just " \
- + "let us know if there is a lab/team that is missing, or if we’ve included " \
- + "a lab/team that doesn’t belong.
"
- text += "Janelia authors: The employee names " \
- + "listed below may not correspond perfectly to the author names on the paper " \
- + "(e.g., Jane Doe / Janet P. Doe). This is fine. Just let us know if we’ve " \
- + "missed anyone, or if we’ve included someone we shouldn’t have."
+ email = DISCONFIG["developer"] if ARG.TEST else resp["email"]
+ subject = (
+ "Your recent publication"
+ if len(val["citations"]) == 1
+ else "Your recent publications"
+ )
+ text1 = (
+ "publication has been added"
+ if len(val["citations"]) == 1
+ else "publications have been added"
+ )
+ text = (
+ f"Hello {resp['nameFirstPreferred']},
"
+ + "This is an automated email from Janelia’s Data and Information Services "
+ + f"department (DIS). Your recent {text1} to our database. "
+ + "Please review that the metadata below are correct. "
+ + "No action is required from you, but if you "
+ + "see an error, please let us know.
"
+ )
+ text += (
+ "Tags: There may be multiple redundant "
+ + "tags for the same lab, project team, or support team. This is fine. Just "
+ + "let us know if there is a lab/team that is missing, or if we’ve included "
+ + "a lab/team that doesn’t belong.
"
+ )
+ text += (
+ "Janelia authors: The employee names "
+ + "listed below may not correspond perfectly to the author names on the paper "
+ + "(e.g., Jane Doe / Janet P. Doe). This is fine. Just let us know if we’ve "
+ + "missed anyone, or if we’ve included someone we shouldn’t have."
+ )
if isinstance(author_valid, bool):
LOGGER.warning(f"Author {name} has no ORCID")
- text += "
Note: We could not find " \
- + "an ORCID for you. To create one, please visit " \
- + "ORCID."
+ text += (
+ "
Note: We could not find "
+ + "an ORCID for you. To create one, please visit "
+ + "ORCID."
+ )
text += "
Thank you!
"
- for res in val['citations']:
+ for res in val["citations"]:
text += f"{res}"
- doi = val['dois'].pop(0)
+ doi = val["dois"].pop(0)
if doi in TAGLIST:
- text += f" Tags: {TAGLIST[doi]}"
+ text += (
+ f" Tags: {TAGLIST[doi]}"
+ )
if doi in AUTHORLIST:
- text += " Janelia authors: " \
- + f"{AUTHORLIST[doi]}"
+ text += (
+ " Janelia authors: "
+ + f"{AUTHORLIST[doi]}"
+ )
text += "
"
- summary += f"{name} has {len(val['citations'])} " \
- + f"citation{'' if len(val['citations']) == 1 else 's'} "
+ summary += (
+ f"{name} has {len(val['citations'])} "
+ + f"citation{'' if len(val['citations']) == 1 else 's'} "
+ )
if ARG.WRITE or ARG.TEST:
- JRC.send_email(text, DISCONFIG['sender'], [email], subject, mime='html')
+ JRC.send_email(text, DISCONFIG["sender"], [email], subject, mime="html")
LOGGER.info(f"Email sent to {name} ({email})")
if not (ARG.WRITE or ARG.TEST):
return
@@ -178,27 +207,36 @@ def process_authors(authors, publications, cnt):
text = f"{subject}. DOIs: {cnt} Authors: {len(authors)}
"
text += "
".join(publications)
text += "
" + summary
- email = DISCONFIG['developer'] if ARG.TEST else DISCONFIG['receivers']
- JRC.send_email(text, DISCONFIG['sender'], email, subject, mime='html')
+ email = DISCONFIG["developer"] if ARG.TEST else DISCONFIG["receivers"]
+ JRC.send_email(text, DISCONFIG["sender"], email, subject, mime="html")
def process_dois():
- ''' Find and process DOIs
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Find and process DOIs
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
week_ago = (datetime.today() - timedelta(days=ARG.DAYS)).strftime("%Y-%m-%d")
- LOGGER.info(f"Finding DOIs from the last {ARG.DAYS} day{'' if ARG.DAYS == 1 else 's'} " \
- + f"({week_ago})")
- payload = {"jrc_newsletter": {"$gte": week_ago}, "jrc_author": {"$exists": True},
- "$or": [{"jrc_obtained_from": "Crossref"},
- {"jrc_obtained_from": "DataCite",
- "types.resourceTypeGeneral": {"$ne": "Dataset"}}]}
+ LOGGER.info(
+ f"Finding DOIs from the last {ARG.DAYS} day{'' if ARG.DAYS == 1 else 's'} "
+ + f"({week_ago})"
+ )
+ payload = {
+ "jrc_newsletter": {"$gte": week_ago},
+ "jrc_author": {"$exists": True},
+ "$or": [
+ {"jrc_obtained_from": "Crossref"},
+ {
+ "jrc_obtained_from": "DataCite",
+ "types.resourceTypeGeneral": {"$ne": "Dataset"},
+ },
+ ],
+ }
try:
- cnt = DB['dis'].dois.count_documents(payload)
- rows = DB['dis'].dois.find(payload)
+ cnt = DB["dis"].dois.count_documents(payload)
+ rows = DB["dis"].dois.find(payload)
except Exception as err:
terminate_program(err)
LOGGER.info(f"DOIs found: {cnt}")
@@ -207,34 +245,67 @@ def process_dois():
for row in rows:
citation = get_citation(row)
publications.append(citation)
- for auth in row['jrc_author']:
+ for auth in row["jrc_author"]:
if auth not in authors:
authors[auth] = {"citations": [], "dois": []}
- authors[auth]['citations'].append(citation)
- authors[auth]['dois'].append(row['doi'])
- if row['doi'] not in AUTHORLIST:
+ authors[auth]["citations"].append(citation)
+ authors[auth]["dois"].append(row["doi"])
+ if row["doi"] not in AUTHORLIST:
create_doilists(row)
LOGGER.info(f"Authors found: {len(authors)}")
process_authors(authors, publications, cnt)
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Email information on newly-added DOIs to author")
- PARSER.add_argument('--days', dest='DAYS', action='store', type=int,
- default=5, help='Number of days to go back for DOIs')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--test', dest='TEST', action='store_true',
- default=False, help='Send emails to developer')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Actually send emails')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ description="Email information on newly-added DOIs to author"
+ )
+ PARSER.add_argument(
+ "--days",
+ dest="DAYS",
+ action="store",
+ type=int,
+ default=5,
+ help="Number of days to go back for DOIs",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--test",
+ dest="TEST",
+ action="store_true",
+ default=False,
+ help="Send emails to developer",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Actually send emails",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
DISCONFIG = JRC.simplenamespace_to_dict(JRC.get_config("dis"))
diff --git a/sync/bin/find_unloaded_relations.py b/sync/bin/find_unloaded_relations.py
index ee10991..2295775 100644
--- a/sync/bin/find_unloaded_relations.py
+++ b/sync/bin/find_unloaded_relations.py
@@ -1,8 +1,8 @@
-""" find_unloaded_relations.py
- Find referenced DOIs that have not been loaded
+"""find_unloaded_relations.py
+Find referenced DOIs that have not been loaded
"""
-__version__ = '1.0.0'
+__version__ = "1.0.0"
import argparse
from operator import attrgetter
@@ -14,15 +14,21 @@
# Database
DB = {}
# References
-REFERENCES = ("has-preprint", "is-preprint-of", "is-supplement-to", "is-supplemented-by")
+REFERENCES = (
+ "has-preprint",
+ "is-preprint-of",
+ "is-supplement-to",
+ "is-supplemented-by",
+)
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -31,21 +37,27 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.read")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -53,13 +65,13 @@ def initialize_program():
def processing():
- ''' Main processing routine
- Keyword arguments:
- None
- Returns:
- None
- '''
- coll = DB['dis'].dois
+ """Main processing routine
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
+ coll = DB["dis"].dois
loaded_dois = {}
LOGGER.info("Finding DOIs")
try:
@@ -67,7 +79,7 @@ def processing():
except Exception as err:
terminate_program(err)
for row in rows:
- loaded_dois[row['doi']] = True
+ loaded_dois[row["doi"]] = True
LOGGER.info(f"Loaded DOIs: {len(loaded_dois):,}")
unloaded = {}
LOGGER.info("Finding unloaded supplements")
@@ -81,8 +93,8 @@ def processing():
for rel in relations:
if rel in REFERENCES:
for itm in relations[rel]:
- if itm['id-type'] == 'doi' and itm['id'] not in loaded_dois:
- unloaded[itm['id']] = True
+ if itm["id-type"] == "doi" and itm["id"] not in loaded_dois:
+ unloaded[itm["id"]] = True
if unloaded:
with open("unloaded_relations.txt", "w", encoding="ascii") as file:
for doi in unloaded:
@@ -92,16 +104,32 @@ def processing():
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Find referenced DOIs that have not been loaded")
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ description="Find referenced DOIs that have not been loaded"
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/sync/bin/pull_arxiv.py b/sync/bin/pull_arxiv.py
index d32afff..ce68efa 100644
--- a/sync/bin/pull_arxiv.py
+++ b/sync/bin/pull_arxiv.py
@@ -1,5 +1,5 @@
-""" pull_arxiv.py
- Find DOIs from arXiv that can be added to the dois collection.
+"""pull_arxiv.py
+Find DOIs from arXiv that can be added to the dois collection.
"""
import argparse
@@ -21,12 +21,12 @@
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -35,22 +35,24 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
- manifold = ARG.MANIFOLD if source == 'dis' else 'prod'
+ manifold = ARG.MANIFOLD if source == "dis" else "prod"
dbo = attrgetter(f"{source}.{manifold}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -58,26 +60,26 @@ def initialize_program():
def doi_exists(doi):
- ''' Check if DOI exists in the database
- Keyword arguments:
- doi: DOI to check
- Returns:
- True if exists, False otherwise
- '''
+ """Check if DOI exists in the database
+ Keyword arguments:
+ doi: DOI to check
+ Returns:
+ True if exists, False otherwise
+ """
try:
- row = DB['dis']['dois'].find_one({"doi": doi})
+ row = DB["dis"]["dois"].find_one({"doi": doi})
except Exception as err:
terminate_program(err)
return bool(row)
def get_dois_from_arxiv():
- ''' Get DOIs from arXiv
- Keyword arguments:
- None
- Returns:
- List of DOIs
- '''
+ """Get DOIs from arXiv
+ Keyword arguments:
+ None
+ Returns:
+ List of DOIs
+ """
offset = 0
batch_size = 10
done = False
@@ -89,8 +91,8 @@ def get_dois_from_arxiv():
query = f"all:janelia{post}"
LOGGER.debug(query)
response = JRC.call_arxiv(query)
- if 'feed' in response and 'entry' in response['feed']:
- entry = response['feed']['entry']
+ if "feed" in response and "entry" in response["feed"]:
+ entry = response["feed"]["entry"]
parts += 1
LOGGER.debug(f"Part {parts:,} with {len(entry):,} entries")
if len(entry) < batch_size:
@@ -98,19 +100,19 @@ def get_dois_from_arxiv():
else:
offset += batch_size
for item in entry:
- COUNT['read'] += 1
+ COUNT["read"] += 1
if not isinstance(item, dict):
LOGGER.error(f"Item is not a dictionary: {item}")
continue
try:
- doi = item['id'].split('/')[-1]
+ doi = item["id"].split("/")[-1]
except Exception as err:
print(json.dumps(item, indent=2))
terminate_program(err)
doi = re.sub(r"v\d+$", "", doi) # Remove version
doi = f"10.48550/arxiv.{doi}"
if doi_exists(doi.lower()):
- COUNT['in_dois'] += 1
+ COUNT["in_dois"] += 1
continue
check[doi.lower()] = item
else:
@@ -120,25 +122,25 @@ def get_dois_from_arxiv():
def parse_authors(doi, msg, ready, review):
- ''' Parse an author record to see if there are any Janelia authors
- Keyword arguments:
- doi: DOI
- msg: DataCite message
- ready: list of DOIs ready for processing
- review: list of DOIs requiring review
- Returns:
- True if there are Janelia authors, otherwise False
- '''
- adet = DL.get_author_details(msg, DB['dis']['orcid'])
+ """Parse an author record to see if there are any Janelia authors
+ Keyword arguments:
+ doi: DOI
+ msg: DataCite message
+ ready: list of DOIs ready for processing
+ review: list of DOIs requiring review
+ Returns:
+ True if there are Janelia authors, otherwise False
+ """
+ adet = DL.get_author_details(msg, DB["dis"]["orcid"])
if adet:
janelians = []
mode = None
for auth in adet:
- if auth['janelian']:
+ if auth["janelian"]:
janelians.append(f"{auth['given']} {auth['family']} ({auth['match']})")
- if auth['match'] in ("ORCID", "asserted"):
- COUNT['asserted'] += 1
- mode = auth['match']
+ if auth["match"] in ("ORCID", "asserted"):
+ COUNT["asserted"] += 1
+ mode = auth["match"]
if janelians:
print(f"Janelians found for {doi}: {', '.join(janelians)}")
if mode:
@@ -150,29 +152,29 @@ def parse_authors(doi, msg, ready, review):
def run_search():
- ''' Search for DOIs on arXiv that can be added to the dois collection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Search for DOIs on arXiv that can be added to the dois collection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
check = get_dois_from_arxiv()
ready = []
review = []
- for doi, item in tqdm(check.items(), desc='DataCite check'):
+ for doi, item in tqdm(check.items(), desc="DataCite check"):
resp = JRC.call_datacite(doi)
- if resp and 'data' in resp:
- janelians = parse_authors(doi, resp['data']['attributes'], ready, review)
+ if resp and "data" in resp:
+ janelians = parse_authors(doi, resp["data"]["attributes"], ready, review)
if not janelians:
- COUNT['no_janelians'] += 1
+ COUNT["no_janelians"] += 1
if ready:
LOGGER.info("Writing DOIs to arxiv_ready.txt")
- with open('arxiv_ready.txt', 'w', encoding='ascii') as outstream:
+ with open("arxiv_ready.txt", "w", encoding="ascii") as outstream:
for item in ready:
outstream.write(f"{item}\n")
if review:
LOGGER.info("Writing DOIs to arxiv_review.txt")
- with open('arxiv_review.txt', 'w', encoding='ascii') as outstream:
+ with open("arxiv_review.txt", "w", encoding="ascii") as outstream:
for item in review:
outstream.write(f"{item}\n")
print(f"DOIs read from arXiv: {COUNT['read']:,}")
@@ -183,18 +185,33 @@ def run_search():
print(f"DOIs ready for processing: {len(ready):,}")
print(f"DOIs requiring review: {len(review):,}")
+
# -----------------------------------------------------------------------------
if __name__ == "__main__":
- PARSER = argparse.ArgumentParser(
- description="Sync DOIs from arXiv")
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ PARSER = argparse.ArgumentParser(description="Sync DOIs from arXiv")
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/sync/bin/pull_biorxiv.py b/sync/bin/pull_biorxiv.py
index 229d802..01d5579 100644
--- a/sync/bin/pull_biorxiv.py
+++ b/sync/bin/pull_biorxiv.py
@@ -1,5 +1,5 @@
-""" pull_biorxiv.py
- Find DOIs from bioRxiv that can be added to the dois collection.
+"""pull_biorxiv.py
+Find DOIs from bioRxiv that can be added to the dois collection.
"""
import argparse
@@ -20,12 +20,12 @@
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -34,22 +34,24 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
- manifold = ARG.MANIFOLD if source == 'dis' else 'prod'
+ manifold = ARG.MANIFOLD if source == "dis" else "prod"
dbo = attrgetter(f"{source}.{manifold}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -57,26 +59,26 @@ def initialize_program():
def doi_exists(doi):
- ''' Check if DOI exists in the database
- Keyword arguments:
- doi: DOI to check
- Returns:
- True if exists, False otherwise
- '''
+ """Check if DOI exists in the database
+ Keyword arguments:
+ doi: DOI to check
+ Returns:
+ True if exists, False otherwise
+ """
try:
- row = DB['dis']['dois'].find_one({"doi": doi})
+ row = DB["dis"]["dois"].find_one({"doi": doi})
except Exception as err:
terminate_program(err)
return bool(row)
def get_dois_from_biorxiv():
- ''' Get DOIs from bioRxiv
- Keyword arguments:
- None
- Returns:
- List of DOIs
- '''
+ """Get DOIs from bioRxiv
+ Keyword arguments:
+ None
+ Returns:
+ List of DOIs
+ """
start = str(date.today() - timedelta(days=ARG.DAYS))
stop = str(date.today())
offset = 0
@@ -87,68 +89,72 @@ def get_dois_from_biorxiv():
while not done:
query = f"{start}/{stop}/{offset}"
response = JRC.call_biorxiv(query)
- if 'messages' in response:
+ if "messages" in response:
parts += 1
- if 'count' in response['messages'][0]:
- if response['messages'][0]['count'] < 100:
+ if "count" in response["messages"][0]:
+ if response["messages"][0]["count"] < 100:
done = True
else:
offset += 100
else:
done = True
continue
- if 'collection' in response:
- for item in response['collection']:
- COUNT['read'] += 1
- if doi_exists(item['doi'].lower()):
- COUNT['in_dois'] += 1
+ if "collection" in response:
+ for item in response["collection"]:
+ COUNT["read"] += 1
+ if doi_exists(item["doi"].lower()):
+ COUNT["in_dois"] += 1
continue
- check[item['doi'].lower()] = item
+ check[item["doi"].lower()] = item
LOGGER.info(f"Got {len(check):,} DOIs from bioRxiv in {parts} part(s)")
return check
def check_corresponding_institution(item, resp, ready):
- ''' Parse an author record to see if there are any Janelia authors
- Keyword arguments:
- item: bioRxiv item
- resp: response from Crossref
- ready: list of DOIs ready for processing
- Returns:
- True or False
- '''
-
- if 'author_corresponding_institution' in item \
- and 'Janelia' in item['author_corresponding_institution']:
- if resp and 'message' in resp:
+ """Parse an author record to see if there are any Janelia authors
+ Keyword arguments:
+ item: bioRxiv item
+ resp: response from Crossref
+ ready: list of DOIs ready for processing
+ Returns:
+ True or False
+ """
+
+ if (
+ "author_corresponding_institution" in item
+ and "Janelia" in item["author_corresponding_institution"]
+ ):
+ if resp and "message" in resp:
LOGGER.info(f"Janelia found as corresponding institution for {item['doi']}")
- ready.append(item['doi'].lower())
+ ready.append(item["doi"].lower())
return True
else:
- COUNT['asserted_crossref'] += 1
- LOGGER.error(f"{item['doi']} with Janelia corresponding institution not in Crossref")
+ COUNT["asserted_crossref"] += 1
+ LOGGER.error(
+ f"{item['doi']} with Janelia corresponding institution not in Crossref"
+ )
return False
def parse_authors(doi, msg, ready, review):
- ''' Parse an author record to see if there are any Janelia authors
- Keyword arguments:
- doi: DOI
- msg: Crossref message
- ready: list of DOIs ready for processing
- review: list of DOIs requiring review
- Returns:
- True if there are Janelia authors, otherwise False
- '''
- adet = DL.get_author_details(msg, DB['dis']['orcid'])
+ """Parse an author record to see if there are any Janelia authors
+ Keyword arguments:
+ doi: DOI
+ msg: Crossref message
+ ready: list of DOIs ready for processing
+ review: list of DOIs requiring review
+ Returns:
+ True if there are Janelia authors, otherwise False
+ """
+ adet = DL.get_author_details(msg, DB["dis"]["orcid"])
if adet:
janelians = []
mode = None
for auth in adet:
- if auth['janelian']:
+ if auth["janelian"]:
janelians.append(f"{auth['given']} {auth['family']} ({auth['match']})")
- if auth['match'] in ("ORCID", "asserted"):
- mode = auth['match']
+ if auth["match"] in ("ORCID", "asserted"):
+ mode = auth["match"]
if janelians:
print(f"Janelians found for {doi}: {', '.join(janelians)}")
if mode:
@@ -160,31 +166,31 @@ def parse_authors(doi, msg, ready, review):
def run_search():
- ''' Search for DOIs on bioRxiv that can be added to the dois collection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Search for DOIs on bioRxiv that can be added to the dois collection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
check = get_dois_from_biorxiv()
ready = []
review = []
- for doi, item in tqdm(check.items(), desc='Crossref check'):
+ for doi, item in tqdm(check.items(), desc="Crossref check"):
resp = JRC.call_crossref(doi)
if check_corresponding_institution(item, resp, ready):
continue
- if resp and 'message' in resp:
- janelians = parse_authors(doi, resp['message'], ready, review)
+ if resp and "message" in resp:
+ janelians = parse_authors(doi, resp["message"], ready, review)
if not janelians:
- COUNT['no_janelians'] += 1
+ COUNT["no_janelians"] += 1
if ready:
LOGGER.info("Writing DOIs to biorxiv_ready.txt")
- with open('biorxiv_ready.txt', 'w', encoding='ascii') as outstream:
+ with open("biorxiv_ready.txt", "w", encoding="ascii") as outstream:
for item in ready:
outstream.write(f"{item}\n")
if review:
LOGGER.info("Writing DOIs to biorxiv_review.txt")
- with open('biorxiv_review.txt', 'w', encoding='ascii') as outstream:
+ with open("biorxiv_review.txt", "w", encoding="ascii") as outstream:
for item in review:
outstream.write(f"{item}\n")
print(f"DOIs read from bioRxiv: {COUNT['read']:,}")
@@ -195,21 +201,41 @@ def run_search():
print(f"DOIs ready for processing: {len(ready):,}")
print(f"DOIs requiring review: {len(review):,}")
+
# -----------------------------------------------------------------------------
if __name__ == "__main__":
- PARSER = argparse.ArgumentParser(
- description="Sync DOIs from bioRxiv")
- PARSER.add_argument('--days', dest='DAYS', action='store',
- default=7, type=int,
- help='Number of days to go back for DOIs')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ PARSER = argparse.ArgumentParser(description="Sync DOIs from bioRxiv")
+ PARSER.add_argument(
+ "--days",
+ dest="DAYS",
+ action="store",
+ default=7,
+ type=int,
+ help="Number of days to go back for DOIs",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/sync/bin/pull_figshare.py b/sync/bin/pull_figshare.py
index d3e2b25..153e7fe 100644
--- a/sync/bin/pull_figshare.py
+++ b/sync/bin/pull_figshare.py
@@ -1,6 +1,6 @@
-''' pull_figshare.py
- Pull resources from figshare
-'''
+"""pull_figshare.py
+Pull resources from figshare
+"""
import argparse
import collections
@@ -16,13 +16,14 @@
DB = {}
COUNT = collections.defaultdict(lambda: 0, {})
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -31,20 +32,26 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Initialize database connection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize database connection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -52,28 +59,28 @@ def initialize_program():
def doi_exists(doi):
- ''' Check if DOI exists in the database
- Keyword arguments:
- doi: DOI to check
- Returns:
- True if exists, False otherwise
- '''
+ """Check if DOI exists in the database
+ Keyword arguments:
+ doi: DOI to check
+ Returns:
+ True if exists, False otherwise
+ """
try:
- row = DB['dis']['dois'].find_one({"doi": doi})
+ row = DB["dis"]["dois"].find_one({"doi": doi})
except Exception as err:
terminate_program(err)
return bool(row)
def pull_single_group(dois, institution=None, group=None):
- ''' Pull DOIs for one group
- Keyword arguments:
- dois: list of DOIs to process
- institution: institution to process
- group: figshare group to process
- Returns:
- None
- '''
+ """Pull DOIs for one group
+ Keyword arguments:
+ dois: list of DOIs to process
+ institution: institution to process
+ group: figshare group to process
+ Returns:
+ None
+ """
if institution:
stype = "institution"
sterm = institution
@@ -91,13 +98,13 @@ def pull_single_group(dois, institution=None, group=None):
parts += 1
data = resp.json()
for art in data:
- COUNT['checked'] += 1
- if art['doi'].startswith("10.25378"):
- COUNT['janelia'] += 1
- if doi_exists(art['doi']):
- COUNT['in_dois'] += 1
+ COUNT["checked"] += 1
+ if art["doi"].startswith("10.25378"):
+ COUNT["janelia"] += 1
+ if doi_exists(art["doi"]):
+ COUNT["in_dois"] += 1
else:
- dois.append(art['doi'].lower())
+ dois.append(art["doi"].lower())
offset += 500
else:
done = True
@@ -105,16 +112,16 @@ def pull_single_group(dois, institution=None, group=None):
def pull_figshare():
- ''' Pull DOIs from figshare
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Pull DOIs from figshare
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
dois = []
pull_single_group(dois, institution=295)
- #for group in (11380, 49461):
+ # for group in (11380, 49461):
# pull_single_group(dois, group=group)
if dois:
LOGGER.info(f"Got {len(dois):,} DOIs from figshare")
@@ -127,21 +134,36 @@ def pull_figshare():
print(f"DOIs already in database: {COUNT['in_dois']:,}")
print(f"DOIs ready for processing: {len(dois)}")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Pull resources from figshare")
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Pull resources from figshare")
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
CONFIG = configparser.ConfigParser()
- CONFIG.read('config.ini')
+ CONFIG.read("config.ini")
pull_figshare()
diff --git a/sync/bin/pull_oa.py b/sync/bin/pull_oa.py
index 1510d33..9110d42 100644
--- a/sync/bin/pull_oa.py
+++ b/sync/bin/pull_oa.py
@@ -1,5 +1,5 @@
-""" pull_oa.py
- Find DOIs from OA that can be added to the dois collection.
+"""pull_oa.py
+Find DOIs from OA that can be added to the dois collection.
"""
import argparse
@@ -19,12 +19,12 @@
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -33,22 +33,24 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
- manifold = ARG.MANIFOLD if source == 'dis' else 'prod'
+ manifold = ARG.MANIFOLD if source == "dis" else "prod"
dbo = attrgetter(f"{source}.{manifold}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -56,26 +58,26 @@ def initialize_program():
def doi_exists(doi):
- ''' Check if DOI exists in the database
- Keyword arguments:
- doi: DOI to check
- Returns:
- True if exists, False otherwise
- '''
+ """Check if DOI exists in the database
+ Keyword arguments:
+ doi: DOI to check
+ Returns:
+ True if exists, False otherwise
+ """
try:
- row = DB['dis']['dois'].find_one({"doi": doi})
+ row = DB["dis"]["dois"].find_one({"doi": doi})
except Exception as err:
terminate_program(err)
return bool(row)
def get_dois_from_oa():
- ''' Get DOIs from oa
- Keyword arguments:
- None
- Returns:
- List of DOIs
- '''
+ """Get DOIs from oa
+ Keyword arguments:
+ None
+ Returns:
+ List of DOIs
+ """
size = 250
start = 0
done = False
@@ -89,17 +91,21 @@ def get_dois_from_oa():
else:
suffix = f"&size={size}"
response = JRC.call_oa(suffix=suffix)
- if 'hits' not in response:
+ if "hits" not in response:
terminate_program(f"Error in response from OA: {response}")
- for hit in response['hits']['hits']:
- COUNT['read'] += 1
- if '_source' in hit and 'DOI' in hit['_source'] and hit['_source']['DOI']:
- doi = hit['_source']['DOI'].lower()
+ for hit in response["hits"]["hits"]:
+ COUNT["read"] += 1
+ if "_source" in hit and "DOI" in hit["_source"] and hit["_source"]["DOI"]:
+ doi = hit["_source"]["DOI"].lower()
if doi_exists(doi.lower()):
- COUNT['in_dois'] += 1
+ COUNT["in_dois"] += 1
continue
check[doi] = hit
- if 'hits' in response and 'hits' in response['hits'] and len(response['hits']['hits']) > 0:
+ if (
+ "hits" in response
+ and "hits" in response["hits"]
+ and len(response["hits"]["hits"]) > 0
+ ):
parts += 1
start += size
else:
@@ -109,19 +115,19 @@ def get_dois_from_oa():
def parse_authors(doi, msg, ready):
- ''' Parse an author record to see if there are any Janelia authors
- Keyword arguments:
- doi: DOI
- msg: Crossref message
- ready: list of DOIs ready for processing
- Returns:
- True if there are Janelia authors, otherwise False
- '''
- adet = DL.get_author_details(msg, DB['dis']['orcid'])
+ """Parse an author record to see if there are any Janelia authors
+ Keyword arguments:
+ doi: DOI
+ msg: Crossref message
+ ready: list of DOIs ready for processing
+ Returns:
+ True if there are Janelia authors, otherwise False
+ """
+ adet = DL.get_author_details(msg, DB["dis"]["orcid"])
if adet:
janelians = []
for auth in adet:
- if auth['janelian']:
+ if auth["janelian"]:
janelians.append(f"{auth['given']} {auth['family']} ({auth['match']})")
if janelians:
print(f"Janelians found for {doi}: {', '.join(janelians)}")
@@ -134,32 +140,32 @@ def parse_authors(doi, msg, ready):
def run_search():
- ''' Search for DOIs on OA that can be added to the dois collection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Search for DOIs on OA that can be added to the dois collection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
check = get_dois_from_oa()
ready = []
no_janelians = []
- for doi, item in tqdm(check.items(), desc='Crossref check'):
+ for doi, item in tqdm(check.items(), desc="Crossref check"):
if DL.is_datacite(doi):
LOGGER.warning(f"DOI {doi} is a DataCite DOI")
resp = JRC.call_crossref(doi)
- if resp and 'message' in resp:
- janelians = parse_authors(doi, resp['message'], ready)
+ if resp and "message" in resp:
+ janelians = parse_authors(doi, resp["message"], ready)
if not janelians:
- COUNT['no_janelians'] += 1
+ COUNT["no_janelians"] += 1
no_janelians.append(doi)
if ready:
LOGGER.info("Writing DOIs to oa_ready.txt")
- with open('oa_ready.txt', 'w', encoding='ascii') as outstream:
+ with open("oa_ready.txt", "w", encoding="ascii") as outstream:
for item in ready:
outstream.write(f"{item}\n")
if no_janelians:
LOGGER.info("Writing DOIs to oa_no_janelians.txt")
- with open('oa_no_janelians.txt', 'w', encoding='ascii') as outstream:
+ with open("oa_no_janelians.txt", "w", encoding="ascii") as outstream:
for item in no_janelians:
outstream.write(f"{item}\n")
print(f"DOIs read from OA: {COUNT['read']:,}")
@@ -169,18 +175,33 @@ def run_search():
print(f"DOIs with no Janelian authors: {COUNT['no_janelians']:,}")
print(f"DOIs ready for processing: {len(ready):,}")
+
# -----------------------------------------------------------------------------
if __name__ == "__main__":
- PARSER = argparse.ArgumentParser(
- description="Sync DOIs from bioRxiv")
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ PARSER = argparse.ArgumentParser(description="Sync DOIs from bioRxiv")
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/sync/bin/update_dois.py b/sync/bin/update_dois.py
index db9ac80..057f49e 100644
--- a/sync/bin/update_dois.py
+++ b/sync/bin/update_dois.py
@@ -1,13 +1,13 @@
-""" update_dois.py
- Synchronize DOI information from an input source to databases.
- If a single DOI or file of DOIs is specified, these are updated in FlyBoy/config or DIS MongoDB.
- Otherwise, DOIs are synced according to target:
- - flyboy: FLYF2 to FlyBoy and the config system
- - dis: FLYF2, Crossref, DataCite, ALPS releases, EM datasets, and "to process" DOIs
- to DIS MongoDB.
+"""update_dois.py
+Synchronize DOI information from an input source to databases.
+If a single DOI or file of DOIs is specified, these are updated in FlyBoy/config or DIS MongoDB.
+Otherwise, DOIs are synced according to target:
+- flyboy: FLYF2 to FlyBoy and the config system
+- dis: FLYF2, Crossref, DataCite, ALPS releases, EM datasets, and "to process" DOIs
+ to DIS MongoDB.
"""
-__version__ = '7.0.0'
+__version__ = "7.0.0"
import argparse
import configparser
@@ -31,13 +31,16 @@
# Database
DB = {}
-READ = {'dois': "SELECT doi FROM doi_data",}
-WRITE = {'doi': "INSERT INTO doi_data (doi,title,first_author,"
- + "publication_date) VALUES (%s,%s,%s,%s) ON "
- + "DUPLICATE KEY UPDATE title=%s,first_author=%s,"
- + "publication_date=%s",
- 'delete_doi': "DELETE FROM doi_data WHERE doi=%s",
- }
+READ = {
+ "dois": "SELECT doi FROM doi_data",
+}
+WRITE = {
+ "doi": "INSERT INTO doi_data (doi,title,first_author,"
+ + "publication_date) VALUES (%s,%s,%s,%s) ON "
+ + "DUPLICATE KEY UPDATE title=%s,first_author=%s,"
+ + "publication_date=%s",
+ "delete_doi": "DELETE FROM doi_data WHERE doi=%s",
+}
# Configuration
CKEY = {"flyboy": "dois"}
CROSSREF = {}
@@ -52,18 +55,35 @@
# General
PROJECT = {}
SUPORG = {}
-DEFAULT_TAGS = ['Janelia Experimental Technology (jET)', 'Scientific Computing Software']
-COUNT = {'crossref': 0, 'datacite': 0, 'duplicate': 0, 'found': 0, 'foundc': 0, 'foundd': 0,
- 'notfound': 0, 'noupdate': 0, 'noauthor': 0,
- 'insert': 0, 'update': 0, 'delete': 0, 'foundfb': 0, 'flyboy': 0}
+DEFAULT_TAGS = [
+ "Janelia Experimental Technology (jET)",
+ "Scientific Computing Software",
+]
+COUNT = {
+ "crossref": 0,
+ "datacite": 0,
+ "duplicate": 0,
+ "found": 0,
+ "foundc": 0,
+ "foundd": 0,
+ "notfound": 0,
+ "noupdate": 0,
+ "noauthor": 0,
+ "insert": 0,
+ "update": 0,
+ "delete": 0,
+ "foundfb": 0,
+ "flyboy": 0,
+}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -72,13 +92,16 @@ def terminate_program(msg=None):
def call_responder(server, endpoint, payload=None, timeout=10):
- """ Call a responder
- Keyword arguments:
- server: server
- endpoint: REST endpoint
+ """Call a responder
+ Keyword arguments:
+ server: server
+ endpoint: REST endpoint
"""
- url = ((getattr(getattr(REST, server), "url") if server else "") if "REST" in globals() \
- else (os.environ.get('CONFIG_SERVER_URL') if server else "")) + endpoint
+ url = (
+ (getattr(getattr(REST, server), "url") if server else "")
+ if "REST" in globals()
+ else (os.environ.get("CONFIG_SERVER_URL") if server else "")
+ ) + endpoint
try:
if payload:
return requests.post(url, data=payload, timeout=timeout)
@@ -91,36 +114,38 @@ def call_responder(server, endpoint, payload=None, timeout=10):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['flyboy']
- if ARG.TARGET == 'dis':
- dbs.append('dis')
+ dbs = ["flyboy"]
+ if ARG.TARGET == "dis":
+ dbs.append("dis")
for source in dbs:
- manifold = ARG.MANIFOLD if source == 'dis' else 'prod'
+ manifold = ARG.MANIFOLD if source == "dis" else "prod"
dbo = attrgetter(f"{source}.{manifold}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s", dbo.name, manifold, dbo.host, dbo.user
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
- if ARG.TARGET == 'flyboy':
+ if ARG.TARGET == "flyboy":
return
try:
- rows = DB['dis'].project_map.find({})
+ rows = DB["dis"].project_map.find({})
except Exception as err:
terminate_program(err)
for row in rows:
- PROJECT[row['name']] = row['project']
+ PROJECT[row["name"]] = row["project"]
try:
orgs = DL.get_supervisory_orgs()
except Exception as err:
@@ -130,38 +155,44 @@ def initialize_program():
def get_dis_dois_from_mongo():
- ''' Get DOIs from MongoDB
- Keyword arguments:
- None
- Returns:
- Dict keyed by DOI with value set up update date
- '''
- coll = DB['dis'].dois
+ """Get DOIs from MongoDB
+ Keyword arguments:
+ None
+ Returns:
+ Dict keyed by DOI with value set to the update date
+ """
+ coll = DB["dis"].dois
result = {}
recs = coll.find({}, {"doi": 1, "updated": 1, "deposited": 1})
for rec in recs:
- if DL.is_datacite(rec['doi']):
+ if DL.is_datacite(rec["doi"]):
if "updated" not in rec:
- terminate_program(f"Could not find updated field for {rec['doi']} (DataCite)")
- result[rec['doi']] = {"updated": rec['updated']}
+ terminate_program(
+ f"Could not find updated field for {rec['doi']} (DataCite)"
+ )
+ result[rec["doi"]] = {"updated": rec["updated"]}
else:
if "deposited" not in rec:
- terminate_program(f"Could not find deposited field for {rec['doi']} (Crossref)")
- result[rec['doi']] = {"deposited": {'date-time': rec['deposited']['date-time']}}
+ terminate_program(
+ f"Could not find deposited field for {rec['doi']} (Crossref)"
+ )
+ result[rec["doi"]] = {
+ "deposited": {"date-time": rec["deposited"]["date-time"]}
+ }
LOGGER.info(f"Got {len(result):,} DOIs from DIS Mongo")
return result
def get_dois_from_crossref():
- ''' Get DOIs from Crossref
- Keyword arguments:
- None
- Returns:
- List of unique DOIs
- '''
+ """Get DOIs from Crossref
+ Keyword arguments:
+ None
+ Returns:
+ List of unique DOIs
+ """
dlist = []
LOGGER.info("Getting DOIs from Crossref")
- suffix = CONFIG['crossref']['janelia']
+ suffix = CONFIG["crossref"]["janelia"]
complete = False
parts = 0
while not complete:
@@ -172,78 +203,78 @@ def get_dois_from_crossref():
resp = JRC.call_crossref(suffix, timeout=20)
except Exception as err:
terminate_program(err)
- recs = resp['message']['items']
+ recs = resp["message"]["items"]
if not recs:
break
parts += 1
for rec in recs:
- COUNT['crossref'] += 1
- doi = rec['doi'] = rec['DOI']
- rec['jrc_obtained_from'] = 'Crossref'
+ COUNT["crossref"] += 1
+ doi = rec["doi"] = rec["DOI"]
+ rec["jrc_obtained_from"] = "Crossref"
if doi in CROSSREF:
- COUNT['duplicate'] += 1
+ COUNT["duplicate"] += 1
continue
dlist.append(doi)
CROSSREF[doi] = {"message": rec}
- if len(dlist) >= resp['message']['total-results']:
+ if len(dlist) >= resp["message"]["total-results"]:
complete = True
LOGGER.info(f"Got {len(dlist):,} DOIs from Crossref in {parts} part(s)")
return dlist
def get_dois_from_datacite(query):
- ''' Get DOIs from DataCite
- Keyword arguments:
- query: query type
- Returns:
- List of unique DOIs
- '''
+ """Get DOIs from DataCite
+ Keyword arguments:
+ query: query type
+ Returns:
+ List of unique DOIs
+ """
dlist = []
LOGGER.info(f"Getting DOIs from DataCite ({query})")
complete = False
- suffix = CONFIG['datacite'][query]
+ suffix = CONFIG["datacite"][query]
parts = 0
while not complete:
try:
- recs = call_responder('datacite', suffix, timeout=20)
+ recs = call_responder("datacite", suffix, timeout=20)
except Exception as err:
terminate_program(err)
parts += 1
- for rec in recs['data']:
- COUNT['datacite'] += 1
- rec['jrc_obtained_from'] = 'DataCite'
- doi = rec['attributes']['doi']
+ for rec in recs["data"]:
+ COUNT["datacite"] += 1
+ rec["jrc_obtained_from"] = "DataCite"
+ doi = rec["attributes"]["doi"]
if doi in DATACITE:
- COUNT['duplicate'] += 1
+ COUNT["duplicate"] += 1
continue
dlist.append(doi)
- DATACITE[doi] = {"data": {"attributes": rec['attributes']}}
- if 'links' in recs and 'next' in recs['links']:
- suffix = recs['links']['next'].replace('https://api.datacite.org/dois', '')
+ DATACITE[doi] = {"data": {"attributes": rec["attributes"]}}
+ if "links" in recs and "next" in recs["links"]:
+ suffix = recs["links"]["next"].replace("https://api.datacite.org/dois", "")
suffix += "&sort=created"
else:
complete = True
LOGGER.info(f"Got {len(dlist):,} DOIs from DataCite in {parts} part(s) for {query}")
LOGGER.info(f"Writing DOIs to datacite_{query}_dois.txt")
- with open(f"datacite_{query}_dois.txt", "w", encoding='ascii') as outstream:
+ with open(f"datacite_{query}_dois.txt", "w", encoding="ascii") as outstream:
for doi in dlist:
outstream.write(f"{doi}\n")
return dlist
def add_to_be_processed(dlist):
- ''' Add DOIs from the dois_to_process collection
- Keyword arguments:
- dlist: list of DOIs
- Returns:
- None
- '''
+ """Add DOIs from the dois_to_process collection
+ Keyword arguments:
+ dlist: list of DOIs
+ Returns:
+ None
+ """
try:
- rows = DB['dis'].dois_to_process.find({})
+ rows = DB["dis"].dois_to_process.find({})
except Exception as err:
terminate_program(err)
for row in rows:
- doi = row['doi']
+ doi = row["doi"]
if doi not in dlist:
TO_BE_PROCESSED.append(doi)
dlist.append(doi)
@@ -252,44 +283,44 @@ def add_to_be_processed(dlist):
def get_dois_for_dis(flycore):
- ''' Get a list of DOIs to process for an update of the DIS database. Sources are:
- - DOIs with an affiliation of Janelia from Crossref
- - All Janelia-prefixed DOIs from DataCite
- - DOIs with an affiliation of Janelia from DataCite
- - DOIs in use by FLYF2
- - DOIs associated with ALPs releases
- - DOIs associated with FlyEM datasets
- - DOIs from dois_to_process collection
- - DOIs that are already in the DIS database
- Keyword arguments:
- flycore: list of DOIs from FlyCore
- Returns:
- Dict with a single "dois" key and value of a list of DOIs
- '''
+ """Get a list of DOIs to process for an update of the DIS database. Sources are:
+ - DOIs with an affiliation of Janelia from Crossref
+ - All Janelia-prefixed DOIs from DataCite
+ - DOIs with an affiliation of Janelia from DataCite
+ - DOIs in use by FLYF2
+ - DOIs associated with ALPS releases
+ - DOIs associated with FlyEM datasets
+ - DOIs from dois_to_process collection
+ - DOIs that are already in the DIS database
+ Keyword arguments:
+ flycore: list of DOIs from FlyCore
+ Returns:
+ Dict with a single "dois" key and value of a list of DOIs
+ """
# Crossref
dlist = get_dois_from_crossref()
# DataCite
dlist.extend(get_dois_from_datacite("janelia"))
dlist.extend(get_dois_from_datacite("affiliation"))
# FlyCore
- for doi in flycore['dois']:
- if doi not in dlist and 'in prep' not in doi:
+ for doi in flycore["dois"]:
+ if doi not in dlist and "in prep" not in doi:
dlist.append(doi)
# ALPS releases
- releases = JRC.simplenamespace_to_dict(JRC.get_config('releases'))
+ releases = JRC.simplenamespace_to_dict(JRC.get_config("releases"))
cnt = 0
for val in releases.values():
- if 'doi' in val:
- for dtype in ('dataset', 'preprint', 'publication'):
- if dtype in val['doi'] and val['doi'][dtype] not in dlist:
+ if "doi" in val:
+ for dtype in ("dataset", "preprint", "publication"):
+ if dtype in val["doi"] and val["doi"][dtype] not in dlist:
cnt += 1
- dlist.append(val['doi'][dtype])
+ dlist.append(val["doi"][dtype])
LOGGER.info(f"Got {cnt:,} DOIs from ALPS releases")
# EM datasets
- emdois = JRC.simplenamespace_to_dict(JRC.get_config('em_dois'))
+ emdois = JRC.simplenamespace_to_dict(JRC.get_config("em_dois"))
cnt = 0
for key, val in emdois.items():
- if key in DISCONFIG['em_dataset_ignore']:
+ if key in DISCONFIG["em_dataset_ignore"]:
continue
if val and isinstance(val, str):
cnt += 1
@@ -309,16 +340,16 @@ def get_dois_for_dis(flycore):
def get_dois():
- ''' Get a list of DOIs to process. This will be one of four things:
- - a single DOI from ARG.DOI
- - a list of DOIs from ARG.FILE
- - DOIs needed for an update of the DIS database
- - DOIs from FLYF2
- Keyword arguments:
- None
- Returns:
- Dict with a single "dois" key and value of a list of DOIs
- '''
+ """Get a list of DOIs to process. This will be one of four things:
+ - a single DOI from ARG.DOI
+ - a list of DOIs from ARG.FILE
+ - DOIs needed for an update of the DIS database
+ - DOIs from FLYF2
+ Keyword arguments:
+ None
+ Returns:
+ Dict with a single "dois" key and value of a list of DOIs
+ """
if ARG.DOI:
return {"dois": [ARG.DOI]}
if ARG.FILE:
@@ -336,18 +367,18 @@ def get_dois():
break
if piped:
return {"dois": inp.splitlines()}
- flycore = call_responder('flycore', '?request=doilist')
+ flycore = call_responder("flycore", "?request=doilist")
LOGGER.info(f"Got {len(flycore['dois']):,} DOIs from FLYF2")
- if ARG.TARGET == 'dis':
+ if ARG.TARGET == "dis":
return get_dois_for_dis(flycore)
# Default is to pull from FlyCore
return flycore
def call_crossref(doi):
- """ Get DOI information from crossref
- Keyword arguments:
- doi: DOI
+ """Get DOI information from crossref
+ Keyword arguments:
+ doi: DOI
"""
try:
req = JRC.call_crossref(doi)
@@ -355,17 +386,17 @@ def call_crossref(doi):
terminate_program(err)
if req:
return req
- COUNT['notfound'] += 1
+ COUNT["notfound"] += 1
MISSING[f"Could not find {doi} in Crossref"] = True
raise Exception(f"Could not find {doi} in Crossref")
def call_crossref_with_retry(doi):
- """ Looping function for call_crossref
- Keyword arguments:
- doi: DOI
- Returns:
- msg: response from crossref.org
+ """Looping function for call_crossref
+ Keyword arguments:
+ doi: DOI
+ Returns:
+ msg: response from crossref.org
"""
attempt = MAX_CROSSREF_TRIES
msg = None
@@ -374,42 +405,44 @@ def call_crossref_with_retry(doi):
msg = call_crossref(doi)
except Exception as err:
raise Exception(err) from err
- if 'title' in msg['message']:
- if 'author' in msg['message']:
+ if "title" in msg["message"]:
+ if "author" in msg["message"]:
break
MISSING[f"No author for {doi}"] = True
LOGGER.warning(f"No author for {doi}")
- COUNT['noauthor'] += 1
+ COUNT["noauthor"] += 1
return None
LOGGER.warning(f"No title for {doi}")
MISSING[f"No title for {doi}"] = True
attempt -= 1
- LOGGER.warning(f"Missing data from crossref.org for {doi}: retrying ({attempt})")
+ LOGGER.warning(
+ f"Missing data from crossref.org for {doi}: retrying ({attempt})"
+ )
sleep(0.5)
return msg
def call_datacite(doi):
- """ Get record from DataCite
- Keyword arguments:
- doi: DOI
- Returns:
- rec: response from crossref.org
+ """Get record from DataCite
+ Keyword arguments:
+ doi: DOI
+ Returns:
+ rec: response from DataCite
"""
rec = DATACITE[doi] if doi in DATACITE else JRC.call_datacite(doi)
if rec:
return rec
- COUNT['notfound'] += 1
+ COUNT["notfound"] += 1
MISSING[f"Could not find {doi} in DataCite"] = True
raise Exception(f"Could not find {doi} in DataCite")
def get_doi_record(doi):
- """ Return the record for a single DOI
- Keyword arguments:
- doi: DOI
- Returns:
- record for a single DOI
+ """Return the record for a single DOI
+ Keyword arguments:
+ doi: DOI
+ Returns:
+ record for a single DOI
"""
msg = None
if DL.is_datacite(doi):
@@ -436,32 +469,32 @@ def get_doi_record(doi):
def convert_timestamp(stamp):
- """ Convert a Crossref or DataCite stamp to a standard format
- Keyword arguments:
- stamp: timestamp
- Returns:
- Converted timestamp
+ """Convert a Crossref or DataCite stamp to a standard format
+ Keyword arguments:
+ stamp: timestamp
+ Returns:
+ Converted timestamp
"""
- return re.sub(r'\.\d+Z', 'Z', stamp)
+ return re.sub(r"\.\d+Z", "Z", stamp)
def crossref_needs_update(doi, msg):
- """ Determine if a Crossref DOI needs updating on our system
- Keyword arguments:
- doi: DOI
- msg: record from Crossref
- Returns:
- True or False
+ """Determine if a Crossref DOI needs updating on our system
+ Keyword arguments:
+ doi: DOI
+ msg: record from Crossref
+ Returns:
+ True or False
"""
- if 'deposited' not in msg or 'date-time' not in msg['deposited']:
+ if "deposited" not in msg or "date-time" not in msg["deposited"]:
return True
if doi not in EXISTING:
return True
rec = EXISTING[doi]
- if 'deposited' not in rec or 'date-time' not in rec['deposited']:
+ if "deposited" not in rec or "date-time" not in rec["deposited"]:
return True
- stored = convert_timestamp(rec['deposited']['date-time'])
- new = convert_timestamp(msg['deposited']['date-time'])
+ stored = convert_timestamp(rec["deposited"]["date-time"])
+ new = convert_timestamp(msg["deposited"]["date-time"])
needs_update = bool(stored != new)
if ARG.FORCE:
needs_update = True
@@ -469,25 +502,25 @@ def crossref_needs_update(doi, msg):
LOGGER.debug(f"Update {doi} {stored} -> {new}")
UPDATED[doi] = f"Deposited {stored} -> {new}"
else:
- COUNT['noupdate'] += 1
+ COUNT["noupdate"] += 1
return needs_update
def datacite_needs_update(doi, msg):
- """ Determine if a DataCite DOI needs updating on our system
- Keyword arguments:
- doi: DOI
- msg: record from DataCite
- Returns:
- True or False
+ """Determine if a DataCite DOI needs updating on our system
+ Keyword arguments:
+ doi: DOI
+ msg: record from DataCite
+ Returns:
+ True or False
"""
- if 'attributes' not in msg or 'updated' not in msg['attributes']:
+ if "attributes" not in msg or "updated" not in msg["attributes"]:
return True
if doi not in EXISTING:
return True
rec = EXISTING[doi]
- stored = convert_timestamp(rec['updated'])
- new = convert_timestamp(msg['attributes']['updated'])
+ stored = convert_timestamp(rec["updated"])
+ new = convert_timestamp(msg["attributes"]["updated"])
needs_update = bool(stored != new)
if ARG.FORCE:
needs_update = True
@@ -495,47 +528,47 @@ def datacite_needs_update(doi, msg):
LOGGER.debug(f"Update {doi} {stored} -> {new}")
UPDATED[doi] = f"Updated {stored} -> {new}"
else:
- COUNT['noupdate'] += 1
+ COUNT["noupdate"] += 1
return needs_update
def get_flyboy_attributes(msg):
- """ Get needed attributed from a Crossref or DataCite record
- Keyword arguments:
- msg: Crossref or DataCite record
- Returns:
- title: article title
- author: article first author
- date: publication year
+ """Get needed attributes from a Crossref or DataCite record
+ Keyword arguments:
+ msg: Crossref or DataCite record
+ Returns:
+ title: article title
+ author: article first author
+ date: publication year
"""
title = author = None
date = DL.get_publishing_date(msg)
- if 'DOI' in msg:
+ if "DOI" in msg:
# Crossref
- if 'title' in msg:
- title = msg['title'][0]
- if 'author' in msg:
- author = msg['author'][0]['family']
- date = date.split('-')[0] if '-' in date else date
+ if "title" in msg:
+ title = msg["title"][0]
+ if "author" in msg:
+ author = msg["author"][0]["family"]
+ date = date.split("-")[0] if "-" in date else date
else:
# DataCite
- if 'titles' in msg:
- title = msg['titles'][0]['title']
- if 'creators' in msg and 'familyName' in msg['creators'][0]:
- author = msg['creators'][0]['familyName']
- if 'publicationYear' in msg:
- date = str(msg['publicationYear'])
+ if "titles" in msg:
+ title = msg["titles"][0]["title"]
+ if "creators" in msg and "familyName" in msg["creators"][0]:
+ author = msg["creators"][0]["familyName"]
+ if "publicationYear" in msg:
+ date = str(msg["publicationYear"])
else:
- date = date.split('-')[0] if '-' in date else date
+ date = date.split("-")[0] if "-" in date else date
return title, author, date
def update_flyboy(persist):
- """ Update FlyBoy for a single DOI
- Keyword arguments:
- persist: persist dict
- Returns:
- None
+ """Update FlyBoy for a single DOI
+ Keyword arguments:
+ persist: persist dict
+ Returns:
+ None
"""
for doi, val in persist.items():
title, author, date = get_flyboy_attributes(val)
@@ -544,131 +577,135 @@ def update_flyboy(persist):
if not author:
LOGGER.error("Missing author for %s (%s)", doi, title)
LOGGER.debug("%s: %s (%s, %s)", doi, title, author, date)
- COUNT['flyboy'] += 1
+ COUNT["flyboy"] += 1
title = unidecode(title)
- LOGGER.debug(WRITE['doi'], doi, title, author, date, title, author, date)
+ LOGGER.debug(WRITE["doi"], doi, title, author, date, title, author, date)
if ARG.WRITE:
try:
- DB['flyboy']['cursor'].execute(WRITE['doi'], (doi, title, author, date,
- title, author, date))
+ DB["flyboy"]["cursor"].execute(
+ WRITE["doi"], (doi, title, author, date, title, author, date)
+ )
except MySQLdb.Error as err:
terminate_program(err)
def perform_backcheck(cdict):
- """ Find and delete records that are in FlyBoy that aren't in our config
- Keyword arguments:
- cdict: dict of DOIs in config
- Returns:
- None
+ """Find and delete records that are in FlyBoy that aren't in our config
+ Keyword arguments:
+ cdict: dict of DOIs in config
+ Returns:
+ None
"""
try:
- DB['flyboy']['cursor'].execute(READ['dois'])
+ DB["flyboy"]["cursor"].execute(READ["dois"])
except MySQLdb.Error as err:
terminate_program(err)
- rows = DB['flyboy']['cursor'].fetchall()
- for row in tqdm(rows, desc='Backcheck'):
- COUNT['foundfb'] += 1
- if row['doi'] not in cdict:
- LOGGER.warning(WRITE['delete_doi'], (row['doi']))
+ rows = DB["flyboy"]["cursor"].fetchall()
+ for row in tqdm(rows, desc="Backcheck"):
+ COUNT["foundfb"] += 1
+ if row["doi"] not in cdict:
+ LOGGER.warning(WRITE["delete_doi"], (row["doi"]))
if ARG.WRITE:
try:
- DB['flyboy']['cursor'].execute(WRITE['delete_doi'], (row['doi'],))
+ DB["flyboy"]["cursor"].execute(WRITE["delete_doi"], (row["doi"],))
except MySQLdb.Error as err:
terminate_program(err)
- COUNT['delete'] += 1
+ COUNT["delete"] += 1
def update_config_database(persist):
- """ Update the configuration database
- Keyword arguments:
- persist: dict of DOIs to update
- Returns:
- None
+ """Update the configuration database
+ Keyword arguments:
+ persist: dict of DOIs to update
+ Returns:
+ None
"""
if not ARG.WRITE:
return
- for key, val in tqdm(persist.items(), desc='Update config'):
+ for key, val in tqdm(persist.items(), desc="Update config"):
LOGGER.debug(f"Updating {key} in config database")
- resp = call_responder('config', f"importjson/{CKEY[ARG.TARGET]}/{key}",
- {"config": json.dumps(val)})
+ resp = call_responder(
+ "config",
+ f"importjson/{CKEY[ARG.TARGET]}/{key}",
+ {"config": json.dumps(val)},
+ )
if resp.status_code != 200:
- LOGGER.error(resp.json()['rest']['message'])
+ LOGGER.error(resp.json()["rest"]["message"])
else:
rest = resp.json()
- if 'inserted' in rest['rest']:
- COUNT['insert'] += rest['rest']['inserted']
- elif 'updated' in rest['rest']:
- COUNT['update'] += rest['rest']['updated']
+ if "inserted" in rest["rest"]:
+ COUNT["insert"] += rest["rest"]["inserted"]
+ elif "updated" in rest["rest"]:
+ COUNT["update"] += rest["rest"]["updated"]
def get_tags(authors):
- ''' Find tags for a DOI using the authors
- Keyword arguments:
- authors: list of detailed authors
- Returns:
- List of tags
- '''
+ """Find tags for a DOI using the authors
+ Keyword arguments:
+ authors: list of detailed authors
+ Returns:
+ List of tags
+ """
new_tags = []
for auth in authors:
- if 'group' in auth and auth['group'] not in new_tags:
- new_tags.append(auth['group'])
- if 'tags' in auth:
+ if "group" in auth and auth["group"] not in new_tags:
+ new_tags.append(auth["group"])
+ if "tags" in auth:
for dtag in DEFAULT_TAGS:
- if dtag in auth['tags'] and dtag not in new_tags:
+ if dtag in auth["tags"] and dtag not in new_tags:
new_tags.append(dtag)
- if 'name' in auth:
- if auth['name'] not in PROJECT:
+ if "name" in auth:
+ if auth["name"] not in PROJECT:
LOGGER.warning(f"Project {auth['name']} is not defined")
- elif PROJECT[auth['name']] and auth['name'] not in new_tags:
- new_tags.append(PROJECT[auth['name']])
+ elif PROJECT[auth["name"]] and auth["name"] not in new_tags:
+ new_tags.append(PROJECT[auth["name"]])
return new_tags
def persist_author(key, authors, persist):
- ''' Add authors to be persisted
- Keyword arguments:
- key: DOI
- authors: list of detailed authors
- persist: dict keyed by DOI with value of the Crossref/DataCite record
- Returns:
- None
- '''
+ """Add authors to be persisted
+ Keyword arguments:
+ key: DOI
+ authors: list of detailed authors
+ persist: dict keyed by DOI with value of the Crossref/DataCite record
+ Returns:
+ None
+ """
# Update jrc_author
jrc_author = []
for auth in authors:
- if auth['janelian'] and 'employeeId' in auth and auth['employeeId']:
- jrc_author.append(auth['employeeId'])
+ if auth["janelian"] and "employeeId" in auth and auth["employeeId"]:
+ jrc_author.append(auth["employeeId"])
if jrc_author:
LOGGER.debug(f"Added jrc_author {jrc_author} to {key}")
- persist[key]['jrc_author'] = jrc_author
+ persist[key]["jrc_author"] = jrc_author
else:
LOGGER.warning(f"No Janelia authors for {key}")
def get_suporg_code(name):
- ''' Get the code for a supervisory organization
- Keyword arguments:
- name: name of the organization
- Returns:
- Code for the organization
- '''
+ """Get the code for a supervisory organization
+ Keyword arguments:
+ name: name of the organization
+ Returns:
+ Code for the organization
+ """
if name in SUPORG:
return SUPORG[name]
return None
def add_tags(persist):
- ''' Add tags to DOI records that will be persisted (jrc_author, jrc_tag)
- Keyword arguments:
- persist: dict keyed by DOI with value of the Crossref/DataCite record
- Returns:
- None
- '''
- coll = DB['dis'].orcid
- for key, val in tqdm(persist.items(), desc='Add jrc_author and jrc_tag'):
+ """Add tags to DOI records that will be persisted (jrc_author, jrc_tag)
+ Keyword arguments:
+ persist: dict keyed by DOI with value of the Crossref/DataCite record
+ Returns:
+ None
+ """
+ coll = DB["dis"].orcid
+ for key, val in tqdm(persist.items(), desc="Add jrc_author and jrc_tag"):
try:
- rec = DB['dis'].dois.find_one({"doi": key})
+ rec = DB["dis"].dois.find_one({"doi": key})
except Exception as err:
terminate_program(err)
try:
@@ -681,210 +718,217 @@ def add_tags(persist):
new_tags = get_tags(authors)
tags = []
tag_names = []
- if 'jrc_tag' in persist:
- tags.extend(persist['jrc_tag'])
+ if "jrc_tag" in persist:
+ tags.extend(persist["jrc_tag"])
for etag in tags:
if isinstance(etag, str):
tag_names.append(etag)
else:
- tag_names.append(etag['name'])
+ tag_names.append(etag["name"])
else:
- if rec and 'jrc_tag' in rec:
- tags.extend(rec['jrc_tag'])
+ if rec and "jrc_tag" in rec:
+ tags.extend(rec["jrc_tag"])
for etag in tags:
if isinstance(etag, str):
tag_names.append(etag)
else:
- tag_names.append(etag['name'])
- names = [etag['name'] for etag in tags]
+ tag_names.append(etag["name"])
+ names = [etag["name"] for etag in tags]
for tag in new_tags:
if tag not in names:
code = get_suporg_code(tag)
- tagtype = 'suporg' if code else 'affiliation'
+ tagtype = "suporg" if code else "affiliation"
tags.append({"name": tag, "code": code, "type": tagtype})
if tags:
LOGGER.debug(f"Added jrc_tag {tags} to {key}")
- persist[key]['jrc_tag'] = tags
- if rec and 'jrc_newsletter' in rec:
+ persist[key]["jrc_tag"] = tags
+ if rec and "jrc_newsletter" in rec:
LOGGER.warning(f"Skipping jrc_author update for {key}")
else:
persist_author(key, authors, persist)
def get_field(rec):
- ''' Get the field name for the authors
- Keyword arguments:
- rec: Crossref/DataCite record
- Returns:
- Field name and True if DataCite
- '''
- if 'jrc_obtained_from' in rec and rec['jrc_obtained_from'] == "DataCite":
- return 'creators', True
- return 'author', False
+ """Get the field name for the authors
+ Keyword arguments:
+ rec: Crossref/DataCite record
+ Returns:
+ Field name and True if DataCite
+ """
+ if "jrc_obtained_from" in rec and rec["jrc_obtained_from"] == "DataCite":
+ return "creators", True
+ return "author", False
def add_first_last_authors(rec):
- ''' Add first and last authors to record
- Keyword arguments:
- rec: Crossref/DataCite record
- Returns:
- None
- '''
+ """Add first and last authors to record
+ Keyword arguments:
+ rec: Crossref/DataCite record
+ Returns:
+ None
+ """
first = []
field, datacite = get_field(rec)
if field in rec:
if not datacite:
# First author(s)
for auth in rec[field]:
- if 'sequence' in auth and auth['sequence'] == 'additional':
+ if "sequence" in auth and auth["sequence"] == "additional":
break
- if not('given' in auth and 'family' in auth):
+ if not ("given" in auth and "family" in auth):
LOGGER.warning(f"Missing author name in {rec['doi']} author {auth}")
break
try:
- janelian = DL.is_janelia_author(auth, DB['dis'].orcid, PROJECT)
+ janelian = DL.is_janelia_author(auth, DB["dis"].orcid, PROJECT)
except Exception as err:
LOGGER.error(f"Could not process {rec['doi']}")
terminate_program(err)
if janelian:
first.append(janelian)
else:
- janelian = DL.is_janelia_author(rec[field][0], DB['dis'].orcid, PROJECT)
+ janelian = DL.is_janelia_author(rec[field][0], DB["dis"].orcid, PROJECT)
if janelian:
first.append(janelian)
okay = True
if not datacite:
- if not('given' in rec[field][-1] and 'family' in rec[field][-1]):
+ if not ("given" in rec[field][-1] and "family" in rec[field][-1]):
okay = False
- elif not('givenName' in rec[field][-1] and 'familyName' in rec[field][-1]):
+ elif not ("givenName" in rec[field][-1] and "familyName" in rec[field][-1]):
okay = False
if okay:
- janelian = DL.is_janelia_author(rec[field][-1], DB['dis'].orcid, PROJECT)
+ janelian = DL.is_janelia_author(rec[field][-1], DB["dis"].orcid, PROJECT)
if janelian:
rec["jrc_last_author"] = janelian
else:
- LOGGER.warning(f"Missing author name in {rec['doi']} author {rec[field][-1]}")
+ LOGGER.warning(
+ f"Missing author name in {rec['doi']} author {rec[field][-1]}"
+ )
if first:
rec["jrc_first_author"] = first
- if (not first) and ('jrc_last_author' not in rec):
+ if (not first) and ("jrc_last_author" not in rec):
return
first = []
- det = DL.get_author_details(rec, DB['dis']['orcid'])
+ det = DL.get_author_details(rec, DB["dis"]["orcid"])
for auth in det:
- if auth['janelian'] and 'employeeId' in auth and 'is_first' in auth:
- first.append(auth['employeeId'])
- if auth['janelian'] and 'employeeId' in auth and 'is_last' in auth:
- rec["jrc_last_id"] = auth['employeeId']
+ if auth["janelian"] and "employeeId" in auth and "is_first" in auth:
+ first.append(auth["employeeId"])
+ if auth["janelian"] and "employeeId" in auth and "is_last" in auth:
+ rec["jrc_last_id"] = auth["employeeId"]
if first:
rec["jrc_first_id"] = first
def update_mongodb(persist):
- ''' Persist DOI records in MongoDB
- Keyword arguments:
- persist: dict keyed by DOI with value of the Crossref/DataCite record
- Returns:
- None
- '''
- coll = DB['dis'].dois
- for key, val in tqdm(persist.items(), desc='Update DIS Mongo'):
- val['doi'] = key
+ """Persist DOI records in MongoDB
+ Keyword arguments:
+ persist: dict keyed by DOI with value of the Crossref/DataCite record
+ Returns:
+ None
+ """
+ coll = DB["dis"].dois
+ for key, val in tqdm(persist.items(), desc="Update DIS Mongo"):
+ val["doi"] = key
# Publishing date
- val['jrc_publishing_date'] = DL.get_publishing_date(val)
+ val["jrc_publishing_date"] = DL.get_publishing_date(val)
# First/last authors
add_first_last_authors(val)
- for aname in ('jrc_first_author', 'jrc_first_id', 'jrc_last_author', 'jrc_last_id'):
+ for aname in (
+ "jrc_first_author",
+ "jrc_first_id",
+ "jrc_last_author",
+ "jrc_last_id",
+ ):
if aname in val:
LOGGER.debug(f"Added {aname} {val[aname]} to {key}")
# Insert/update timestamps
if key not in EXISTING:
- val['jrc_inserted'] = datetime.today().replace(microsecond=0)
- val['jrc_updated'] = datetime.today().replace(microsecond=0)
+ val["jrc_inserted"] = datetime.today().replace(microsecond=0)
+ val["jrc_updated"] = datetime.today().replace(microsecond=0)
LOGGER.debug(val)
if ARG.WRITE:
if ARG.DOI or ARG.FILE:
- val['jrc_load_source'] = "Manual"
+ val["jrc_load_source"] = "Manual"
uname = JRC.get_user_name()
if uname and uname != "root":
- val['jrc_loaded_by'] = uname
+ val["jrc_loaded_by"] = uname
else:
- val['jrc_load_source'] = "Sync"
+ val["jrc_load_source"] = "Sync"
coll.update_one({"doi": key}, {"$set": val}, upsert=True)
if key in TO_BE_PROCESSED:
try:
- DB['dis'].dois_to_process.delete_one({"doi": key})
+ DB["dis"].dois_to_process.delete_one({"doi": key})
except Exception as err:
LOGGER.error(f"Could not delete {key} from dois_to_process: {err}")
if key in EXISTING:
- COUNT['update'] += 1
+ COUNT["update"] += 1
if key not in UPDATED:
UPDATED[key] = "Unknown"
else:
- COUNT['insert'] += 1
+ COUNT["insert"] += 1
INSERTED[key] = DL.get_publishing_date(val)
def update_dois(specified, persist):
- """ Persist new or updated DOIs
- Keyword arguments:
- specified: distinct input DOIs
- persist: DOIs that need persisting
- Returns:
- None
+ """Persist new or updated DOIs
+ Keyword arguments:
+ specified: distinct input DOIs
+ persist: DOIs that need persisting
+ Returns:
+ None
"""
- if ARG.TARGET == 'flyboy':
+ if ARG.TARGET == "flyboy":
update_flyboy(persist)
if not ARG.DOI and not ARG.FILE:
perform_backcheck(specified)
update_config_database(persist)
- elif ARG.TARGET == 'dis':
+ elif ARG.TARGET == "dis":
add_tags(persist)
update_mongodb(persist)
def persist_if_updated(doi, msg, persist):
- """ Decide if we need to persist a DOI
- Keyword arguments:
- doi: DOI
- msg: message from DOI record
- persist: dict of DOIs to persist
- Returns:
- None
+ """Decide if we need to persist a DOI
+ Keyword arguments:
+ doi: DOI
+ msg: message from DOI record
+ persist: dict of DOIs to persist
+ Returns:
+ None
"""
if DL.is_datacite(doi):
# DataCite
- if datacite_needs_update(doi, msg['data']):
- persist[doi] = msg['data']['attributes']
- persist[doi]['jrc_obtained_from'] = 'DataCite'
- COUNT['foundd'] += 1
+ if datacite_needs_update(doi, msg["data"]):
+ persist[doi] = msg["data"]["attributes"]
+ persist[doi]["jrc_obtained_from"] = "DataCite"
+ COUNT["foundd"] += 1
else:
# Crossref
- if crossref_needs_update(doi, msg['message']):
- persist[doi] = msg['message']
- persist[doi]['jrc_obtained_from'] = 'Crossref'
- COUNT['foundc'] += 1
+ if crossref_needs_update(doi, msg["message"]):
+ persist[doi] = msg["message"]
+ persist[doi]["jrc_obtained_from"] = "Crossref"
+ COUNT["foundc"] += 1
def process_dois():
- """ Process a list of DOIs
- Keyword arguments:
- None
- Returns:
- None
+ """Process a list of DOIs
+ Keyword arguments:
+ None
+ Returns:
+ None
"""
LOGGER.info(f"Started run (version {__version__})")
rows = get_dois()
if not rows:
terminate_program("No DOIs were found")
- specified = {} # Dict of distinct DOIs received as input (value is True)
- persist = {} # DOIs that will be persisted in a database (value is record)
- for odoi in tqdm(rows['dois'], desc='DOIs'):
- if '//' in odoi:
+ specified = {} # Dict of distinct DOIs received as input (value is True)
+ persist = {} # DOIs that will be persisted in a database (value is record)
+ for odoi in tqdm(rows["dois"], desc="DOIs"):
+ if "//" in odoi:
terminate_program(f"Invalid DOI: {odoi}")
- doi = odoi if ARG.TARGET == 'flyboy' else odoi.lower().strip()
- COUNT['found'] += 1
+ doi = odoi if ARG.TARGET == "flyboy" else odoi.lower().strip()
+ COUNT["found"] += 1
if doi in specified:
- COUNT['duplicate'] += 1
+ COUNT["duplicate"] += 1
LOGGER.debug(f"{doi} appears in input more than once")
continue
specified[doi] = True
@@ -894,13 +938,13 @@ def process_dois():
if DL.is_datacite(doi):
msg = get_doi_record(doi)
if msg:
- persist[doi] = msg['data']['attributes']
- persist[doi]['jrc_obtained_from'] = 'DataCite'
+ persist[doi] = msg["data"]["attributes"]
+ persist[doi]["jrc_obtained_from"] = "DataCite"
else:
msg = get_doi_record(doi)
if msg:
- persist[doi] = msg['message']
- persist[doi]['jrc_obtained_from'] = 'Crossref'
+ persist[doi] = msg["message"]
+ persist[doi]["jrc_obtained_from"] = "Crossref"
continue
msg = get_doi_record(doi)
if not msg:
@@ -910,12 +954,12 @@ def process_dois():
def generate_emails():
- ''' Generate and send an email
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Generate and send an email
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
msg = JRC.get_run_data(__file__, __version__)
if ARG.SOURCE:
msg += f"DOIs passed in from {ARG.SOURCE}\n"
@@ -924,53 +968,68 @@ def generate_emails():
msg += f"\n{doi}"
try:
LOGGER.info(f"Sending email to {DISCONFIG['receivers']}")
- JRC.send_email(msg, DISCONFIG['sender'], DISCONFIG['developer'] \
- if ARG.MANIFOLD == 'dev' else DISCONFIG['receivers'],
- "New DOIs")
+ JRC.send_email(
+ msg,
+ DISCONFIG["sender"],
+ DISCONFIG["developer"] if ARG.MANIFOLD == "dev" else DISCONFIG["receivers"],
+ "New DOIs",
+ )
except Exception as err:
LOGGER.error(err)
if not TO_BE_PROCESSED:
return
msg = JRC.get_run_data(__file__, __version__)
- msg += "The following DOIs from a previous weekly cycle have been added to the database. " \
- + "Metadata should be updated as soon as possible."
+ msg += (
+ "The following DOIs from a previous weekly cycle have been added to the database. "
+ + "Metadata should be updated as soon as possible."
+ )
for doi in TO_BE_PROCESSED:
msg += f"\n{doi}"
try:
LOGGER.info(f"Sending email to {DISCONFIG['librarian']}")
- JRC.send_email(msg, DISCONFIG['sender'], DISCONFIG['developer'] if ARG.MANIFOLD == 'dev' \
- else DISCONFIG['librarian'],
- "Action needed: new DOIs")
+ JRC.send_email(
+ msg,
+ DISCONFIG["sender"],
+ DISCONFIG["developer"] if ARG.MANIFOLD == "dev" else DISCONFIG["librarian"],
+ "Action needed: new DOIs",
+ )
except Exception as err:
LOGGER.error(err)
def post_activities():
- """ Write output files and report on program operations
- Keyword arguments:
- None
- Returns:
- None
+ """Write output files and report on program operations
+ Keyword arguments:
+ None
+ Returns:
+ None
"""
if ARG.OUTPUT:
# Write files
timestamp = strftime("%Y%m%dT%H%M%S")
- for ftype in ('INSERTED', 'UPDATED', 'CROSSREF', 'DATACITE',
- 'CROSSREF_CALL', 'DATACITE_CALL', 'MISSING'):
+ for ftype in (
+ "INSERTED",
+ "UPDATED",
+ "CROSSREF",
+ "DATACITE",
+ "CROSSREF_CALL",
+ "DATACITE_CALL",
+ "MISSING",
+ ):
if not globals()[ftype]:
continue
fname = f"doi_{ftype.lower()}_{timestamp}.txt"
- with open(fname, 'w', encoding='ascii') as outstream:
+ with open(fname, "w", encoding="ascii") as outstream:
for key, val in globals()[ftype].items():
- if ftype in ('INSERTED', 'UPDATED'):
+ if ftype in ("INSERTED", "UPDATED"):
outstream.write(f"{key}\t{val}\n")
else:
outstream.write(f"{key}\n")
# Report
if ARG.SOURCE:
print(f"Source: {ARG.SOURCE}")
- if ARG.TARGET == 'dis' and (not ARG.DOI and not ARG.FILE):
+ if ARG.TARGET == "dis" and (not ARG.DOI and not ARG.FILE):
print(f"DOIs fetched from Crossref: {COUNT['crossref']:,}")
print(f"DOIs fetched from DataCite: {COUNT['datacite']:,}")
print(f"DOIs specified: {COUNT['found']:,}")
@@ -980,7 +1039,7 @@ def post_activities():
print(f"DOIs not found: {COUNT['notfound']:,}")
print(f"Duplicate DOIs: {COUNT['duplicate']:,}")
print(f"DOIs not needing updates: {COUNT['noupdate']:,}")
- if ARG.TARGET == 'flyboy':
+ if ARG.TARGET == "flyboy":
print(f"DOIs found in FlyBoy: {COUNT['foundfb']:,}")
print(f"DOIs inserted/updated in FlyBoy: {COUNT['flyboy']:,}")
print(f"DOIs deleted from FlyBoy: {COUNT['delete']:,}")
@@ -995,52 +1054,102 @@ def post_activities():
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Sync DOIs")
- PARSER.add_argument('--doi', dest='DOI', action='store',
- help='Single DOI to process')
- PARSER.add_argument('--source', dest='SOURCE', action='store',
- help='Source of DOIs (arXiv, figshae, etc.)')
- PARSER.add_argument('--target', dest='TARGET', action='store',
- default='dis', choices=['flyboy', 'dis'],
- help='Target system (flyboy or dis)')
- PARSER.add_argument('--file', dest='FILE', action='store',
- type=argparse.FileType("r", encoding="ascii"),
- help='File of DOIs to process')
- PARSER.add_argument('--pipe', dest='PIPE', action='store_true',
- default=False, help='Accepted input from STDIN')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--insert', dest='INSERT', action='store_true',
- default=False, help='Only look for new records')
- PARSER.add_argument('--force', dest='FORCE', action='store_true',
- default=False, help='Force update')
- PARSER.add_argument('--output', dest='OUTPUT', action='store_true',
- default=False, help='Produce output files')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Sync DOIs")
+ PARSER.add_argument(
+ "--doi", dest="DOI", action="store", help="Single DOI to process"
+ )
+ PARSER.add_argument(
+ "--source",
+ dest="SOURCE",
+ action="store",
+ help="Source of DOIs (arXiv, figshae, etc.)",
+ )
+ PARSER.add_argument(
+ "--target",
+ dest="TARGET",
+ action="store",
+ default="dis",
+ choices=["flyboy", "dis"],
+ help="Target system (flyboy or dis)",
+ )
+ PARSER.add_argument(
+ "--file",
+ dest="FILE",
+ action="store",
+ type=argparse.FileType("r", encoding="ascii"),
+ help="File of DOIs to process",
+ )
+ PARSER.add_argument(
+ "--pipe",
+ dest="PIPE",
+ action="store_true",
+ default=False,
+ help="Accepted input from STDIN",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--insert",
+ dest="INSERT",
+ action="store_true",
+ default=False,
+ help="Only look for new records",
+ )
+ PARSER.add_argument(
+ "--force", dest="FORCE", action="store_true", default=False, help="Force update"
+ )
+ PARSER.add_argument(
+ "--output",
+ dest="OUTPUT",
+ action="store_true",
+ default=False,
+ help="Produce output files",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
CONFIG = configparser.ConfigParser()
- CONFIG.read('config.ini')
+ CONFIG.read("config.ini")
initialize_program()
DISCONFIG = JRC.simplenamespace_to_dict(JRC.get_config("dis"))
REST = JRC.get_config("rest_services")
START_TIME = datetime.now()
- if ARG.TARGET == 'flyboy':
+ if ARG.TARGET == "flyboy":
EXISTING = JRC.simplenamespace_to_dict(JRC.get_config(CKEY[ARG.TARGET]))
else:
EXISTING = get_dis_dois_from_mongo()
try:
- PROJECT = DL.get_project_map(DB['dis'].project_map)
+ PROJECT = DL.get_project_map(DB["dis"].project_map)
except Exception as gerr:
terminate_program(gerr)
process_dois()
diff --git a/sync/bin/update_orcid.py b/sync/bin/update_orcid.py
index 0fc2ba5..ad494b4 100644
--- a/sync/bin/update_orcid.py
+++ b/sync/bin/update_orcid.py
@@ -1,8 +1,8 @@
-''' update_orcid.py
- Update the MongoDB orcid collection with ORCIDs and names for Janelia authors
-'''
+"""update_orcid.py
+Update the MongoDB orcid collection with ORCIDs and names for Janelia authors
+"""
-__version__ = '2.5.0'
+__version__ = "2.5.0"
import argparse
import collections
@@ -29,13 +29,14 @@
NEW_ORCID = {}
ALUMNI = []
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -44,225 +45,238 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Initialize database connection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize database connection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
if "PEOPLE_API_KEY" not in os.environ:
terminate_program("Missing token - set in PEOPLE_API_KEY environment variable")
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
# Initialize the PRESENT dict with rows that have ORCIDs
try:
- rows = DB['dis'].orcid.find({"orcid": {"$exists": True}})
+ rows = DB["dis"].orcid.find({"orcid": {"$exists": True}})
except Exception as err:
terminate_program(err)
for row in rows:
- PRESENT[row['orcid']] = row
+ PRESENT[row["orcid"]] = row
LOGGER.info(f"{len(PRESENT)} DOIs are already in the collection")
def add_name(oid, oids, family, given):
- ''' If the ORCID ID is new, add it to the dict. Otherwise, update it
- with new family/given name.
- Keyword arguments:
- oid: ORCID ID
- oids: ORCID ID dict
- family: family name
- given: given name
- Returns:
- None
- '''
+ """If the ORCID ID is new, add it to the dict. Otherwise, update it
+ with new family/given name.
+ Keyword arguments:
+ oid: ORCID ID
+ oids: ORCID ID dict
+ family: family name
+ given: given name
+ Returns:
+ None
+ """
if oid in oids:
- if family not in oids[oid]['family']:
- oids[oid]['family'].append(family)
- if given not in oids[oid]['given']:
- oids[oid]['given'].append(given)
+ if family not in oids[oid]["family"]:
+ oids[oid]["family"].append(family)
+ if given not in oids[oid]["given"]:
+ oids[oid]["given"].append(given)
else:
oids[oid] = {"family": [family], "given": [given]}
if oid in PRESENT:
if not ARG.WRITE:
- COUNT['update'] += 1
+ COUNT["update"] += 1
else:
if not ARG.WRITE:
- COUNT['insert'] += 1
+ COUNT["insert"] += 1
print(oid, json.dumps(oids[oid], indent=2))
NEW_ORCID[oid] = {"family": [family], "given": [given]}
def process_author(aut, oids, source="crossref"):
- ''' Process a single author record
- Keyword arguments:
- aut: author record
- oids: ORCID ID dict
- Returns:
- None
- '''
- for aff in aut['affiliation']:
- if 'Janelia' in aff['name']:
- oid = re.sub(r'.*/', '', aut['ORCID'])
+ """Process a single author record
+ Keyword arguments:
+ aut: author record
+ oids: ORCID ID dict
+ Returns:
+ None
+ """
+ for aff in aut["affiliation"]:
+ if "Janelia" in aff["name"]:
+ oid = re.sub(r".*/", "", aut["ORCID"])
if source == "crossref":
- add_name(oid, oids, aut['family'], aut['given'])
+ add_name(oid, oids, aut["family"], aut["given"])
break
def get_name(oid):
- ''' Get an author's first and last name from ORCID
- Keyword arguments:
- oid: ORCID
- Returns:
- family and given name
- '''
+ """Get an author's first and last name from ORCID
+ Keyword arguments:
+ oid: ORCID
+ Returns:
+ family and given name
+ """
url = f"{CONFIG['orcid']['base']}{oid}"
try:
- resp = requests.get(url, timeout=10,
- headers={"Accept": "application/json"})
+ resp = requests.get(url, timeout=10, headers={"Accept": "application/json"})
except Exception as err:
terminate_program(err)
try:
- return resp.json()['person']['name']['family-name']['value'], \
- resp.json()['person']['name']['given-names']['value']
+ return resp.json()["person"]["name"]["family-name"]["value"], resp.json()[
+ "person"
+ ]["name"]["given-names"]["value"]
except Exception as err:
- LOGGER.warning(resp.json()['person']['name'])
+ LOGGER.warning(resp.json()["person"]["name"])
LOGGER.warning(err)
return None, None
def add_from_orcid(oids):
- ''' Find additional ORCID IDs using the ORCID API
- Keyword arguments:
- oids: ORCID ID dict
- Returns:
- None
- '''
+ """Find additional ORCID IDs using the ORCID API
+ Keyword arguments:
+ oids: ORCID ID dict
+ Returns:
+ None
+ """
authors = []
base = f"{CONFIG['orcid']['base']}search"
- for url in ('/?q=ror-org-id:"' + CONFIG['ror']['janelia'] + '"',
- '/?q=affiliation-org-name:"Janelia Research Campus"',
- '/?q=affiliation-org-name:"Janelia Farm Research Campus"'):
+ for url in (
+ '/?q=ror-org-id:"' + CONFIG["ror"]["janelia"] + '"',
+ '/?q=affiliation-org-name:"Janelia Research Campus"',
+ '/?q=affiliation-org-name:"Janelia Farm Research Campus"',
+ ):
try:
- resp = requests.get(f"{base}{url}", timeout=10,
- headers={"Accept": "application/json"})
+ resp = requests.get(
+ f"{base}{url}", timeout=10, headers={"Accept": "application/json"}
+ )
except Exception as err:
terminate_program(err)
- for orcid in resp.json()['result']:
- authors.append(orcid['orcid-identifier']['path'])
- COUNT['orcid'] = len(authors)
- for oid in tqdm(authors, desc='Janelians from ORCID'):
+ for orcid in resp.json()["result"]:
+ authors.append(orcid["orcid-identifier"]["path"])
+ COUNT["orcid"] = len(authors)
+ for oid in tqdm(authors, desc="Janelians from ORCID"):
family, given = get_name(oid)
if family and given:
add_name(oid, oids, family, given)
def people_by_name(first, surname):
- ''' Search for a surname in the people system
- Keyword arguments:
- first: first name
- surname: last name
- Returns:
- List of people
- '''
+ """Search for a surname in the people system
+ Keyword arguments:
+ first: first name
+ surname: last name
+ Returns:
+ List of people
+ """
try:
people = JRC.call_people_by_name(surname)
except Exception as err:
terminate_program(err)
filtered = []
for person in people:
- if person['locationName'] != 'Janelia Research Campus':
+ if person["locationName"] != "Janelia Research Campus":
continue
- if person['nameLastPreferred'].lower() == surname.lower() \
- and person['nameFirstPreferred'].lower() == first.lower():
+ if (
+ person["nameLastPreferred"].lower() == surname.lower()
+ and person["nameFirstPreferred"].lower() == first.lower()
+ ):
filtered.append(person)
return filtered
def update_group_status(rec, idresp):
- ''' Add group tags to the record
- Keyword arguments:
- rec: orcid record
- idresp: People service response
- Returns:
- None
- '''
- if 'managedTeams' not in idresp:
+ """Add group tags to the record
+ Keyword arguments:
+ rec: orcid record
+ idresp: People service response
+ Returns:
+ None
+ """
+ if "managedTeams" not in idresp:
return
- lab = ''
- for team in idresp['managedTeams']:
- if team['supOrgSubType'] == 'Lab' and team['supOrgName'].endswith(' Lab'):
- if team['supOrgCode'] in DISCONFIG['sup_ignore']:
+ lab = ""
+ for team in idresp["managedTeams"]:
+ if team["supOrgSubType"] == "Lab" and team["supOrgName"].endswith(" Lab"):
+ if team["supOrgCode"] in DISCONFIG["sup_ignore"]:
continue
if lab:
- terminate_program(f"Multiple labs found for {idresp['nameFirstPreferred']} " \
- + idresp['nameLastPreferred'])
- lab = team['supOrgName']
- rec['group'] = lab
- rec['group_code'] = team['supOrgCode']
+ terminate_program(
+ f"Multiple labs found for {idresp['nameFirstPreferred']} "
+ + idresp["nameLastPreferred"]
+ )
+ lab = team["supOrgName"]
+ rec["group"] = lab
+ rec["group_code"] = team["supOrgCode"]
def get_person(people):
- ''' Get a person record
- Keyword arguments:
- people: list of people
- Returns:
- Person record and person ID record
- '''
+ """Get a person record
+ Keyword arguments:
+ people: list of people
+ Returns:
+ Person record and person ID record
+ """
if len(people) == 1:
- idresp = JRC.call_people_by_id(people[0]['employeeId'])
+ idresp = JRC.call_people_by_id(people[0]["employeeId"])
return people[0], idresp
- latest = ''
+ latest = ""
saved = {"person": None, "idresp": None}
idresp = None
for person in people:
- first = person['nameFirstPreferred']
- last = person['nameLastPreferred']
- idresp = JRC.call_people_by_id(person['employeeId'])
- if 'terminationDate' in idresp and idresp['terminationDate']:
+ first = person["nameFirstPreferred"]
+ last = person["nameLastPreferred"]
+ idresp = JRC.call_people_by_id(person["employeeId"])
+ if "terminationDate" in idresp and idresp["terminationDate"]:
LOGGER.warning(f"{first} {last} was terminated {idresp['terminationDate']}")
continue
- if 'hireDate' in idresp and idresp['hireDate']:
- if not latest or idresp['hireDate'] > latest:
- latest = idresp['hireDate']
- saved['person'] = person
- saved['idresp'] = idresp
- if saved['person']:
+ if "hireDate" in idresp and idresp["hireDate"]:
+ if not latest or idresp["hireDate"] > latest:
+ latest = idresp["hireDate"]
+ saved["person"] = person
+ saved["idresp"] = idresp
+ if saved["person"]:
LOGGER.warning(f"Selected {first} {last} {latest}")
- return saved['person'], saved['idresp']
+ return saved["person"], saved["idresp"]
def add_people_information(first, surname, oids, oid):
- ''' Correlate a name from ORCID with HHMI's People service
- Keyword arguments:
- first: given name
- surname: family name
- oid: ORCID ID
- oids: ORCID ID dict
- Returns:
- None
- '''
+ """Correlate a name from ORCID with HHMI's People service
+ Keyword arguments:
+ first: given name
+ surname: family name
+ oid: ORCID ID
+ oids: ORCID ID dict
+ Returns:
+ None
+ """
found = False
people = people_by_name(first, surname)
if people:
person, idresp = get_person(people)
if person:
found = True
- oids[oid]['employeeId'] = people[0]['employeeId']
- oids[oid]['userIdO365'] = people[0]['userIdO365']
- if 'group leader' in people[0]['businessTitle'].lower():
- oids[oid]['group'] = f"{first} {surname} Lab"
- if people[0]['businessTitle'] == 'JRC Alumni':
- oids[oid]['alumni'] = True
+ oids[oid]["employeeId"] = people[0]["employeeId"]
+ oids[oid]["userIdO365"] = people[0]["userIdO365"]
+ if "group leader" in people[0]["businessTitle"].lower():
+ oids[oid]["group"] = f"{first} {surname} Lab"
+ if people[0]["businessTitle"] == "JRC Alumni":
+ oids[oid]["alumni"] = True
if idresp:
update_group_status(oids[oid], idresp)
DL.get_name_combinations(idresp, oids[oid])
@@ -273,90 +287,90 @@ def add_people_information(first, surname, oids, oid):
def correlate_person(oid, oids):
- ''' Correlate a name from ORCID with HHMI's People service
- Keyword arguments:
- oid: ORCID ID
- oids: ORCID ID dict
- Returns:
- None
- '''
+ """Correlate a name from ORCID with HHMI's People service
+ Keyword arguments:
+ oid: ORCID ID
+ oids: ORCID ID dict
+ Returns:
+ None
+ """
val = oids[oid]
- for surname in val['family']:
- for first in val['given']:
+ for surname in val["family"]:
+ for first in val["given"]:
found = add_people_information(first, surname, oids, oid)
if found:
break
if found:
break
- #if not found:
+ # if not found:
# LOGGER.warning(f"Could not find a record in People for {first} {surname}")
def preserve_mongo_names(current, oids):
- ''' Preserve names from sources other than this program in the oids dictionary
- Keyword arguments:
- oids: ORCID ID dict
- Returns:
- None
- '''
- oid = current['orcid']
- for field in ('family', 'given'):
+ """Preserve names from sources other than this program in the oids dictionary
+ Keyword arguments:
+ oids: ORCID ID dict
+ Returns:
+ None
+ """
+ oid = current["orcid"]
+ for field in ("family", "given"):
for name in current[field]:
if name not in oids[oid][field]:
oids[oid][field].append(name)
def add_janelia_info(oids):
- ''' Find Janelia information for each ORCID ID
- Keyword arguments:
- oids: ORCID ID dict
- Returns:
- None
- '''
- for oid in tqdm(oids, desc='Janelians from orcid collection'):
+ """Find Janelia information for each ORCID ID
+ Keyword arguments:
+ oids: ORCID ID dict
+ Returns:
+ None
+ """
+ for oid in tqdm(oids, desc="Janelians from orcid collection"):
if oid in PRESENT:
preserve_mongo_names(PRESENT[oid], oids)
- if 'alumni' in PRESENT[oid]:
+ if "alumni" in PRESENT[oid]:
continue
- if oid in PRESENT and 'employeeId' in PRESENT[oid] and not ARG.FORCE:
+ if oid in PRESENT and "employeeId" in PRESENT[oid] and not ARG.FORCE:
continue
correlate_person(oid, oids)
def write_records(oids):
- ''' Write records to Mongo
- Keyword arguments:
- oids: ORCID ID dict
- Returns:
- None
- '''
- coll = DB['dis'].orcid
- for oid, val in tqdm(oids.items(), desc='Updating orcid collection'):
+ """Write records to Mongo
+ Keyword arguments:
+ oids: ORCID ID dict
+ Returns:
+ None
+ """
+ coll = DB["dis"].orcid
+ for oid, val in tqdm(oids.items(), desc="Updating orcid collection"):
if oid:
result = coll.update_one({"orcid": oid}, {"$set": val}, upsert=True)
else:
print(f"INSERT {val}")
result = coll.insert_one(val)
- if hasattr(result, 'matched_count') and result.matched_count:
- COUNT['update'] += 1
+ if hasattr(result, "matched_count") and result.matched_count:
+ COUNT["update"] += 1
else:
- COUNT['insert'] += 1
+ COUNT["insert"] += 1
print(f"New entry: {val}")
def generate_email():
- ''' Generate and send an email
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Generate and send an email
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
msg = JRC.get_run_data(__file__, __version__)
if NEW_ORCID:
msg += f"The following ORCIDs were inserted into the {ARG.MANIFOLD} MongoDB DIS database:"
for oid, val in NEW_ORCID.items():
if not oid:
- oid = '(no ORCID)'
+ oid = "(no ORCID)"
msg += f"\n{oid}: {val}"
if ALUMNI:
msg += "\nThe following ORCIDs were set to alumni status:"
@@ -364,83 +378,88 @@ def generate_email():
msg += f"\n{alum}"
try:
LOGGER.info(f"Sending email to {DISCONFIG['receivers']}")
- JRC.send_email(msg, DISCONFIG['sender'], DISCONFIG['developer'] \
- if ARG.MANIFOLD == 'dev' else DISCONFIG['receivers'],
- "ORCID updates")
+ JRC.send_email(
+ msg,
+ DISCONFIG["sender"],
+ DISCONFIG["developer"] if ARG.MANIFOLD == "dev" else DISCONFIG["receivers"],
+ "ORCID updates",
+ )
except Exception as err:
LOGGER.error(err)
def handle_name(oids):
- ''' Handle a name from the command line
- Keyword arguments:
- oids: ORCID ID dict
- Returns:
- None
- '''
- add_name('', oids, ARG.FAMILY.capitalize(), ARG.GIVEN.capitalize())
- COUNT['orcid'] += 1
- correlate_person('', oids)
- if 'employeeId' not in oids['']:
+ """Handle a name from the command line
+ Keyword arguments:
+ oids: ORCID ID dict
+ Returns:
+ None
+ """
+ add_name("", oids, ARG.FAMILY.capitalize(), ARG.GIVEN.capitalize())
+ COUNT["orcid"] += 1
+ correlate_person("", oids)
+ if "employeeId" not in oids[""]:
terminate_program("Could not find a record in People")
try:
- row = DB['dis'].orcid.find_one({"employeeId": oids['']['employeeId']})
+ row = DB["dis"].orcid.find_one({"employeeId": oids[""]["employeeId"]})
except Exception as err:
terminate_program(err)
if row:
terminate_program("Record already exists")
- if not should_continue(oids['']):
+ if not should_continue(oids[""]):
LOGGER.warning("Record was not inserted")
terminate_program()
def should_continue(rec):
- ''' Ask user if we should continue
- Keyword arguments:
- rec: orcid collection record
- Returns:
- True or False
- '''
+ """Ask user if we should continue
+ Keyword arguments:
+ rec: orcid collection record
+ Returns:
+ True or False
+ """
print(json.dumps(rec, indent=2))
quest = [inquirer.Confirm("continue", message="Insert this record?", default=True)]
ans = inquirer.prompt(quest)
- if not ans or not ans['continue']:
+ if not ans or not ans["continue"]:
return False
return True
def perform_cleanup():
- ''' Check all ORCIDs to see if they are alumni
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Check all ORCIDs to see if they are alumni
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
payload = {"employeeId": {"$exists": True}, "alumni": {"$exists": False}}
try:
- cnt = DB['dis'].orcid.count_documents(payload)
- rows = DB['dis'].orcid.find(payload)
+ cnt = DB["dis"].orcid.count_documents(payload)
+ rows = DB["dis"].orcid.find(payload)
except Exception as err:
terminate_program(err)
LOGGER.info(f"Found {cnt} potential alumni")
- for row in tqdm(rows, desc='Alumni', total=cnt):
- idresp = JRC.call_people_by_id(row['employeeId'])
- if not idresp or not idresp['employeeId']:
+ for row in tqdm(rows, desc="Alumni", total=cnt):
+ idresp = JRC.call_people_by_id(row["employeeId"])
+ if not idresp or not idresp["employeeId"]:
msg = f"{row['given']} {row['family']} ({row['employeeId']}) is now alumni"
LOGGER.warning(msg)
ALUMNI.append(msg)
- COUNT['alumni'] += 1
+ COUNT["alumni"] += 1
if ARG.WRITE:
- DB['dis'].orcid.update_one({"_id": row['_id']}, {"$set": {"alumni": True}})
+ DB["dis"].orcid.update_one(
+ {"_id": row["_id"]}, {"$set": {"alumni": True}}
+ )
def update_orcid():
- ''' Update the orcid collection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Update the orcid collection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
LOGGER.info(f"Started run (version {__version__})")
oids = {}
if ARG.GIVEN and ARG.FAMILY:
@@ -450,27 +469,34 @@ def update_orcid():
family, given = get_name(ARG.ORCID)
if family and given:
add_name(ARG.ORCID, oids, family, given)
- oids[ARG.ORCID]['orcid'] = ARG.ORCID
- COUNT['orcid'] += 1
+ oids[ARG.ORCID]["orcid"] = ARG.ORCID
+ COUNT["orcid"] += 1
add_janelia_info(oids)
- if 'employeeId' not in oids[ARG.ORCID]:
- oids[ARG.ORCID]['alumni'] = True
+ if "employeeId" not in oids[ARG.ORCID]:
+ oids[ARG.ORCID]["alumni"] = True
if not should_continue(oids[ARG.ORCID]):
LOGGER.warning("Record was not inserted")
terminate_program()
else:
# Get ORCIDs from the doi collection
- dcoll = DB['dis'].dois
+ dcoll = DB["dis"].dois
# Crossref
- payload = {"author.affiliation.name": {"$regex": "Janelia"},
- "author.ORCID": {"$exists": True}}
- project = {"author.given": 1, "author.family": 1,
- "author.ORCID": 1, "author.affiliation": 1, "doi": 1}
+ payload = {
+ "author.affiliation.name": {"$regex": "Janelia"},
+ "author.ORCID": {"$exists": True},
+ }
+ project = {
+ "author.given": 1,
+ "author.family": 1,
+ "author.ORCID": 1,
+ "author.affiliation": 1,
+ "doi": 1,
+ }
recs = dcoll.find(payload, project)
for rec in tqdm(recs, desc="Adding from doi collection"):
- COUNT['records'] += 1
- for aut in rec['author']:
- if 'ORCID' not in aut:
+ COUNT["records"] += 1
+ for aut in rec["author"]:
+ if "ORCID" not in aut:
continue
process_author(aut, oids, "crossref")
add_from_orcid(oids)
@@ -488,33 +514,57 @@ def update_orcid():
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Add ORCID information to MongoDB:orcid")
- PARSER.add_argument('--orcid', dest='ORCID', action='store',
- help='ORCID ID')
- PARSER.add_argument('--given', dest='GIVEN', action='store',
- help='Given name')
- PARSER.add_argument('--family', dest='FAMILY', action='store',
- help='Family name')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--force', dest='FORCE', action='store_true',
- default=False, help='Update ORCID ID whether correlated or not')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ description="Add ORCID information to MongoDB:orcid"
+ )
+ PARSER.add_argument("--orcid", dest="ORCID", action="store", help="ORCID ID")
+ PARSER.add_argument("--given", dest="GIVEN", action="store", help="Given name")
+ PARSER.add_argument("--family", dest="FAMILY", action="store", help="Family name")
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--force",
+ dest="FORCE",
+ action="store_true",
+ default=False,
+ help="Update ORCID ID whether correlated or not",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
CONFIG = configparser.ConfigParser()
- CONFIG.read('config.ini')
+ CONFIG.read("config.ini")
DISCONFIG = JRC.simplenamespace_to_dict(JRC.get_config("dis"))
update_orcid()
terminate_program()
diff --git a/sync/bin/update_preprints.py b/sync/bin/update_preprints.py
index a5e7204..09fd751 100644
--- a/sync/bin/update_preprints.py
+++ b/sync/bin/update_preprints.py
@@ -1,15 +1,15 @@
-""" update_preprints.py
- Update the jrc_preprint field in the dois collection for all locally-stored DOIs.
- Every preprint (from DataCite and Crossref) will be compared to every "primary" DOI
- (from Crossref) to determine if each pair is the same publication. The publication
- pair must have a RapidFuzz score greater than or equal to the threshold value.
- The first and last author for each pair must also match using the same criteria.
- For each pair with a title/author match, a relationship will be created between the
- DOIs. When all DOIs have been processed, the relationships will be written to the
- jrc_preprint field in the dois collection.
+"""update_preprints.py
+Update the jrc_preprint field in the dois collection for all locally-stored DOIs.
+Every preprint (from DataCite and Crossref) will be compared to every "primary" DOI
+(from Crossref) to determine if each pair is the same publication. The publication
+pair must have a RapidFuzz score greater than or equal to the threshold value.
+The first and last author for each pair must also match using the same criteria.
+For each pair with a title/author match, a relationship will be created between the
+DOIs. When all DOIs have been processed, the relationships will be written to the
+jrc_preprint field in the dois collection.
"""
-__version__ = '1.0.0'
+__version__ = "1.0.0"
import argparse
import collections
@@ -35,17 +35,27 @@
PREPRINTREL = {}
# Output data
AUDIT = []
-MATCH = {"DOI": [], "Title": [], "Score": [], "First author": [], "First author score": [],
- "Last author": [], "Last author score": [], "Publishing date": [], "Decision": []}
+MATCH = {
+ "DOI": [],
+ "Title": [],
+ "Score": [],
+ "First author": [],
+ "First author score": [],
+ "Last author": [],
+ "Last author score": [],
+ "Publishing date": [],
+ "Decision": [],
+}
MISSING = {}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -54,60 +64,76 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info(f"Connecting to {dbo.name} {ARG.MANIFOLD} on {dbo.host} as {dbo.user}")
+ LOGGER.info(
+ f"Connecting to {dbo.name} {ARG.MANIFOLD} on {dbo.host} as {dbo.user}"
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
LOGGER.info("Getting DOIs")
- projection = {"_id": 0, "DOI": 1, "doi": 1, "title": 1, "titles": 1,
- "author": 1, "creators": 1, "relation": 1,
- "published": 1, "published-print": 1, "published-online": 1,
- "posted": 1, "created": 1, "registered": 1}
+ projection = {
+ "_id": 0,
+ "DOI": 1,
+ "doi": 1,
+ "title": 1,
+ "titles": 1,
+ "author": 1,
+ "creators": 1,
+ "relation": 1,
+ "published": 1,
+ "published-print": 1,
+ "published-online": 1,
+ "posted": 1,
+ "created": 1,
+ "registered": 1,
+ }
try:
# Primary DOIs will all be from Crossref
- rows = DB['dis'].dois.find({"type": "journal-article"},
- projection)
+ rows = DB["dis"].dois.find({"type": "journal-article"}, projection)
except Exception as err:
terminate_program(err)
for row in rows:
- PRIMARY[row['doi']] = row
+ PRIMARY[row["doi"]] = row
LOGGER.info(f"Primary DOIs: {len(PRIMARY):,}")
try:
# Preprints will be from Crossref or DataCite
- rows = DB['dis'].dois.find({"$or": [{"type": "posted-content"},
- {"jrc_obtained_from": "DataCite"}],
- "doi": {"$not": {"$regex": "^10.25378/janelia."}}},
- projection)
+ rows = DB["dis"].dois.find(
+ {
+ "$or": [{"type": "posted-content"}, {"jrc_obtained_from": "DataCite"}],
+ "doi": {"$not": {"$regex": "^10.25378/janelia."}},
+ },
+ projection,
+ )
except Exception as err:
terminate_program(err)
for row in rows:
- PREPRINT[row['doi']] = row
+ PREPRINT[row["doi"]] = row
LOGGER.info(f"Preprint DOIs: {len(PREPRINT):,}")
def make_relationships(prerec, primrec):
- ''' Make relationships based on DOI record "relation" field
- Keyword arguments:
- prerec: preprint record
- primrec: primary record
- Returns:
- None
- '''
+ """Make relationships based on DOI record "relation" field
+ Keyword arguments:
+ prerec: preprint record
+ primrec: primary record
+ Returns:
+ None
+ """
if "relation" in primrec and "has-preprint" in primrec["relation"]:
for rec in primrec["relation"]["has-preprint"]:
if "id-type" in rec and rec["id-type"] == "doi":
@@ -119,13 +145,13 @@ def make_relationships(prerec, primrec):
def make_doi_relationships(predoi, primdoi):
- ''' Make relationships between two DOIs
- Keyword arguments:
- predoi: preprint DOI
- primdoi: primary DOI
- Returns:
- None
- '''
+ """Make relationships between two DOIs
+ Keyword arguments:
+ predoi: preprint DOI
+ primdoi: primary DOI
+ Returns:
+ None
+ """
# Find DOIs mnissing from dois collection
predoi = predoi.lower()
if predoi not in PREPRINT:
@@ -139,24 +165,24 @@ def make_doi_relationships(predoi, primdoi):
if primdoi not in PREPRINTREL[predoi]:
LOGGER.debug(f"Adding primary {primdoi} to {PREPRINTREL[predoi]}")
PREPRINTREL[predoi].append(primdoi)
- COUNT['preprint_relations'] += 1
+ COUNT["preprint_relations"] += 1
# Primary -> Preprint
if primdoi not in PRIMARYREL:
PRIMARYREL[primdoi] = []
if predoi not in PRIMARYREL[primdoi]:
LOGGER.debug(f"Adding preprint {predoi} to {PRIMARYREL[primdoi]}")
PRIMARYREL[primdoi].append(predoi)
- COUNT['primary_relations'] += 1
+ COUNT["primary_relations"] += 1
def process_pair(prerec, primrec):
- predoi = prerec['doi']
- primdoi = primrec['doi']
+ predoi = prerec["doi"]
+ primdoi = primrec["doi"]
pretitle = DL.get_title(prerec)
primtitle = DL.get_title(primrec)
if "relation" in prerec or "relation" in primrec:
make_relationships(prerec, primrec)
- COUNT['comparisons'] += 1
+ COUNT["comparisons"] += 1
score = fuzz.token_sort_ratio(pretitle, primtitle, processor=utils.default_process)
if score < ARG.THRESHOLD:
return
@@ -166,40 +192,46 @@ def process_pair(prerec, primrec):
authors = DL.get_author_list(primrec, returntype="list")
primfirst = authors[0]
primlast = authors[-1]
- MATCH['DOI'].extend([predoi, primdoi])
- MATCH['Title'].extend([pretitle, primtitle])
- MATCH['Score'].extend([score, score])
- MATCH['First author'].extend([prefirst, primfirst])
- first_score = fuzz.token_sort_ratio(prefirst, primfirst, processor=utils.default_process)
- MATCH['First author score'].extend([first_score, first_score])
- MATCH['Last author'].extend([prelast, primlast])
- last_score = fuzz.token_sort_ratio(prelast, primlast, processor=utils.default_process)
- MATCH['Last author score'].extend([last_score, last_score])
- MATCH['Publishing date'].extend([DL.get_publishing_date(prerec),
- DL.get_publishing_date(primrec)])
- COUNT['title_match'] += 1
+ MATCH["DOI"].extend([predoi, primdoi])
+ MATCH["Title"].extend([pretitle, primtitle])
+ MATCH["Score"].extend([score, score])
+ MATCH["First author"].extend([prefirst, primfirst])
+ first_score = fuzz.token_sort_ratio(
+ prefirst, primfirst, processor=utils.default_process
+ )
+ MATCH["First author score"].extend([first_score, first_score])
+ MATCH["Last author"].extend([prelast, primlast])
+ last_score = fuzz.token_sort_ratio(
+ prelast, primlast, processor=utils.default_process
+ )
+ MATCH["Last author score"].extend([last_score, last_score])
+ MATCH["Publishing date"].extend(
+ [DL.get_publishing_date(prerec), DL.get_publishing_date(primrec)]
+ )
+ COUNT["title_match"] += 1
if (first_score >= ARG.THRESHOLD) and (last_score >= ARG.THRESHOLD):
make_doi_relationships(predoi, primdoi)
- MATCH['Decision'].extend(["Relate", "Relate"])
- COUNT['title_author_match'] += 1
+ MATCH["Decision"].extend(["Relate", "Relate"])
+ COUNT["title_author_match"] += 1
else:
- MATCH['Decision'].extend(["", ""])
+ MATCH["Decision"].extend(["", ""])
def write_to_database():
- ''' Write relationships to the database
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Write relationships to the database
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
for predoi, primdois in tqdm(PREPRINTREL.items(), desc="Write preprints"):
AUDIT.append(f"{predoi} -> {primdois}")
if not ARG.WRITE:
continue
try:
- DB['dis'].dois.update_one({"doi": predoi},
- {"$set": {"jrc_preprint": primdois}})
+ DB["dis"].dois.update_one(
+ {"doi": predoi}, {"$set": {"jrc_preprint": primdois}}
+ )
except Exception as err:
terminate_program(err)
for primdoi, predois in tqdm(PRIMARYREL.items(), desc="Write primaries"):
@@ -207,40 +239,41 @@ def write_to_database():
if not ARG.WRITE:
continue
try:
- DB['dis'].dois.update_one({"doi": primdoi},
- {"$set": {"jrc_preprint": predois}})
+ DB["dis"].dois.update_one(
+ {"doi": primdoi}, {"$set": {"jrc_preprint": predois}}
+ )
except Exception as err:
terminate_program(err)
def add_jrc_preprint():
- ''' Update the jrc_preprint field in the dois collection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Update the jrc_preprint field in the dois collection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
for prerec in tqdm(PREPRINT.values(), desc="Preprints"):
for primrec in PRIMARY.values():
process_pair(prerec, primrec)
# Write to dois collection
write_to_database()
# Output files
- timestamp = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
+ timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
if AUDIT:
file_name = f"audit_{timestamp}.txt"
- with open(file_name, 'w', encoding='utf-8') as ostream:
+ with open(file_name, "w", encoding="utf-8") as ostream:
for line in AUDIT:
ostream.write(f"{line}\n")
LOGGER.warning(f"Audit written to {file_name}")
- if MATCH['DOI']:
+ if MATCH["DOI"]:
file_name = f"title_matches_{timestamp}.xlsx"
df = pd.DataFrame.from_dict(MATCH)
df.to_excel(file_name, index=False)
LOGGER.warning(f"Title matches written to {file_name}")
if MISSING:
file_name = f"missing_dois_{timestamp}.txt"
- with open(file_name, 'w', encoding='utf-8') as ostream:
+ with open(file_name, "w", encoding="utf-8") as ostream:
for line in MISSING:
ostream.write(f"{line}\n")
LOGGER.warning(f"Missing DOIs written to {file_name}")
@@ -257,22 +290,50 @@ def add_jrc_preprint():
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Update jrc_preprint in the dois collection")
- PARSER.add_argument('--threshold', dest='THRESHOLD', action='store',
- default=90, type=int, help='Fuzzy matching threshold')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ description="Update jrc_preprint in the dois collection"
+ )
+ PARSER.add_argument(
+ "--threshold",
+ dest="THRESHOLD",
+ action="store",
+ default=90,
+ type=int,
+ help="Fuzzy matching threshold",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/test/name_match/affiliations_regex.py b/test/name_match/affiliations_regex.py
index b8a9c9c..08b0000 100644
--- a/test/name_match/affiliations_regex.py
+++ b/test/name_match/affiliations_regex.py
@@ -1,40 +1,42 @@
import re
tests = [
- 'Janelia Research Campus, Ashburn VA', # want True
- '2Janelia', # want True
- 'Janelia', # want True
- 'The Janelia Farm', # want True
- ' janelia, ', # want True
- 'thejaneliafarm', # want True
- 'Howard Hughes Medical Institute, Ashburn', # want True
- '1HHMI, Ashburn, VA', # want True
- 'The Howard Hughes, Ashburn', # want True
- 'howardhughesmedicalinstitute, ashburnva', # want True
- 'Howard Hughes MedicalInstitute, Ashburn', # want True
- 'The Howard Hughes Medical Institute', # want False
- 'HHMI', # want False
- 'Howard Hughes Medical Institute, Seattle, WA', # want False
- 'Janeli' # want False
+ "Janelia Research Campus, Ashburn VA", # want True
+ "2Janelia", # want True
+ "Janelia", # want True
+ "The Janelia Farm", # want True
+ " janelia, ", # want True
+ "thejaneliafarm", # want True
+ "Howard Hughes Medical Institute, Ashburn", # want True
+ "1HHMI, Ashburn, VA", # want True
+ "The Howard Hughes, Ashburn", # want True
+ "howardhughesmedicalinstitute, ashburnva", # want True
+ "Howard Hughes MedicalInstitute, Ashburn", # want True
+ "The Howard Hughes Medical Institute", # want False
+ "HHMI", # want False
+ "Howard Hughes Medical Institute, Seattle, WA", # want False
+ "Janeli", # want False
]
ans = [True] * 11 + [False] * 4
+
def is_janelian(affilstr):
result = False
pattern = re.compile(
- r'(?i)(janelia|' # (?i) means case-insensitive; pattern matches "Janelia" in any form, e.g., "Janelia", "thejaneliafarm", etc.
- r'(ashburn.*(hhmi|howard\s*hughes))|' # "Ashburn" with "HHMI" or "Howard Hughes"
- r'(hhmi|howard\s*hughes).*ashburn)' # "HHMI" or "Howard Hughes" with "Ashburn"
+ r"(?i)(janelia|" # (?i) means case-insensitive; pattern matches "Janelia" in any form, e.g., "Janelia", "thejaneliafarm", etc.
+ r"(ashburn.*(hhmi|howard\s*hughes))|" # "Ashburn" with "HHMI" or "Howard Hughes"
+ r"(hhmi|howard\s*hughes).*ashburn)" # "HHMI" or "Howard Hughes" with "Ashburn"
)
if bool(re.search(pattern, affilstr)):
result = True
- return(result)
+ return result
+
res = [is_janelian(t) for t in tests]
-if res == ans:
- print('Pass')
+if res == ans:
+ print("Pass")
else:
- print('Fail')
- print(f'Result of regex:\n{res}')
+ print("Fail")
+ print(f"Result of regex:\n{res}")
diff --git a/test/name_match/db_connect.py b/test/name_match/db_connect.py
index 858734d..5245cb8 100644
--- a/test/name_match/db_connect.py
+++ b/test/name_match/db_connect.py
@@ -2,7 +2,8 @@
import sys
import jrc_common.jrc_common as JRC
-class DummyArg():
+
+class DummyArg:
def __init__(self):
self.VERBOSE = False
self.DEBUG = False
@@ -11,44 +12,44 @@ def __init__(self):
DB = {}
PROJECT = {}
+
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
- manifold = 'prod'
+ manifold = "prod"
dbo = attrgetter(f"{source}.{manifold}.write")(dbconfig)
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
try:
- rows = DB['dis'].project_map.find({})
+ rows = DB["dis"].project_map.find({})
except Exception as err:
terminate_program(err)
for row in rows:
- PROJECT[row['name']] = row['project']
+ PROJECT[row["name"]] = row["project"]
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
LOGGER.critical(msg)
sys.exit(-1 if msg else 0)
-
-
diff --git a/test/name_match/get_data_for_test_case.py b/test/name_match/get_data_for_test_case.py
index e003b03..4d407f5 100644
--- a/test/name_match/get_data_for_test_case.py
+++ b/test/name_match/get_data_for_test_case.py
@@ -7,20 +7,20 @@
try:
import name_match as nm
except:
- print('ERROR: Could not import name_match.py. Is it in your PYTHONPATH?')
+ print("ERROR: Could not import name_match.py. Is it in your PYTHONPATH?")
sys.exit(0)
db_connect.initialize_program()
LOGGER = JRC.setup_logging(db_connect.DummyArg())
-orcid_collection = db_connect.DB['dis'].orcid
-doi_collection = db_connect.DB['dis'].dois
+orcid_collection = db_connect.DB["dis"].orcid
+doi_collection = db_connect.DB["dis"].dois
-doi = '10.1101/2024.09.16.613338'
+doi = "10.1101/2024.09.16.613338"
doi_record = nm.doi_common.get_doi_record(doi, doi_collection)
all_authors = nm.get_author_objects(doi, doi_record, doi_collection)
print(f"janelians:{[a.name for a in all_authors if a.check==True]}")
-print( [nm.propose_candidates(a) for a in all_authors] )
+print([nm.propose_candidates(a) for a in all_authors])
guesses = [nm.propose_candidates(a) for a in all_authors]
for L in guesses:
for g in L:
diff --git a/test/name_match/tc_common.py b/test/name_match/tc_common.py
index df9c72b..8339d9e 100644
--- a/test/name_match/tc_common.py
+++ b/test/name_match/tc_common.py
@@ -1,17 +1,19 @@
-class TestCase():
+class TestCase:
def read_config(self, filename):
- with open(f'{filename}/config.txt', 'r') as config_file_obj:
- config_dict = {line.split(':')[0]: line.split(':')[1].rstrip('\n') for line in config_file_obj.readlines()}
+ with open(f"{filename}/config.txt", "r") as config_file_obj:
+ config_dict = {
+ line.split(":")[0]: line.split(":")[1].rstrip("\n")
+ for line in config_file_obj.readlines()
+ }
for key, value in config_dict.items():
setattr(self, key, value)
def candidate_ids(self):
- return(self.initial_candidate_employee_ids.split(","))
+ return self.initial_candidate_employee_ids.split(",")
def guesses(self):
return self.read_file(f"{self.dirname}/guesses.txt")
def read_file(self, filename):
- with open (filename, 'r') as inF:
- return inF.readlines()[0].rstrip('\n')
-
+ with open(filename, "r") as inF:
+ return inF.readlines()[0].rstrip("\n")
diff --git a/test/name_match/test1_authors_to_candidates.py b/test/name_match/test1_authors_to_candidates.py
index 1fd5530..7425b3e 100755
--- a/test/name_match/test1_authors_to_candidates.py
+++ b/test/name_match/test1_authors_to_candidates.py
@@ -15,23 +15,25 @@
try:
import name_match as nm
except:
- print('ERROR: Could not import name_match.py. Is it in your PYTHONPATH?')
+ print("ERROR: Could not import name_match.py. Is it in your PYTHONPATH?")
sys.exit(0)
# Boilerplate: initialize DB connection
db_connect.initialize_program()
-LOGGER = JRC.setup_logging(db_connect.DummyArg())
-orcid_collection = db_connect.DB['dis'].orcid
-doi_collection = db_connect.DB['dis'].dois
+LOGGER = JRC.setup_logging(db_connect.DummyArg())
+orcid_collection = db_connect.DB["dis"].orcid
+doi_collection = db_connect.DB["dis"].dois
-#Boilerplate: create a TestCase object (attributes come from config file)
+# Boilerplate: create a TestCase object (attributes come from config file)
config = tc_common.TestCase()
config.read_config(sys.argv[1])
-author_details = doi_common.get_author_details(doi_common.get_doi_record(config.doi, doi_collection), doi_collection) #IMPORTANT: NEED TO UPDATE THE SECOND ARG HERE... SOON
-all_authors = [ nm.create_author(author_record) for author_record in author_details]
+author_details = doi_common.get_author_details(
+ doi_common.get_doi_record(config.doi, doi_collection), doi_collection
+) # IMPORTANT: NEED TO UPDATE THE SECOND ARG HERE... SOON
+all_authors = [nm.create_author(author_record) for author_record in author_details]
ids = []
for a in all_authors:
@@ -39,16 +41,18 @@
ids.append(nm.name_search(name.first, name.last))
-if set(nm.flatten(ids)) == set(config.candidate_ids()): # use a set because order doesn't matter
- print('Pass: initial candidate employee IDs')
+if set(nm.flatten(ids)) == set(
+ config.candidate_ids()
+): # use a set because order doesn't matter
+ print("Pass: initial candidate employee IDs")
else:
- print(f'Fail: initial candidate employee IDs\nExpected:{config.candidate_ids()}\nReturned:{ids}')
-
-
+ print(
+ f"Fail: initial candidate employee IDs\nExpected:{config.candidate_ids()}\nReturned:{ids}"
+ )
guess_lists = [nm.propose_candidates(a) for a in all_authors]
-target = config.guesses() # Guess lists from file, represented as one string
+target = config.guesses() # Guess lists from file, represented as one string
# for i in range(min(len(str(guess_lists)), len(target))):
# if str(guess_lists)[i] != target[i]:
@@ -56,9 +60,8 @@
# break
if str(guess_lists) == target:
- print('Pass: initial proposed guesses')
+ print("Pass: initial proposed guesses")
else:
- print(f'Fail: initial proposed guesses\nExpected:{[e for e in target]}\nReturned:{[str(e)for e in guess_lists]}')
-
-
-
+ print(
+ f"Fail: initial proposed guesses\nExpected:{[e for e in target]}\nReturned:{[str(e)for e in guess_lists]}"
+ )
diff --git a/test/name_match/test2_janelia_authors.py b/test/name_match/test2_janelia_authors.py
index f69de6b..95a6109 100644
--- a/test/name_match/test2_janelia_authors.py
+++ b/test/name_match/test2_janelia_authors.py
@@ -14,34 +14,40 @@
try:
import name_match as nm
except:
- print('ERROR: Could not import name_match.py. Is it in your PYTHONPATH?')
+ print("ERROR: Could not import name_match.py. Is it in your PYTHONPATH?")
sys.exit(0)
-
# Boilerplate: initialize DB connection
db_connect.initialize_program()
-LOGGER = JRC.setup_logging(db_connect.DummyArg())
-orcid_collection = db_connect.DB['dis'].orcid
-doi_collection = db_connect.DB['dis'].dois
+LOGGER = JRC.setup_logging(db_connect.DummyArg())
+orcid_collection = db_connect.DB["dis"].orcid
+doi_collection = db_connect.DB["dis"].dois
-#Boilerplate: create a TestCase object (attributes come from config file)
+# Boilerplate: create a TestCase object (attributes come from config file)
config = tc_common.TestCase()
config.read_config(sys.argv[1])
-author_details_from_dis = doi_common.get_author_details(doi_common.get_doi_record(config.doi, doi_collection), doi_collection) #IMPORTANT: NEED TO UPDATE THE SECOND ARG HERE... SOON
+author_details_from_dis = doi_common.get_author_details(
+ doi_common.get_doi_record(config.doi, doi_collection), doi_collection
+) # IMPORTANT: NEED TO UPDATE THE SECOND ARG HERE... SOON
authors_from_dis = [nm.create_author(a) for a in author_details_from_dis]
-bool_results_from_dis = [nm.is_janelian(author, orcid_collection) for author in authors_from_dis]
+bool_results_from_dis = [
+ nm.is_janelian(author, orcid_collection) for author in authors_from_dis
+]
target = eval(config.janelians)
-test = dict(zip([a.name for a in authors_from_dis], bool_results_from_dis)) #Note this will fail if two authors have same name
+test = dict(
+ zip([a.name for a in authors_from_dis], bool_results_from_dis)
+) # Note this will fail if two authors have same name
test = [k for k, v in test.items() if v == True]
-if set(target) == set(test): # use sets because I don't care about order
- print('Pass: assess which authors have Janelia in the author affiliations')
+if set(target) == set(test): # use sets because I don't care about order
+ print("Pass: assess which authors have Janelia in the author affiliations")
else:
- print(f"Fail: assess which authors have Janelia in the author affiliations\nResult from file: {target}\nResult from DIS DB: {test}")
-
+ print(
+ f"Fail: assess which authors have Janelia in the author affiliations\nResult from file: {target}\nResult from DIS DB: {test}"
+ )
diff --git a/utility/bin/add_newsletter.py b/utility/bin/add_newsletter.py
index d7808b5..80094d6 100644
--- a/utility/bin/add_newsletter.py
+++ b/utility/bin/add_newsletter.py
@@ -1,6 +1,6 @@
-''' add_reviewed.py
- Update the jrc_newsletter date for one or more DOIs
-'''
+"""add_newsletter.py
+Update the jrc_newsletter date for one or more DOIs
+"""
import argparse
from datetime import datetime
@@ -12,15 +12,16 @@
# pylint: disable=broad-exception-caught,logging-fstring-interpolation
DB = {}
-COUNT = {'dois': 0, 'notfound': 0, 'updated': 0}
+COUNT = {"dois": 0, "notfound": 0, "updated": 0}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -29,20 +30,26 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Initialize database connection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize database connection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -50,16 +57,16 @@ def initialize_program():
def update_single_doi(doi):
- """ Process a list of DOIs
- Keyword arguments:
- None
- Returns:
- None
+ """Process a single DOI
+ Keyword arguments:
+ doi: DOI to process
+ Returns:
+ None
"""
doi = doi.lower()
LOGGER.info(doi)
COUNT["dois"] += 1
- coll = DB['dis'].dois
+ coll = DB["dis"].dois
row = coll.find_one({"doi": doi})
if not row:
LOGGER.warning(f"{doi} was not found")
@@ -78,19 +85,21 @@ def update_single_doi(doi):
def process_dois():
- """ Process a list of DOIs
- Keyword arguments:
- None
- Returns:
- None
+ """Process a list of DOIs
+ Keyword arguments:
+ None
+ Returns:
+ None
"""
if not ARG.DATE:
- ARG.DATE = datetime.today().strftime('%Y-%m-%d')
+ ARG.DATE = datetime.today().strftime("%Y-%m-%d")
else:
try:
- _ = datetime.strptime(ARG.DATE, '%Y-%m-%d')
+ _ = datetime.strptime(ARG.DATE, "%Y-%m-%d")
except ValueError:
- terminate_program(f"Supplied date {ARG.DATE} is not a valid date (YYYY-MM-DD)")
+ terminate_program(
+ f"Supplied date {ARG.DATE} is not a valid date (YYYY-MM-DD)"
+ )
if ARG.DOI:
update_single_doi(ARG.DOI)
elif ARG.FILE:
@@ -102,42 +111,75 @@ def process_dois():
LOGGER.error(f"Could not process {ARG.FILE}")
terminate_program(err)
print(f"DOIs read: {COUNT['dois']}")
- if COUNT['notfound']:
+ if COUNT["notfound"]:
print(f"DOIs not found: {COUNT['notfound']}")
print(f"DOIs updated: {COUNT['updated']}")
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Add a reviewed date to one or more DOIs")
+ description="Add a reviewed date to one or more DOIs"
+ )
GROUP_A = PARSER.add_mutually_exclusive_group(required=True)
- GROUP_A.add_argument('--doi', dest='DOI', action='store',
- help='Single DOI to process')
- GROUP_A.add_argument('--file', dest='FILE', action='store',
- help='File of DOIs to process')
- PARSER.add_argument('--date', dest='DATE', action='store',
- help='Newsletter date (defaults to today). Format: YYYY-MM-DD')
- PARSER.add_argument('--remove', dest='REMOVE', action='store_true',
- default=False, help='Remove jrc_newsletter from DOI(s)')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ GROUP_A.add_argument(
+ "--doi", dest="DOI", action="store", help="Single DOI to process"
+ )
+ GROUP_A.add_argument(
+ "--file", dest="FILE", action="store", help="File of DOIs to process"
+ )
+ PARSER.add_argument(
+ "--date",
+ dest="DATE",
+ action="store",
+ help="Newsletter date (defaults to today). Format: YYYY-MM-DD",
+ )
+ PARSER.add_argument(
+ "--remove",
+ dest="REMOVE",
+ action="store_true",
+ default=False,
+ help="Remove jrc_newsletter from DOI(s)",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
if ARG.DATE:
if ARG.REMOVE:
terminate_program("Specifying --date and --remove isn't permitted")
try:
- datetime.strptime(ARG.DATE, '%Y-%m-%d')
+ datetime.strptime(ARG.DATE, "%Y-%m-%d")
except ValueError:
terminate_program(f"{ARG.DATE} is an invalid date")
initialize_program()
diff --git a/utility/bin/add_preprint.py b/utility/bin/add_preprint.py
index ba743fd..77ba364 100644
--- a/utility/bin/add_preprint.py
+++ b/utility/bin/add_preprint.py
@@ -1,8 +1,8 @@
-""" add_preprint.py
- Associate two DOIs with a preprint relationship.
+"""add_preprint.py
+Associate two DOIs with a preprint relationship.
"""
-__version__ = '1.0.0'
+__version__ = "1.0.0"
import argparse
import collections
@@ -21,12 +21,12 @@
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -35,21 +35,27 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -57,41 +63,45 @@ def initialize_program():
def associate_dois(journal, preprint):
- """ Associate two DOIs
- Keyword arguments:
- journal: primary DOI record
- preprint: preprint DOI record
- Returns:
- payloadj: journal payload
- payloadp: preprint payload
+ """Associate two DOIs
+ Keyword arguments:
+ journal: primary DOI record
+ preprint: preprint DOI record
+ Returns:
+ payloadj: journal payload
+ payloadp: preprint payload
"""
- if 'jrc_preprint' in journal:
- if ARG.PREPRINT in journal['jrc_preprint']:
- LOGGER.warning(f"Preprint {ARG.PREPRINT} already associated with {ARG.JOURNAL}")
+ if "jrc_preprint" in journal:
+ if ARG.PREPRINT in journal["jrc_preprint"]:
+ LOGGER.warning(
+ f"Preprint {ARG.PREPRINT} already associated with {ARG.JOURNAL}"
+ )
else:
- journal['jrc_preprint'].append(ARG.PREPRINT)
+ journal["jrc_preprint"].append(ARG.PREPRINT)
else:
- journal['jrc_preprint'] = [ARG.PREPRINT]
- payloadj = {"jrc_preprint": journal['jrc_preprint']}
- if 'jrc_preprint' in preprint:
- if ARG.JOURNAL in preprint['jrc_preprint']:
- LOGGER.warning(f"Primary DOI {ARG.JOURNAL} already associated with {ARG.PREPRINT}")
+ journal["jrc_preprint"] = [ARG.PREPRINT]
+ payloadj = {"jrc_preprint": journal["jrc_preprint"]}
+ if "jrc_preprint" in preprint:
+ if ARG.JOURNAL in preprint["jrc_preprint"]:
+ LOGGER.warning(
+ f"Primary DOI {ARG.JOURNAL} already associated with {ARG.PREPRINT}"
+ )
else:
- preprint['jrc_preprint'].append(ARG.JOURNAL)
+ preprint["jrc_preprint"].append(ARG.JOURNAL)
else:
- preprint['jrc_preprint'] = [ARG.JOURNAL]
- payloadp = {"jrc_preprint": preprint['jrc_preprint']}
+ preprint["jrc_preprint"] = [ARG.JOURNAL]
+ payloadp = {"jrc_preprint": preprint["jrc_preprint"]}
return payloadj, payloadp
def add_jrc_preprint():
- """ Update jrc_preprint for specified DOIs
- Keyword arguments:
- None
- Returns:
- None
+ """Update jrc_preprint for specified DOIs
+ Keyword arguments:
+ None
+ Returns:
+ None
"""
- coll = DB['dis'].dois
+ coll = DB["dis"].dois
# Get records
try:
journal = DL.get_doi_record(ARG.JOURNAL, coll)
@@ -116,38 +126,76 @@ def add_jrc_preprint():
# Associate DOIs
payloadj, payloadp = associate_dois(journal, preprint)
if ARG.WRITE:
- result = coll.update_one({"doi": journal['doi']}, {"$set": payloadj})
- if hasattr(result, 'matched_count') and result.matched_count:
- COUNT['updated'] += 1
- result = coll.update_one({"doi": preprint['doi']}, {"$set": payloadp})
- if hasattr(result, 'matched_count') and result.matched_count:
- COUNT['updated'] += 1
+ result = coll.update_one({"doi": journal["doi"]}, {"$set": payloadj})
+ if hasattr(result, "matched_count") and result.matched_count:
+ COUNT["updated"] += 1
+ result = coll.update_one({"doi": preprint["doi"]}, {"$set": payloadp})
+ if hasattr(result, "matched_count") and result.matched_count:
+ COUNT["updated"] += 1
else:
- print(f"Primary DOI {ARG.JOURNAL} updated with preprint:\n {json.dumps(payloadj)}")
- print(f"Preprint {ARG.PREPRINT} updated with primary DOI:\n {json.dumps(payloadp)}")
- COUNT['updated'] = 2
+ print(
+ f"Primary DOI {ARG.JOURNAL} updated with preprint:\n {json.dumps(payloadj)}"
+ )
+ print(
+ f"Preprint {ARG.PREPRINT} updated with primary DOI:\n {json.dumps(payloadp)}"
+ )
+ COUNT["updated"] = 2
print(f"Records updated: {COUNT['updated']}")
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Associate two DOIs with a preprint relationship")
- PARSER.add_argument('--journal', dest='JOURNAL', action='store',
- type=str.lower, required=True, help='Primary (non-preprint) DOI')
- PARSER.add_argument('--preprint', dest='PREPRINT', action='store',
- type=str.lower, required=True, help='Preprint DOI')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ description="Associate two DOIs with a preprint relationship"
+ )
+ PARSER.add_argument(
+ "--journal",
+ dest="JOURNAL",
+ action="store",
+ type=str.lower,
+ required=True,
+ help="Primary (non-preprint) DOI",
+ )
+ PARSER.add_argument(
+ "--preprint",
+ dest="PREPRINT",
+ action="store",
+ type=str.lower,
+ required=True,
+ help="Preprint DOI",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/utility/bin/citation_counts.py b/utility/bin/citation_counts.py
index ac737d0..2688132 100644
--- a/utility/bin/citation_counts.py
+++ b/utility/bin/citation_counts.py
@@ -1,8 +1,8 @@
-""" template.py
- Template program that connects to DIS database
+"""citation_counts.py
+Get citation counts from Semantic Scholar for a file of DOIs
"""
-__version__ = '1.0.0'
+__version__ = "1.0.0"
import argparse
import json
@@ -19,25 +19,26 @@
# Database
DB = {}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
LOGGER.critical(msg)
sys.exit(-1 if msg else 0)
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# API key
if "S2_API_KEY" not in os.environ:
terminate_program("Missing token - set in S2_API_KEY environment variable")
@@ -46,26 +47,34 @@ def initialize_program():
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
-def s2_citation_count(doi, fmt='plain'):
- ''' Get citation count from Semantic Scholar
- Keyword arguments:
- doi: DOI
- fmt: format (plain or html)
- Returns:
- Citation count
- '''
- url = f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}?fields=citationCount"
- headers = {'x-api-key': os.environ['S2_API_KEY']}
+def s2_citation_count(doi, fmt="plain"):
+ """Get citation count from Semantic Scholar
+ Keyword arguments:
+ doi: DOI
+ fmt: format (plain or html)
+ Returns:
+ Citation count
+ """
+ url = (
+ f"https://api.semanticscholar.org/graph/v1/paper/DOI:{doi}?fields=citationCount"
+ )
+ headers = {"x-api-key": os.environ["S2_API_KEY"]}
loop = 0
while loop <= 5:
loop += 1
@@ -83,43 +92,64 @@ def s2_citation_count(doi, fmt='plain'):
LOGGER.warning(f"Failed {resp.status_code}")
return 0
data = resp.json()
- cnt = data['citationCount']
- if fmt == 'html':
- cnt = f"" \
- + f"{cnt}"
+ cnt = data["citationCount"]
+ if fmt == "html":
+ cnt = (
+ f""
+ + f"{cnt}"
+ )
return cnt
except Exception:
return 0
def processing():
- ''' Main processing routine
- Keyword arguments:
- None
- Returns:
- None
- '''
- with open(ARG.FILE, 'r', encoding='ascii') as instream:
+ """Main processing routine
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
+ with open(ARG.FILE, "r", encoding="ascii") as instream:
for doi in instream.read().splitlines():
print(f"{doi.lower().strip()}\t{s2_citation_count(doi)}")
sleep(1)
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Template program")
- PARSER.add_argument('--file', dest='FILE', action='store',
- help='File of DOIs')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Template program")
+ PARSER.add_argument("--file", dest="FILE", action="store", help="File of DOIs")
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/utility/bin/delete_dois.py b/utility/bin/delete_dois.py
index 20dbf3b..085dbc5 100644
--- a/utility/bin/delete_dois.py
+++ b/utility/bin/delete_dois.py
@@ -1,6 +1,6 @@
-''' delete_dois.py
- Delete DOIs from the dois collection
-'''
+"""delete_dois.py
+Delete DOIs from the dois collection
+"""
import argparse
import collections
@@ -19,12 +19,12 @@
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -33,20 +33,26 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Initialize program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -54,12 +60,12 @@ def initialize_program():
def delete_dois():
- ''' Delete DOIs from the database
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Delete DOIs from the database
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
dois = []
try:
with open(ARG.FILE, "r", encoding="ascii") as instream:
@@ -71,20 +77,24 @@ def delete_dois():
for doi in tqdm(dois):
COUNT["read"] += 1
try:
- row = DB['dis'].dois.find_one({"doi": doi})
+ row = DB["dis"].dois.find_one({"doi": doi})
except Exception as err:
terminate_program(err)
if not row:
COUNT["missing"] += 1
LOGGER.warning(f"DOI {doi} not found")
continue
- if 'jrc_authors' in row or 'jrc_first_auohor' in row or 'jrc_last_auohor' in row:
+ if (
+ "jrc_authors" in row
+ or "jrc_first_auohor" in row
+ or "jrc_last_auohor" in row
+ ):
LOGGER.error(f"DOI {doi} has Janelia authors")
continue
if ARG.WRITE:
try:
- resp = DB['dis'].dois.delete_one({"doi": doi})
- COUNT['deleted'] += resp.deleted_count
+ resp = DB["dis"].dois.delete_one({"doi": doi})
+ COUNT["deleted"] += resp.deleted_count
except Exception as err:
terminate_program(err)
print(f"DOIs read: {COUNT['read']}")
@@ -94,20 +104,40 @@ def delete_dois():
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Delete DOIs from the dois collection")
- PARSER.add_argument('--file', dest='FILE', action='store',
- help='File of DOIs to process')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Actually delete DOIs')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Delete DOIs from the dois collection")
+ PARSER.add_argument(
+ "--file", dest="FILE", action="store", help="File of DOIs to process"
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Actually delete DOIs",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
DISCONFIG = JRC.simplenamespace_to_dict(JRC.get_config("dis"))
diff --git a/utility/bin/edit_orcid.py b/utility/bin/edit_orcid.py
index 02b1034..550b774 100644
--- a/utility/bin/edit_orcid.py
+++ b/utility/bin/edit_orcid.py
@@ -1,8 +1,8 @@
-""" edit_orcid.py
- Edit a record in the orcid collection
+"""edit_orcid.py
+Edit a record in the orcid collection
"""
-__version__ = '1.0.0'
+__version__ = "1.0.0"
import argparse
import json
@@ -17,13 +17,14 @@
# Database
DB = {}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -32,21 +33,27 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -54,43 +61,44 @@ def initialize_program():
def add_names(lookup_by, lookup, coll):
- ''' Update names in the orcid record
- Keyword arguments:
- lookup_by: "orcid" or "employeeId"
- lookup: lookup value
- coll: orcid collection
- Returns:
- None
- '''
+ """Update names in the orcid record
+ Keyword arguments:
+ lookup_by: "orcid" or "employeeId"
+ lookup: lookup value
+ coll: orcid collection
+ Returns:
+ None
+ """
family = []
given = []
if ARG.FAMILY:
family = [ARG.FAMILY]
if ARG.GIVEN:
given = [ARG.GIVEN]
- resp = DL.add_orcid_name(lookup_by=lookup_by, lookup=lookup, family=family,
- given=given, coll=coll)
+ resp = DL.add_orcid_name(
+ lookup_by=lookup_by, lookup=lookup, family=family, given=given, coll=coll
+ )
if resp:
print(json.dumps(resp, indent=2, default=str))
def update_orcid():
- ''' Update the orcid record
- Keyword arguments:
- None
- Returns:
- None
- '''
- coll = DB['dis'].orcid
+ """Update the orcid record
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
+ coll = DB["dis"].orcid
if ARG.FAMILY or ARG.GIVEN:
- lookup_by = 'orcid' if ARG.ORCID else 'employeeId'
+ lookup_by = "orcid" if ARG.ORCID else "employeeId"
else:
- lookup_by = 'orcid' if ARG.UPDATE == 'employeeId' else 'employeeId'
+ lookup_by = "orcid" if ARG.UPDATE == "employeeId" else "employeeId"
if not ARG.WRITE:
if ARG.FAMILY or ARG.GIVEN:
lookup = ARG.ORCID if ARG.ORCID else ARG.EMPLOYEE
else:
- lookup = ARG.ORCID if ARG.UPDATE == 'employeeId' else ARG.EMPLOYEE
+ lookup = ARG.ORCID if ARG.UPDATE == "employeeId" else ARG.EMPLOYEE
try:
row = DL.single_orcid_lookup(lookup, coll, lookup_by)
except Exception as err:
@@ -98,16 +106,20 @@ def update_orcid():
if not row:
terminate_program(f"Record not found for {lookup_by} {lookup}")
if ARG.FAMILY or ARG.GIVEN:
- if ARG.FAMILY and ARG.FAMILY not in row['family']:
- row['family'].append(ARG.FAMILY)
- if ARG.GIVEN and ARG.GIVEN not in row['given']:
- row['given'].append(ARG.GIVEN)
+ if ARG.FAMILY and ARG.FAMILY not in row["family"]:
+ row["family"].append(ARG.FAMILY)
+ if ARG.GIVEN and ARG.GIVEN not in row["given"]:
+ row["given"].append(ARG.GIVEN)
else:
- if 'employeeId' in row and 'orcid' in row and row['employeeId'] == ARG.EMPLOYEE \
- and row['orcid'] == ARG.ORCID:
+ if (
+ "employeeId" in row
+ and "orcid" in row
+ and row["employeeId"] == ARG.EMPLOYEE
+ and row["orcid"] == ARG.ORCID
+ ):
print(json_util.dumps(row, indent=2, default=str))
terminate_program("Record already has entered values")
- row[ARG.UPDATE] = ARG.EMPLOYEE if ARG.UPDATE == 'employeeId' else ARG.ORCID
+ row[ARG.UPDATE] = ARG.EMPLOYEE if ARG.UPDATE == "employeeId" else ARG.ORCID
LOGGER.warning("Would have updated record")
print(json_util.dumps(row, indent=2, default=str))
terminate_program()
@@ -115,7 +127,7 @@ def update_orcid():
add_names(lookup_by, ARG.ORCID if ARG.ORCID else ARG.EMPLOYEE, coll)
terminate_program()
try:
- if ARG.UPDATE == 'orcid':
+ if ARG.UPDATE == "orcid":
print(ARG.EMPLOYEE, ARG.ORCID, ARG.UPDATE)
resp = DL.update_existing_orcid(ARG.EMPLOYEE, ARG.ORCID, coll, lookup_by)
else:
@@ -129,38 +141,70 @@ def update_orcid():
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Update orcid record")
- PARSER.add_argument('--orcid', dest='ORCID', action='store',
- help='ORCID')
- PARSER.add_argument('--employee', dest='EMPLOYEE', action='store',
- help='Employee ID')
- PARSER.add_argument('--family', dest='FAMILY', action='store',
- help='Family name to add')
- PARSER.add_argument('--given', dest='GIVEN', action='store',
- help='Given name to add')
- PARSER.add_argument('--update', dest='UPDATE', action='store',
- default='orcid', choices=['orcid','employeeId'], help='Field to update')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Update orcid record")
+ PARSER.add_argument("--orcid", dest="ORCID", action="store", help="ORCID")
+ PARSER.add_argument(
+ "--employee", dest="EMPLOYEE", action="store", help="Employee ID"
+ )
+ PARSER.add_argument(
+ "--family", dest="FAMILY", action="store", help="Family name to add"
+ )
+ PARSER.add_argument(
+ "--given", dest="GIVEN", action="store", help="Given name to add"
+ )
+ PARSER.add_argument(
+ "--update",
+ dest="UPDATE",
+ action="store",
+ default="orcid",
+ choices=["orcid", "employeeId"],
+ help="Field to update",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
if ARG.ORCID:
if not ARG.EMPLOYEE and not ARG.FAMILY and not ARG.GIVEN:
- terminate_program("At least one of --employee, --family, and --given is required")
+ terminate_program(
+ "At least one of --employee, --family, and --given is required"
+ )
elif ARG.EMPLOYEE:
if not ARG.ORCID and not ARG.FAMILY and not ARG.GIVEN:
- terminate_program("At least one of --orcid, --family, and --given is required")
+ terminate_program(
+ "At least one of --orcid, --family, and --given is required"
+ )
else:
terminate_program("At least one of --orcid and --employee is required")
initialize_program()
diff --git a/utility/bin/find_missing_orcids.py b/utility/bin/find_missing_orcids.py
index 972c487..286dee9 100644
--- a/utility/bin/find_missing_orcids.py
+++ b/utility/bin/find_missing_orcids.py
@@ -1,6 +1,6 @@
-''' find_missing_orcids.py
- Search the People system for Janelians that have groups but are missing ORCIDs.
-'''
+"""find_missing_orcids.py
+Search the People system for Janelians that have groups but are missing ORCIDs.
+"""
import argparse
from operator import attrgetter
@@ -21,13 +21,14 @@
# Counters
COUNT = {"missing": 0, "calls": 0}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -36,99 +37,114 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Initialize program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
if "PEOPLE_API_KEY" not in os.environ:
terminate_program("Missing token - set in PEOPLE_API_KEY environment variable")
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
try:
- rows = DB['dis'].orcid.find({"group": {"$exists": True}})
+ rows = DB["dis"].orcid.find({"group": {"$exists": True}})
except Exception as err:
terminate_program(err)
for row in rows:
- GROUPS[row['group']] = True
+ GROUPS[row["group"]] = True
LOGGER.info(f"Found {len(GROUPS)} groups")
try:
- rows = DB['dis'].orcid.find({"orcid": {"$exists": True}})
+ rows = DB["dis"].orcid.find({"orcid": {"$exists": True}})
except Exception as err:
terminate_program(err)
for row in rows:
- if 'employeeId' in row:
- ORCIDS[row['employeeId']] = True
+ if "employeeId" in row:
+ ORCIDS[row["employeeId"]] = True
LOGGER.info(f"Found {len(ORCIDS)} correlated users with ORCIDs")
def process_person(person):
- ''' Process one person, and if they have a lab, check if they have an ORCID
- Keyword arguments:
- person: person record
- Returns:
- True is the record is missing, False otherwise
- '''
+ """Process one person, and if they have a lab, check if they have an ORCID
+ Keyword arguments:
+ person: person record
+ Returns:
+ True if the record is missing, False otherwise
+ """
try:
- rec = JRC.call_people_by_id(person['employeeId'])
+ rec = JRC.call_people_by_id(person["employeeId"])
except Exception as err:
terminate_program(err)
- COUNT['calls'] += 1
+ COUNT["calls"] += 1
if not rec:
return False
- if 'managedTeams' not in rec or not rec['managedTeams']:
+ if "managedTeams" not in rec or not rec["managedTeams"]:
return False
- for team in rec['managedTeams']:
- if team['supOrgSubType'] == 'Lab' and team['supOrgName'].endswith(' Lab'):
- if team['supOrgCode'] in DISCONFIG['sup_ignore']:
+ for team in rec["managedTeams"]:
+ if team["supOrgSubType"] == "Lab" and team["supOrgName"].endswith(" Lab"):
+ if team["supOrgCode"] in DISCONFIG["sup_ignore"]:
continue
- lab = team['supOrgName']
- if lab not in GROUPS or rec['employeeId'] not in ORCIDS:
- COUNT['missing'] += 1
- MISSING.append({'name': ' '.join([rec['nameFirstPreferred'],
- rec['nameLastPreferred']]),
- 'id': rec['employeeId'],
- 'group': lab})
+ lab = team["supOrgName"]
+ if lab not in GROUPS or rec["employeeId"] not in ORCIDS:
+ COUNT["missing"] += 1
+ MISSING.append(
+ {
+ "name": " ".join(
+ [rec["nameFirstPreferred"], rec["nameLastPreferred"]]
+ ),
+ "id": rec["employeeId"],
+ "group": lab,
+ }
+ )
return True
return False
def perform_search():
- ''' Search the People system for Janelians
- Keyword arguments:
- None
- Returns:
- None
- '''
- url = 'https://hhmipeople-prod.azurewebsites.net/People/Search/ByOther/JANELIA_SITE'
- headers = {'APIKey': os.environ['PEOPLE_API_KEY'],
- 'Content-Type': 'application/json'}
+ """Search the People system for Janelians
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
+ url = "https://hhmipeople-prod.azurewebsites.net/People/Search/ByOther/JANELIA_SITE"
+ headers = {
+ "APIKey": os.environ["PEOPLE_API_KEY"],
+ "Content-Type": "application/json",
+ }
try:
resp = requests.get(url, headers=headers, timeout=10)
except Exception as err:
terminate_program(err)
people = resp.json()
LOGGER.info(f"Found {len(people):,} Janelians in People")
- pbar = tqdm(people, desc='Missing records: 0')
+ pbar = tqdm(people, desc="Missing records: 0")
for person in pbar:
- if person['locationName'] == 'Janelia Research Campus' and \
- 'employeeId' in person:
- if process_person(person) :
+ if (
+ person["locationName"] == "Janelia Research Campus"
+ and "employeeId" in person
+ ):
+ if process_person(person):
pbar.set_description(f"Missing records: {COUNT['missing']}")
if not MISSING:
return
- maxl = {'name': 0, 'id': 0, 'group': 0}
+ maxl = {"name": 0, "id": 0, "group": 0}
for person in MISSING:
for key in maxl:
if len(person[key]) > maxl[key]:
@@ -136,24 +152,40 @@ def perform_search():
print(f"{'Name':{maxl['name']}} {'ID':{maxl['id']}} {'Group':{maxl['group']}}")
print(f"{'-'*maxl['name']} {'-'*maxl['id']} {'-'*maxl['group']}")
for person in MISSING:
- print(f"{person['name']:{maxl['name']}} {person['id']:{maxl['id']}} " \
- + f"{person['group']:{maxl['group']}}")
+ print(
+ f"{person['name']:{maxl['name']}} {person['id']:{maxl['id']}} "
+ + f"{person['group']:{maxl['group']}}"
+ )
print(f"Calls to People system: {COUNT['calls']}")
print(f"Missing ORCIDs: {COUNT['missing']}")
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Look up a person by name")
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Look up a person by name")
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
DISCONFIG = JRC.simplenamespace_to_dict(JRC.get_config("dis"))
diff --git a/utility/bin/get_citation.py b/utility/bin/get_citation.py
index 4e57868..82ac63c 100644
--- a/utility/bin/get_citation.py
+++ b/utility/bin/get_citation.py
@@ -1,7 +1,7 @@
-''' get_citation.py
- Get the citation(s) for one or more DOIs through the DIS DB API.
- Print to terminal for easy copy-pasting to HughesHub.
-'''
+"""get_citation.py
+Get the citation(s) for one or more DOIs through the DIS DB API.
+Print to terminal for easy copy-pasting to HughesHub.
+"""
import requests
import argparse
@@ -14,48 +14,48 @@
class Item:
def __init__(self, citation=None, preprint=None):
- self.citation = citation # a string
- self.preprint = preprint # If the DOI is a journal article, this is a list. else None
+ self.citation = citation # a string
+ self.preprint = (
+ preprint # If the DOI is a journal article, this is a list. else None
+ )
def create_item(doi):
rest = JRC.get_config("rest_services")
url_base = attrgetter("dis.url")(rest)
- response = get_request(f"{url_base}citation/dis/{replace_slashes_in_doi(strip_doi_if_provided_as_url(doi))}")
+ response = get_request(
+ f"{url_base}citation/dis/{replace_slashes_in_doi(strip_doi_if_provided_as_url(doi))}"
+ )
if response:
doi_record = get_doi_record(doi)
item_type = get_type(doi_record)
- citation = response['data']
- if 'jrc_preprint' in response and item_type == 'Journal article':
- return(Item(
- citation = citation,
- preprint = response['jrc_preprint']
- ))
+ citation = response["data"]
+ if "jrc_preprint" in response and item_type == "Journal article":
+ return Item(citation=citation, preprint=response["jrc_preprint"])
else:
- return(Item(
- citation = citation,
- preprint = None
- ))
+ return Item(citation=citation, preprint=None)
else:
- print(colored( (f'WARNING: Unable to retrieve a citation for {doi}'), "yellow" ))
- return(None)
+ print(colored((f"WARNING: Unable to retrieve a citation for {doi}"), "yellow"))
+ return None
+
def get_doi_record(doi):
rest = JRC.get_config("rest_services")
url_base = attrgetter("dis.url")(rest)
- url = f'{url_base}doi/{replace_slashes_in_doi(strip_doi_if_provided_as_url(doi))}'
+ url = f"{url_base}doi/{replace_slashes_in_doi(strip_doi_if_provided_as_url(doi))}"
response = get_request(url)
- return( response['data'] )
+ return response["data"]
+
def get_type(doi_record):
- if 'type' in doi_record: # crossref
- if doi_record['type'] == 'journal-article':
- return('Journal article')
- if doi_record['type'] == 'posted-content':
- if doi_record['subtype'] == 'preprint':
- return('Preprint')
- else: # datacite
- return(doi_record['types']['resourceTypeGeneral'])
+ if "type" in doi_record: # crossref
+ if doi_record["type"] == "journal-article":
+ return "Journal article"
+ if doi_record["type"] == "posted-content":
+ if doi_record["subtype"] == "preprint":
+ return "Preprint"
+ else: # datacite
+ return doi_record["types"]["resourceTypeGeneral"]
### Functions for formatting and printing citations
@@ -70,26 +70,27 @@ def parse_ris(lines):
for line in lines:
try:
code, content = line.split(" - ")[0], line.split(" - ")[1].strip()
- if code == 'T1':
+ if code == "T1":
title = content
- if code == 'DO':
+ if code == "DO":
doi = content
- if code == 'AU':
+ if code == "AU":
authors.append(HumanName(content))
except:
continue
- author_str = ', '.join([f"{name.last}, {''.join(name.initials_list()[:-1])}" for name in authors])
+ author_str = ", ".join(
+ [f"{name.last}, {''.join(name.initials_list()[:-1])}" for name in authors]
+ )
citation = f"{author_str}. {title}. https://doi.org/{doi}."
return citation
-
def print_citation(item):
if item.preprint:
print(f"{item.citation}")
for n in range(len(item.preprint)):
- if n == len(item.preprint)-1:
+ if n == len(item.preprint) - 1:
print(f"Preprint: https://doi.org/{item.preprint[n]}\n")
else:
print(f"Preprint: https://doi.org/{item.preprint[n]}")
@@ -97,15 +98,16 @@ def print_citation(item):
print(f"{item.citation}\n")
+### Miscellaneous low-level functions
-
-### Miscellaneous low-level functions
-
def replace_slashes_in_doi(doi):
- return( doi.replace("/", "%2F") ) # e.g. 10.1186/s12859-024-05732-7 becomes 10.1186%2Fs12859-024-05732-7
+ return doi.replace(
+ "/", "%2F"
+ ) # e.g. 10.1186/s12859-024-05732-7 becomes 10.1186%2Fs12859-024-05732-7
+
-def strip_doi_if_provided_as_url(doi, substring=".org/10.", doi_index_in_substring = 5):
+def strip_doi_if_provided_as_url(doi, substring=".org/10.", doi_index_in_substring=5):
# Find all occurrences of the substring
occurrences = [i for i in range(len(doi)) if doi.startswith(substring, i)]
if len(occurrences) > 1:
@@ -113,46 +115,61 @@ def strip_doi_if_provided_as_url(doi, substring=".org/10.", doi_index_in_substri
exit(1) # Exit with a warning code
elif len(occurrences) == 1:
doi_index_in_string = occurrences[0]
- stripped_doi = doi[doi_index_in_string + doi_index_in_substring:]
- return(stripped_doi)
+ stripped_doi = doi[doi_index_in_string + doi_index_in_substring :]
+ return stripped_doi
else:
- return(doi)
+ return doi
+
def get_request(url):
- headers = { 'Content-Type': 'application/json' }
+ headers = {"Content-Type": "application/json"}
response = requests.get(url, headers=headers)
if response.status_code == 200:
- return(response.json())
+ return response.json()
else:
- print(f"ERROR: GET request status code: {response.status_code}. Error message: {response.reason}")
+ print(
+ f"ERROR: GET request status code: {response.status_code}. Error message: {response.reason}"
+ )
# sys.exit(1)
- return(None)
+ return None
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
parser = argparse.ArgumentParser(
- description="Turn a list of DOIs into a list of citations in Janelia Science News format.")
+ description="Turn a list of DOIs into a list of citations in Janelia Science News format."
+ )
muexgroup = parser.add_mutually_exclusive_group(required=True)
- muexgroup.add_argument('--doi', dest='DOI', action='store',
- help='Produce a citation from a single DOI.')
- muexgroup.add_argument('--file', dest='FILE', action='store',
- help='Produce a citation from a file containing one or more DOIs.')
- muexgroup.add_argument('--ris', dest='RIS', action='store',
- help='Print citations from a .ris file.')
-
-
+ muexgroup.add_argument(
+ "--doi",
+ dest="DOI",
+ action="store",
+ help="Produce a citation from a single DOI.",
+ )
+ muexgroup.add_argument(
+ "--file",
+ dest="FILE",
+ action="store",
+ help="Produce a citation from a file containing one or more DOIs.",
+ )
+ muexgroup.add_argument(
+ "--ris", dest="RIS", action="store", help="Print citations from a .ris file."
+ )
+
arg = parser.parse_args()
-
+
items = []
if arg.DOI:
- items.append( create_item(arg.DOI.strip().lower()) )
+ items.append(create_item(arg.DOI.strip().lower()))
if arg.FILE:
try:
with open(arg.FILE, "r") as inF:
for doi in inF.read().splitlines():
- if doi.strip(): # don't throw an error if you encounter an empty line
- items.append( create_item(doi.strip().lower()) )
+ if (
+ doi.strip()
+ ): # don't throw an error if you encounter an empty line
+ items.append(create_item(doi.strip().lower()))
except:
print(f"Could not process {arg.FILE}")
raise ImportError
@@ -160,11 +177,10 @@ def get_request(url):
if arg.RIS:
print(parse_ris(arg.RIS))
sys.exit(0)
-
+
items = [i for i in items if i is not None]
for item in sorted(items, key=lambda i: i.citation):
print_citation(item)
-
# debugging: 10.7554/elife.90523 is a journal article with multiple preprints
diff --git a/utility/bin/name_match.py b/utility/bin/name_match.py
index ece1fe6..ae36c5e 100644
--- a/utility/bin/name_match.py
+++ b/utility/bin/name_match.py
@@ -5,7 +5,7 @@
and finally directly overwrites the previous jrc_author list.
If the author has an ORCID on the paper that isn't in our collection, this script will
-create an ORCID record for that person. If appropriate, it will add an employee ID to
+create an ORCID record for that person. If appropriate, it will add an employee ID to
an existing ORCID record.
This script will not create new "employeeId only" records in the ORCID collection.
@@ -30,119 +30,199 @@
import doi_common.doi_common as doi_common
-
-
class Author:
- """ Author objects are constructed solely from the Crossref-provided author information. """
+ """Author objects are constructed solely from the Crossref-provided author information."""
+
def __init__(self, name, orcid=None, affiliations=None):
self.name = name
self.orcid = orcid
- self.affiliations = affiliations if affiliations is not None else [] # Need to avoid the python mutable arguments trap
+ self.affiliations = (
+ affiliations if affiliations is not None else []
+ ) # Need to avoid the python mutable arguments trap
+
class Employee:
- """ Employees are constructed from information found in the HHMI People database. """
- def __init__(self, id=None, job_title=None, email=None, location=None, supOrgName=None, first_names=None, middle_names=None, last_names=None, exists=False):
+ """Employees are constructed from information found in the HHMI People database."""
+
+ def __init__(
+ self,
+ id=None,
+ job_title=None,
+ email=None,
+ location=None,
+ supOrgName=None,
+ first_names=None,
+ middle_names=None,
+ last_names=None,
+ exists=False,
+ ):
self.id = id
self.job_title = job_title
self.email = email
self.location = location
self.supOrgName = supOrgName
- self.first_names = list(set(first_names)) if first_names is not None else [] # Need to avoid the python mutable arguments trap
+ self.first_names = (
+ list(set(first_names)) if first_names is not None else []
+ ) # Need to avoid the python mutable arguments trap
self.middle_names = list(set(middle_names)) if middle_names is not None else []
self.last_names = list(set(last_names)) if last_names is not None else []
self.exists = exists
+
class Guess(Employee):
- """ A Guess is a subtype of Employee that consists of just ONE name permutation
- (e.g. Gerald M Rubin) and a fuzzy match score (calculated before the guess object is instantiated). """
- def __init__(self, id=None, job_title=None, email=None, location=None, supOrgName=None, first_names=None, middle_names=None, last_names=None, exists=False, name=None, score=None, approved=False):
- super().__init__(id, job_title, email, location, supOrgName, first_names, middle_names, last_names, exists)
+ """A Guess is a subtype of Employee that consists of just ONE name permutation
+ (e.g. Gerald M Rubin) and a fuzzy match score (calculated before the guess object is instantiated)."""
+
+ def __init__(
+ self,
+ id=None,
+ job_title=None,
+ email=None,
+ location=None,
+ supOrgName=None,
+ first_names=None,
+ middle_names=None,
+ last_names=None,
+ exists=False,
+ name=None,
+ score=None,
+ approved=False,
+ ):
+ super().__init__(
+ id,
+ job_title,
+ email,
+ location,
+ supOrgName,
+ first_names,
+ middle_names,
+ last_names,
+ exists,
+ )
self.name = name
self.score = score
self.approved = approved
+
def __repr__(self):
- attrs = {k:v for k, v in self.__dict__.items() if v}
+ attrs = {k: v for k, v in self.__dict__.items() if v}
return f"Guess({', '.join(f'{k}={v}' for k, v in attrs.items())})"
- #return '"Guess(' + ', '.join(f'{k}={v}' for k, v in attrs.items()) + ')"'
+ # return '"Guess(' + ', '.join(f'{k}={v}' for k, v in attrs.items()) + ')"'
+
class MongoOrcidRecord:
def __init__(self, orcid=None, employeeId=None, exists=False):
self.orcid = orcid
self.employeeId = employeeId
self.exists = exists
+
def has_orcid(self):
- return(True if self.orcid else False)
- def has_employeeId(self):
- return(True if self.employeeId else False)
+ return True if self.orcid else False
+ def has_employeeId(self):
+ return True if self.employeeId else False
### Functions for instantiating objects of my custom classes
+
def create_author(author_info):
- if 'given' in author_info and 'family' in author_info:
- name = ' '.join((author_info['given'], author_info['family']))
- elif 'name' in author_info: # e.g. if 'FlyLight Project Team' is an author
- name = author_info['name']
+ if "given" in author_info and "family" in author_info:
+ name = " ".join((author_info["given"], author_info["family"]))
+ elif "name" in author_info: # e.g. if 'FlyLight Project Team' is an author
+ name = author_info["name"]
else:
- sys.exit("ERROR: Neither 'family', 'given', nor 'name' is present in one of the author records.")
- orcid = author_info['paper_orcid'] if 'paper_orcid' in author_info else None
- affiliations = author_info['affiliations'] if author_info['asserted'] == True else None
- return(Author(name, orcid, affiliations))
+ sys.exit(
+ "ERROR: Neither 'family', 'given', nor 'name' is present in one of the author records."
+ )
+ orcid = author_info["paper_orcid"] if "paper_orcid" in author_info else None
+ affiliations = (
+ author_info["affiliations"] if author_info["asserted"] == True else None
+ )
+ return Author(name, orcid, affiliations)
def create_employee(id):
- idsearch_results = search_people_api(id, mode='id')
+ idsearch_results = search_people_api(id, mode="id")
if idsearch_results:
- job_title = job_title = idsearch_results['businessTitle'].strip() if 'businessTitle' in idsearch_results else None
- email = idsearch_results['email'].strip() if 'email' in idsearch_results else None
- location = idsearch_results['locationName'].strip() if 'locationName'in idsearch_results else None # will be 'Janelia Research Campus' for janelians
- supOrgName = idsearch_results['supOrgName'].strip() if 'supOrgName' in idsearch_results and idsearch_results['supOrgName'] else None
- first_names = [ idsearch_results['nameFirstPreferred'].strip() if idsearch_results['nameFirstPreferred'] else None, idsearch_results['nameFirst'].strip() if idsearch_results['nameFirst'] else None]
- middle_names = [ idsearch_results['nameMiddlePreferred'].strip() if idsearch_results['nameMiddlePreferred'] else None, idsearch_results['nameMiddle'].strip() if idsearch_results['nameMiddle'] else None]
- last_names = [ idsearch_results['nameLastPreferred'].strip() if idsearch_results['nameLastPreferred'] else None, idsearch_results['nameLast'].strip() if idsearch_results['nameLast'] else None ]
- return(
- Employee(
+ job_title = job_title = (
+ idsearch_results["businessTitle"].strip()
+ if "businessTitle" in idsearch_results
+ else None
+ )
+ email = (
+ idsearch_results["email"].strip() if "email" in idsearch_results else None
+ )
+ location = (
+ idsearch_results["locationName"].strip()
+ if "locationName" in idsearch_results
+ else None
+ ) # will be 'Janelia Research Campus' for janelians
+ supOrgName = (
+ idsearch_results["supOrgName"].strip()
+ if "supOrgName" in idsearch_results and idsearch_results["supOrgName"]
+ else None
+ )
+ first_names = [
+ idsearch_results["nameFirstPreferred"].strip()
+ if idsearch_results["nameFirstPreferred"]
+ else None,
+ idsearch_results["nameFirst"].strip()
+ if idsearch_results["nameFirst"]
+ else None,
+ ]
+ middle_names = [
+ idsearch_results["nameMiddlePreferred"].strip()
+ if idsearch_results["nameMiddlePreferred"]
+ else None,
+ idsearch_results["nameMiddle"].strip()
+ if idsearch_results["nameMiddle"]
+ else None,
+ ]
+ last_names = [
+ idsearch_results["nameLastPreferred"].strip()
+ if idsearch_results["nameLastPreferred"]
+ else None,
+ idsearch_results["nameLast"].strip()
+ if idsearch_results["nameLast"]
+ else None,
+ ]
+ return Employee(
id=id,
job_title=job_title,
email=email,
location=location,
- supOrgName = supOrgName,
- first_names=first_names,
+ supOrgName=supOrgName,
+ first_names=first_names,
middle_names=middle_names,
last_names=last_names,
- exists=True)
+ exists=True,
)
else:
- return(Employee(exists=False))
+ return Employee(exists=False)
def create_guess(employee, name=None, score=None):
- return(Guess(
- employee.id,
- employee.job_title,
+ return Guess(
+ employee.id,
+ employee.job_title,
employee.email,
employee.location,
- employee.supOrgName,
- employee.first_names,
- employee.middle_names,
+ employee.supOrgName,
+ employee.first_names,
+ employee.middle_names,
employee.last_names,
- employee.exists,
- name,
- score
- )
+ employee.exists,
+ name,
+ score,
)
-
-
-
-
-
-
### Functions for matching authors to employees
-def get_corresponding_employee(author, orcid_collection, verbose_arg, write_arg): # The high-level decision tree that is the core procedure of this script
+
+def get_corresponding_employee(
+ author, orcid_collection, verbose_arg, write_arg
+): # The high-level decision tree that is the core procedure of this script
final_choice = None
if author.orcid:
@@ -152,81 +232,150 @@ def get_corresponding_employee(author, orcid_collection, verbose_arg, write_arg)
if mongo_orcid_record.has_employeeId():
employee = create_employee(mongo_orcid_record.employeeId)
final_choice = employee
- add_preferred_names_to_complete_orcid_record(mongo_orcid_record, author, employee, orcid_collection, verbose_arg)
+ add_preferred_names_to_complete_orcid_record(
+ mongo_orcid_record, author, employee, orcid_collection, verbose_arg
+ )
else:
- best_guess = guess_employee(author, f"{author.name} has an ORCID on this paper. They are in our ORCID collection, but without an employee ID.", verbose_arg)
+ best_guess = guess_employee(
+ author,
+ f"{author.name} has an ORCID on this paper. They are in our ORCID collection, but without an employee ID.",
+ verbose_arg,
+ )
if best_guess.approved:
final_choice = best_guess
- add_id_and_names_to_incomplete_orcid_record(best_guess, author, 'id', orcid_collection, write_arg)
+ add_id_and_names_to_incomplete_orcid_record(
+ best_guess, author, "id", orcid_collection, write_arg
+ )
elif not mongo_orcid_record.exists:
- best_guess = guess_employee(author, f"{author.name} has an ORCID on this paper, but this ORCID is not in our collection.", verbose_arg)
+ best_guess = guess_employee(
+ author,
+ f"{author.name} has an ORCID on this paper, but this ORCID is not in our collection.",
+ verbose_arg,
+ )
if best_guess.approved:
final_choice = best_guess
- mongo_orcid_record = get_mongo_orcid_record(best_guess.id, orcid_collection)
- if mongo_orcid_record.exists:
- if not mongo_orcid_record.has_orcid(): # If the author has a never-before-seen orcid, but their employeeId is already in our collection
- print(f"{author.name} is in our collection, with an employee ID only.")
- add_id_and_names_to_incomplete_orcid_record(best_guess, author, 'orcid', orcid_collection, write_arg)
- elif mongo_orcid_record.has_orcid(): # Hopefully this will never get triggered, i.e., if one person has two ORCIDs
- print(f"{author.name}'s ORCID is {author.orcid} on the paper, but it's {mongo_orcid_record.orcid} in our collection. Aborting attempt to edit their records in our collection.")
+ mongo_orcid_record = get_mongo_orcid_record(
+ best_guess.id, orcid_collection
+ )
+ if mongo_orcid_record.exists:
+ if not mongo_orcid_record.has_orcid(): # If the author has a never-before-seen orcid, but their employeeId is already in our collection
+ print(
+ f"{author.name} is in our collection, with an employee ID only."
+ )
+ add_id_and_names_to_incomplete_orcid_record(
+ best_guess, author, "orcid", orcid_collection, write_arg
+ )
+ elif mongo_orcid_record.has_orcid(): # Hopefully this will never get triggered, i.e., if one person has two ORCIDs
+ print(
+ f"{author.name}'s ORCID is {author.orcid} on the paper, but it's {mongo_orcid_record.orcid} in our collection. Aborting attempt to edit their records in our collection."
+ )
else:
- print(f"{author.name} has an ORCID on this paper, and they are not in our collection.")
+ print(
+ f"{author.name} has an ORCID on this paper, and they are not in our collection."
+ )
create_orcid_record(best_guess, orcid_collection, author, write_arg)
elif not author.orcid:
- best_guess = guess_employee(author, f"{author.name} does not have an ORCID on this paper.", verbose_arg)
+ best_guess = guess_employee(
+ author, f"{author.name} does not have an ORCID on this paper.", verbose_arg
+ )
if best_guess.approved:
final_choice = best_guess
- return(final_choice)
+ return final_choice
def guess_employee(author, inform_message, verbose_arg):
candidates = propose_candidates(author)
best_guess = evaluate_candidates(author, candidates, inform_message, verbose_arg)
- return(best_guess)
+ return best_guess
+
def propose_candidates(author):
- """
- Given an author object, search the People API for one or more matches using the People Search.
- Arguments:
+ """
+ Given an author object, search the People API for one or more matches using the People Search.
+ Arguments:
author: an author object.
Returns:
A list of guess objects. This list will never be empty. It may, however, simply contain one 'empty' guess object.
"""
name = HumanName(author.name)
basic = name_search(name.first, name.last)
- stripped = name_search(unidecode(name.first), unidecode(name.last)) # decode accents and other special characters
- hyphen_split1 = name_search(name.first, name.last.split('-')[0]) if '-' in name.last else None # try different parts of a hyphenated last name
- hyphen_split2 = name_search(name.first, name.last.split('-')[1]) if '-' in name.last else None
- strp_hyph1 = name_search(unidecode(name.first), unidecode(name.last.split('-')[0])) if '-' in name.last else None # split on hyphen and decoded
- strp_hyph2 = name_search(unidecode(name.first), unidecode(name.last.split('-')[1])) if '-' in name.last else None
- two_middle_names1 = name_search(name.first, name.middle.split(' ')[0]) if len(name.middle.split())==2 else None # try different parts of a multi-word middle name, e.g. Virginia Marjorie Tartaglio Scarlett
- two_middle_names2 = name_search(name.first, name.middle.split(' ')[1]) if len(name.middle.split())==2 else None
- strp_middle1 = name_search(unidecode(name.first), unidecode(name.middle.split()[0])) if len(name.middle.split())==2 else None # split on middle name space and decoded
- strp_middle2 = name_search(unidecode(name.first), unidecode(name.middle.split()[1])) if len(name.middle.split())==2 else None
- all_results = [basic, stripped, hyphen_split1, hyphen_split2, strp_hyph1, strp_hyph2, two_middle_names1, two_middle_names2, strp_middle1, strp_middle2]
+ stripped = name_search(
+ unidecode(name.first), unidecode(name.last)
+ ) # decode accents and other special characters
+ hyphen_split1 = (
+ name_search(name.first, name.last.split("-")[0]) if "-" in name.last else None
+ ) # try different parts of a hyphenated last name
+ hyphen_split2 = (
+ name_search(name.first, name.last.split("-")[1]) if "-" in name.last else None
+ )
+ strp_hyph1 = (
+ name_search(unidecode(name.first), unidecode(name.last.split("-")[0]))
+ if "-" in name.last
+ else None
+ ) # split on hyphen and decoded
+ strp_hyph2 = (
+ name_search(unidecode(name.first), unidecode(name.last.split("-")[1]))
+ if "-" in name.last
+ else None
+ )
+ two_middle_names1 = (
+ name_search(name.first, name.middle.split(" ")[0])
+ if len(name.middle.split()) == 2
+ else None
+ ) # try different parts of a multi-word middle name, e.g. Virginia Marjorie Tartaglio Scarlett
+ two_middle_names2 = (
+ name_search(name.first, name.middle.split(" ")[1])
+ if len(name.middle.split()) == 2
+ else None
+ )
+ strp_middle1 = (
+ name_search(unidecode(name.first), unidecode(name.middle.split()[0]))
+ if len(name.middle.split()) == 2
+ else None
+ ) # split on middle name space and decoded
+ strp_middle2 = (
+ name_search(unidecode(name.first), unidecode(name.middle.split()[1]))
+ if len(name.middle.split()) == 2
+ else None
+ )
+ all_results = [
+ basic,
+ stripped,
+ hyphen_split1,
+ hyphen_split2,
+ strp_hyph1,
+ strp_hyph2,
+ two_middle_names1,
+ two_middle_names2,
+ strp_middle1,
+ strp_middle2,
+ ]
candidate_ids = [id for id in list(set(flatten(all_results))) if id is not None]
candidate_employees = [create_employee(id) for id in candidate_ids]
- candidate_employees = [e for e in candidate_employees if e.location == 'Janelia Research Campus']
- return(fuzzy_match(author, candidate_employees))
+ candidate_employees = [
+ e for e in candidate_employees if e.location == "Janelia Research Campus"
+ ]
+ return fuzzy_match(author, candidate_employees)
def name_search(first, last):
- """
- Arguments:
+ """
+ Arguments:
first: first name, a string.
last: last name, a string.
Returns:
A list of candidate employee ids (strings) OR None.
"""
- search_results1 = search_people_api(first, mode='name') # a list of dicts
- search_results2 = search_people_api(last, mode='name')
+ search_results1 = search_people_api(first, mode="name") # a list of dicts
+ search_results2 = search_people_api(last, mode="name")
if search_results1 and search_results2:
- return( process_search_results(search_results1, search_results2) )
+ return process_search_results(search_results1, search_results2)
else:
- return(None)
+ return None
+
def process_search_results(list1, list2):
"""
@@ -239,17 +388,18 @@ def process_search_results(list1, list2):
OR
None
"""
- employee_ids_list1 = {item['employeeId'] for item in list1}
- employee_ids_list2 = {item['employeeId'] for item in list2}
+ employee_ids_list1 = {item["employeeId"] for item in list1}
+ employee_ids_list2 = {item["employeeId"] for item in list2}
common_ids = list(employee_ids_list1.intersection(employee_ids_list2))
if common_ids:
- return(common_ids)
+ return common_ids
else:
- return(None)
+ return None
+
def fuzzy_match(author, candidate_employees):
- """
- Arguments:
+ """
+ Arguments:
author: an author object.
candidate_employees: a list of employee objects, possibly an empty list.
Returns:
@@ -258,25 +408,31 @@ def fuzzy_match(author, candidate_employees):
guesses = []
if candidate_employees:
for employee in candidate_employees:
- employee_permuted_names = generate_name_permutations(employee.first_names, employee.middle_names, employee.last_names)
+ employee_permuted_names = generate_name_permutations(
+ employee.first_names, employee.middle_names, employee.last_names
+ )
for name in employee_permuted_names:
- guesses.append(create_guess(employee, name=name)) # Each employee will generate several guesses, e.g. Virginia T Scarlett, Virginia Scarlett, Ginnie Scarlett
+ guesses.append(
+ create_guess(employee, name=name)
+ ) # Each employee will generate several guesses, e.g. Virginia T Scarlett, Virginia Scarlett, Ginnie Scarlett
if guesses:
for guess in guesses:
- guess.score = fuzz.token_sort_ratio(author.name, guess.name, processor=utils.default_process) #processor will convert the strings to lowercase, remove non-alphanumeric characters, and trim whitespace
- high_score = max( [g.score for g in guesses] )
- winners = [ g for g in guesses if g.score == high_score ]
- return(winners)
+ guess.score = fuzz.token_sort_ratio(
+ author.name, guess.name, processor=utils.default_process
+ ) # processor will convert the strings to lowercase, remove non-alphanumeric characters, and trim whitespace
+ high_score = max([g.score for g in guesses])
+ winners = [g for g in guesses if g.score == high_score]
+ return winners
elif not guesses:
- return( [ Guess(exists=False) ] )
+ return [Guess(exists=False)]
def evaluate_candidates(author, candidates, inform_message, verbose=False):
- """
- A function that lets the user manually evaluate the best-guess employee for a given author.
+ """
+ A function that lets the user manually evaluate the best-guess employee for a given author.
Arguments:
author: an author object.
- candidates: a non-empty list of guess objects.
+ candidates: a non-empty list of guess objects.
inform_message: an informational message will be printed to the terminal if verbose==True OR if some action is needed.
one of:
f"{author.name} is in our ORCID collection, but without an employee ID."
@@ -285,243 +441,368 @@ def evaluate_candidates(author, candidates, inform_message, verbose=False):
verbose: a boolean, passed from command line.
Returns:
A guess object. If this guess.exists == False, this indicates that the guess was rejected, either by the user or automatically due to low score.
- """
+ """
if len(candidates) > 1:
print(inform_message)
print(f"Multiple high scoring matches found for {author.name}:")
- # Some people appear twice in the HHMI People system. Sometimes this is just bad bookkeeping,
+ # Some people appear twice in the HHMI People system. Sometimes this is just bad bookkeeping,
# and sometimes it's because two employees have the same exact name.
# inquirer gives us no way of knowing whether the user selected the first instance of 'David Clapham' or the second instance of 'David Clapham'.
# We are appending numbers to the names to make them unique, and then using the index of the selected object to grab the original object.
- repeat_names = [name for name, count in Counter(guess.name for guess in candidates).items() if count > 1]
+ repeat_names = [
+ name
+ for name, count in Counter(guess.name for guess in candidates).items()
+ if count > 1
+ ]
selection_list = []
counter = {}
for guess in candidates:
if guess.name in repeat_names:
if guess.name in counter:
- selection_list.append(Guess(id=guess.id,job_title=guess.job_title,email=guess.email,location=guess.location,supOrgName = guess.supOrgName,first_names=guess.first_names,middle_names=guess.middle_names,last_names=guess.last_names, exists=guess.exists,
- name=guess.name+f'-{counter[guess.name]+1}',score=guess.score))
+ selection_list.append(
+ Guess(
+ id=guess.id,
+ job_title=guess.job_title,
+ email=guess.email,
+ location=guess.location,
+ supOrgName=guess.supOrgName,
+ first_names=guess.first_names,
+ middle_names=guess.middle_names,
+ last_names=guess.last_names,
+ exists=guess.exists,
+ name=guess.name + f"-{counter[guess.name]+1}",
+ score=guess.score,
+ )
+ )
counter[guess.name] += 1
else:
- selection_list.append(Guess(id=guess.id,job_title=guess.job_title,email=guess.email,location=guess.location,supOrgName = guess.supOrgName,first_names=guess.first_names,middle_names=guess.middle_names,last_names=guess.last_names, exists=guess.exists,
- name=guess.name+f'-1',score=guess.score))
+ selection_list.append(
+ Guess(
+ id=guess.id,
+ job_title=guess.job_title,
+ email=guess.email,
+ location=guess.location,
+ supOrgName=guess.supOrgName,
+ first_names=guess.first_names,
+ middle_names=guess.middle_names,
+ last_names=guess.last_names,
+ exists=guess.exists,
+ name=guess.name + f"-1",
+ score=guess.score,
+ )
+ )
counter[guess.name] = 1
else:
selection_list.append(guess)
for guess in selection_list:
- print(colored(f"{guess.name}, ID: {guess.id}, job title: {guess.job_title}, supOrgName: {guess.supOrgName}, email: {guess.email}", 'black', 'on_yellow'))
- quest = [inquirer.Checkbox('decision',
- carousel=True,
- message="Choose a person from the list",
- choices=[guess.name for guess in selection_list] + ['None of the above'],
- default=['None of the above'])]
- ans = inquirer.prompt(quest, theme=BlueComposure()) # returns {'decision': a list}, e.g. {'decision': ['Virginia Scarlett']}
- while len(ans['decision']) > 1:
- print('Please choose only one option.')
- quest = [inquirer.Checkbox('decision',
- carousel=True,
- message="Choose a person from the list",
- choices=[guess.name for guess in selection_list] + ['None of the above'],
- default=['None of the above'])]
- ans = inquirer.prompt(quest, theme=BlueComposure())
- if ans['decision'] != ['None of the above']:
- index = [guess.name for guess in selection_list].index(ans['decision'][0])
+ print(
+ colored(
+ f"{guess.name}, ID: {guess.id}, job title: {guess.job_title}, supOrgName: {guess.supOrgName}, email: {guess.email}",
+ "black",
+ "on_yellow",
+ )
+ )
+ quest = [
+ inquirer.Checkbox(
+ "decision",
+ carousel=True,
+ message="Choose a person from the list",
+ choices=[guess.name for guess in selection_list]
+ + ["None of the above"],
+ default=["None of the above"],
+ )
+ ]
+ ans = inquirer.prompt(
+ quest, theme=BlueComposure()
+ ) # returns {'decision': a list}, e.g. {'decision': ['Virginia Scarlett']}
+ while len(ans["decision"]) > 1:
+ print("Please choose only one option.")
+ quest = [
+ inquirer.Checkbox(
+ "decision",
+ carousel=True,
+ message="Choose a person from the list",
+ choices=[guess.name for guess in selection_list]
+ + ["None of the above"],
+ default=["None of the above"],
+ )
+ ]
+ ans = inquirer.prompt(quest, theme=BlueComposure())
+ if ans["decision"] != ["None of the above"]:
+ index = [guess.name for guess in selection_list].index(ans["decision"][0])
winner = candidates[index]
winner.approved = True
- return(winner)
- elif ans['decision'] == ['None of the above']:
+ return winner
+ elif ans["decision"] == ["None of the above"]:
print(f"No action will be taken for {author.name}.\n")
- return( Guess(exists=False) )
+ return Guess(exists=False)
elif len(candidates) == 1:
best_guess = candidates[0]
if not best_guess.exists:
if verbose:
- print(f"A Janelian named {author.name} could not be found in the HHMI People API. No action to take.\n")
- return(best_guess)
+ print(
+ f"A Janelian named {author.name} could not be found in the HHMI People API. No action to take.\n"
+ )
+ return best_guess
if float(best_guess.score) < 85.0:
if verbose:
print(inform_message)
print(
f"Employee best guess: {best_guess.name}, ID: {best_guess.id}, job title: {best_guess.job_title}, supOrgName: {best_guess.supOrgName}, email: {best_guess.email}, Confidence: {round(best_guess.score, ndigits = 3)}\n"
- )
- return( Guess(exists=False) )
+ )
+ return Guess(exists=False)
elif float(best_guess.score) > 85.0:
print(inform_message)
- print(colored(
- f"Employee best guess: {best_guess.name}, ID: {best_guess.id}, job title: {best_guess.job_title}, supOrgName: {best_guess.supOrgName}, email: {best_guess.email}, Confidence: {round(best_guess.score, ndigits = 3)}",
- "black", "on_yellow"
- ))
- quest = [inquirer.List('decision',
- message=f"Add {best_guess.name} to this paper's Janelia authors?",
- choices=['Yes', 'No'])]
+ print(
+ colored(
+ f"Employee best guess: {best_guess.name}, ID: {best_guess.id}, job title: {best_guess.job_title}, supOrgName: {best_guess.supOrgName}, email: {best_guess.email}, Confidence: {round(best_guess.score, ndigits = 3)}",
+ "black",
+ "on_yellow",
+ )
+ )
+ quest = [
+ inquirer.List(
+ "decision",
+ message=f"Add {best_guess.name} to this paper's Janelia authors?",
+ choices=["Yes", "No"],
+ )
+ ]
ans = inquirer.prompt(quest, theme=BlueComposure())
- if ans['decision'] == 'Yes':
+ if ans["decision"] == "Yes":
best_guess.approved = True
- return(best_guess)
+ return best_guess
else:
print(f"No action will be taken for {author.name}.\n")
- return( Guess(exists=False) )
-
-
-
+ return Guess(exists=False)
### Functions to search and write to database
+
def get_author_objects(doi, doi_record, doi_collection):
print_title(doi, doi_record)
- all_authors = [ create_author(author_record) for author_record in doi_common.get_author_details(doi_record, doi_collection)]
+ all_authors = [
+ create_author(author_record)
+ for author_record in doi_common.get_author_details(doi_record, doi_collection)
+ ]
all_authors = set_author_check_attr(all_authors)
# If the paper has affiliations, we will only check those authors with janelia affiliations. Otherwise, we will check all authors.
print_janelia_authors(all_authors)
- return(all_authors)
+ return all_authors
+
def set_author_check_attr(all_authors):
new_author_list = all_authors
if not any([a.affiliations for a in all_authors]):
for i in range(len(new_author_list)):
- setattr(new_author_list[i], 'check', True)
+ setattr(new_author_list[i], "check", True)
else:
pattern = re.compile(
- r'(?i)(janelia|' # (?i) means case-insensitive; pattern matches "Janelia" in any form, e.g., "Janelia", "thejaneliafarm", etc.
- r'(ashburn.*(hhmi|howard\s*hughes))|' # "Ashburn" with "HHMI" or "Howard Hughes"
- r'(hhmi|howard\s*hughes).*ashburn)' # "HHMI" or "Howard Hughes" with "Ashburn"
+ r"(?i)(janelia|" # (?i) means case-insensitive; pattern matches "Janelia" in any form, e.g., "Janelia", "thejaneliafarm", etc.
+ r"(ashburn.*(hhmi|howard\s*hughes))|" # "Ashburn" with "HHMI" or "Howard Hughes"
+ r"(hhmi|howard\s*hughes).*ashburn)" # "HHMI" or "Howard Hughes" with "Ashburn"
)
for i in range(len(new_author_list)):
- setattr(new_author_list[i], 'check', is_janelian(new_author_list[i], pattern, orcid_collection))
- return(new_author_list)
+ setattr(
+ new_author_list[i],
+ "check",
+ is_janelian(new_author_list[i], pattern, orcid_collection),
+ )
+ return new_author_list
+
def is_janelian(author, pattern, orcid_collection):
result = False
if author.orcid:
- if doi_common.single_orcid_lookup(author.orcid, orcid_collection, 'orcid'):
+ if doi_common.single_orcid_lookup(author.orcid, orcid_collection, "orcid"):
result = True
if bool(re.search(pattern, " ".join(author.affiliations))):
result = True
- return(result)
-
-def add_preferred_names_to_complete_orcid_record(mongo_orcid_record, author, employee, orcid_collection, verbose_arg):
- doi_common.add_orcid_name(lookup=author.orcid, lookup_by='orcid', given=first_names_for_orcid_record(author, employee), family=last_names_for_orcid_record(author, employee), coll=orcid_collection)
+ return result
+
+
+def add_preferred_names_to_complete_orcid_record(
+ mongo_orcid_record, author, employee, orcid_collection, verbose_arg
+):
+ doi_common.add_orcid_name(
+ lookup=author.orcid,
+ lookup_by="orcid",
+ given=first_names_for_orcid_record(author, employee),
+ family=last_names_for_orcid_record(author, employee),
+ coll=orcid_collection,
+ )
if verbose_arg:
- print( f"{author.name} has an ORCID on this paper. They are in our ORCID collection, with both an ORCID an employee ID.\n" )
+ print(
+ f"{author.name} has an ORCID on this paper. They are in our ORCID collection, with both an ORCID an employee ID.\n"
+ )
-def add_id_and_names_to_incomplete_orcid_record(employee, author, to_add, orcid_collection, write_arg):
- if to_add not in {'id', 'orcid'}:
- raise ValueError("to_add argument to add_id_and_names_to_incomplete_orcid_record() must be either 'orcid' or 'id'.")
- if to_add == 'id':
- if not doi_common.single_orcid_lookup(employee.id, orcid_collection, 'employeeId'):
+
+def add_id_and_names_to_incomplete_orcid_record(
+ employee, author, to_add, orcid_collection, write_arg
+):
+ if to_add not in {"id", "orcid"}:
+ raise ValueError(
+ "to_add argument to add_id_and_names_to_incomplete_orcid_record() must be either 'orcid' or 'id'."
+ )
+ if to_add == "id":
+ if not doi_common.single_orcid_lookup(
+ employee.id, orcid_collection, "employeeId"
+ ):
if write_arg:
- doi_common.update_existing_orcid(lookup=author.orcid, lookup_by='orcid', coll=orcid_collection, add=employee.id)
- doi_common.add_orcid_name(lookup=author.orcid, lookup_by='orcid', coll=orcid_collection, given=first_names_for_orcid_record(author, best_guess), family=last_names_for_orcid_record(author, best_guess))
+ doi_common.update_existing_orcid(
+ lookup=author.orcid,
+ lookup_by="orcid",
+ coll=orcid_collection,
+ add=employee.id,
+ )
+ doi_common.add_orcid_name(
+ lookup=author.orcid,
+ lookup_by="orcid",
+ coll=orcid_collection,
+ given=first_names_for_orcid_record(author, best_guess),
+ family=last_names_for_orcid_record(author, best_guess),
+ )
else:
- print(f'ERROR: {author.name} has at least two records in our orcid collection. Aborting attempt to add employeeId {employee.id} to existing record for this ORCID: {author.orcid}')
- if to_add == 'orcid':
- if not doi_common.single_orcid_lookup(author.orcid, orcid_collection, 'orcid'):
+ print(
+ f"ERROR: {author.name} has at least two records in our orcid collection. Aborting attempt to add employeeId {employee.id} to existing record for this ORCID: {author.orcid}"
+ )
+ if to_add == "orcid":
+ if not doi_common.single_orcid_lookup(author.orcid, orcid_collection, "orcid"):
if write_arg:
- doi_common.update_existing_orcid(lookup=employee.id, lookup_by='employeeId', add=author.orcid)
- doi_common.add_orcid_name(lookup=employee.id, lookup_by='employeeId', coll=orcid_collection, given=first_names_for_orcid_record(author, best_guess), family=last_names_for_orcid_record(author, best_guess))
+ doi_common.update_existing_orcid(
+ lookup=employee.id, lookup_by="employeeId", add=author.orcid
+ )
+ doi_common.add_orcid_name(
+ lookup=employee.id,
+ lookup_by="employeeId",
+ coll=orcid_collection,
+ given=first_names_for_orcid_record(author, best_guess),
+ family=last_names_for_orcid_record(author, best_guess),
+ )
else:
- print(f'ERROR: {author.name} has two records in our orcid collection. Aborting attempt to add orcid {author.orcid} to existing record for this employeeId: {employee.id}')
+ print(
+ f"ERROR: {author.name} has two records in our orcid collection. Aborting attempt to add orcid {author.orcid} to existing record for this employeeId: {employee.id}"
+ )
+
def create_orcid_record(best_guess, orcid_collection, author, write_arg):
if write_arg:
- doi_common.add_orcid(best_guess.id, orcid_collection, given=first_names_for_orcid_record(author, best_guess), family=last_names_for_orcid_record(author, best_guess), orcid=author.orcid)
+ doi_common.add_orcid(
+ best_guess.id,
+ orcid_collection,
+ given=first_names_for_orcid_record(author, best_guess),
+ family=last_names_for_orcid_record(author, best_guess),
+ orcid=author.orcid,
+ )
print(f"Record created for {author.name} in orcid collection.")
+
def generate_name_permutations(first_names, middle_names, last_names):
- middle_names = [n for n in middle_names if n not in ('', None)] # some example middle_names, from HHMI People system: [None], ['D.', ''], ['Marie Sophie'], ['', '']
- permutations = set()
- # All possible first names + all possible last names
- for first_name, last_name in itertools.product(first_names, last_names):
- permutations.add(
- f"{first_name} {last_name}"
- )
- # All possible first names + all possible middle names + all possible last names
- if middle_names:
- for first_name, middle_name, last_name in itertools.product(first_names, middle_names, last_names):
- permutations.add(
- f"{first_name} {middle_name} {last_name}"
- )
+ middle_names = [
+ n for n in middle_names if n not in ("", None)
+ ] # some example middle_names, from HHMI People system: [None], ['D.', ''], ['Marie Sophie'], ['', '']
+ permutations = set()
+ # All possible first names + all possible last names
+ for first_name, last_name in itertools.product(first_names, last_names):
+ permutations.add(f"{first_name} {last_name}")
+ # All possible first names + all possible middle names + all possible last names
+ if middle_names:
+ for first_name, middle_name, last_name in itertools.product(
+ first_names, middle_names, last_names
+ ):
+ permutations.add(f"{first_name} {middle_name} {last_name}")
# All possible first names + all possible middle initials + all possible last names
- for first_name, middle_name, last_name in itertools.product(first_names, middle_names, last_names):
- middle_initial = middle_name[0]
- permutations.add(
- f"{first_name} {middle_initial} {last_name}"
- )
- return list(sorted(permutations))
+ for first_name, middle_name, last_name in itertools.product(
+ first_names, middle_names, last_names
+ ):
+ middle_initial = middle_name[0]
+ permutations.add(f"{first_name} {middle_initial} {last_name}")
+ return list(sorted(permutations))
+
def first_names_for_orcid_record(author, employee):
result = generate_name_permutations(
- [HumanName(author.name).first]+employee.first_names,
- [HumanName(author.name).middle]+employee.middle_names,
- [HumanName(author.name).last]+employee.last_names
+ [HumanName(author.name).first] + employee.first_names,
+ [HumanName(author.name).middle] + employee.middle_names,
+ [HumanName(author.name).last] + employee.last_names,
)
h_result = [HumanName(n) for n in result]
- return(list(set([' '.join((n.first,n.middle)).strip() for n in h_result])))
+ return list(set([" ".join((n.first, n.middle)).strip() for n in h_result]))
+
def last_names_for_orcid_record(author, employee):
result = generate_name_permutations(
- [HumanName(author.name).first]+employee.first_names,
- [HumanName(author.name).middle]+employee.middle_names,
- [HumanName(author.name).last]+employee.last_names
+ [HumanName(author.name).first] + employee.first_names,
+ [HumanName(author.name).middle] + employee.middle_names,
+ [HumanName(author.name).last] + employee.last_names,
)
h_result = [HumanName(n) for n in result]
- return(list(set([n.last for n in h_result])))
+ return list(set([n.last for n in h_result]))
def get_mongo_orcid_record(search_term, orcid_collection):
if not search_term:
- return(MongoOrcidRecord(exists=False))
+ return MongoOrcidRecord(exists=False)
else:
- result = ''
- if len(search_term) == 19: #ORCIDs are guaranteed to be 16 digits (plus the hyphens)
- result = doi_common.single_orcid_lookup(search_term, orcid_collection, 'orcid')
+ result = ""
+ if (
+ len(search_term) == 19
+ ): # ORCIDs are guaranteed to be 16 digits (plus the hyphens)
+ result = doi_common.single_orcid_lookup(
+ search_term, orcid_collection, "orcid"
+ )
else:
- result = doi_common.single_orcid_lookup(search_term, orcid_collection, 'employeeId')
+ result = doi_common.single_orcid_lookup(
+ search_term, orcid_collection, "employeeId"
+ )
if result:
- if 'orcid' in result and 'employeeId' in result:
- return(MongoOrcidRecord(orcid=result['orcid'], employeeId=result['employeeId'], exists=True))
- if 'orcid' in result and 'employeeId' not in result:
- return(MongoOrcidRecord(orcid=result['orcid'], exists=True))
- if 'orcid' not in result and 'employeeId' in result:
- return(MongoOrcidRecord(employeeId=result['employeeId'], exists=True))
+ if "orcid" in result and "employeeId" in result:
+ return MongoOrcidRecord(
+ orcid=result["orcid"], employeeId=result["employeeId"], exists=True
+ )
+ if "orcid" in result and "employeeId" not in result:
+ return MongoOrcidRecord(orcid=result["orcid"], exists=True)
+ if "orcid" not in result and "employeeId" in result:
+ return MongoOrcidRecord(employeeId=result["employeeId"], exists=True)
else:
- return(MongoOrcidRecord(exists=False))
+ return MongoOrcidRecord(exists=False)
+
def search_people_api(query, mode):
response = None
- if mode not in {'name', 'id'}:
+ if mode not in {"name", "id"}:
sys.exit("ERROR: HHMI People API search mode must be either 'name' or 'id'.")
- if mode == 'name':
+ if mode == "name":
response = JRC.call_people_by_name(query)
- elif mode == 'id':
+ elif mode == "id":
response = JRC.call_people_by_id(query)
- return(response)
+ return response
+
def strip_orcid_if_provided_as_url(orcid):
prefixes = ["http://orcid.org/", "https://orcid.org/"]
for prefix in prefixes:
if orcid.startswith(prefix):
- return orcid[len(prefix):]
- return(orcid)
+ return orcid[len(prefix) :]
+ return orcid
+
def overwrite_jrc_author(revised_jrc_authors):
id_list = [e.id for e in revised_jrc_authors]
id_list = [id for id in id_list if id]
- payload = {'jrc_author': id_list}
+ payload = {"jrc_author": id_list}
doi_common.update_jrc_fields(doi, doi_collection, payload)
- print(colored( ('jrc_author field has been updated.'), 'green' ))
-
-
-
-
+ print(colored(("jrc_author field has been updated."), "green"))
### Miscellaneous low-level functions and variables
+
def get_dois_from_commandline(doi_arg, file_arg):
- dois = [doi_arg.lower()] if doi_arg else [] # .lower() because our collection is case-sensitive
+ dois = (
+ [doi_arg.lower()] if doi_arg else []
+ ) # .lower() because our collection is case-sensitive
if file_arg:
try:
with open(file_arg, "r", encoding="ascii") as instream:
@@ -530,18 +811,23 @@ def get_dois_from_commandline(doi_arg, file_arg):
except Exception as err:
print(f"Could not process {file_arg}")
exit()
- return(dois)
+ return dois
+
def print_title(doi, doi_record):
- if 'titles' in doi_record: # DataCite
+ if "titles" in doi_record: # DataCite
print(f"{doi}: {doi_record['titles'][0]['title']}")
- else: # Crossref
+ else: # Crossref
print(f"{doi}: {doi_record['title'][0]}")
+
def print_janelia_authors(all_authors):
- print(", ".join( [a.name for a in all_authors if a.check == True] ))
+ print(", ".join([a.name for a in all_authors if a.check == True]))
+
-def flatten(xs): # https://stackoverflow.com/questions/2158395/flatten-an-irregular-arbitrarily-nested-list-of-lists
+def flatten(
+ xs,
+): # https://stackoverflow.com/questions/2158395/flatten-an-irregular-arbitrarily-nested-list-of-lists
for x in xs:
if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
yield from flatten(x)
@@ -552,40 +838,42 @@ def flatten(xs): # https://stackoverflow.com/questions/2158395/flatten-an-irregu
DB = {}
PROJECT = {}
+
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
- manifold = 'prod'
+ manifold = "prod"
dbo = attrgetter(f"{source}.{manifold}.write")(dbconfig)
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
try:
- rows = DB['dis'].project_map.find({})
+ rows = DB["dis"].project_map.find({})
except Exception as err:
terminate_program(err)
for row in rows:
- PROJECT[row['name']] = row['project']
+ PROJECT[row["name"]] = row["project"]
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -593,47 +881,74 @@ def terminate_program(msg=None):
sys.exit(-1 if msg else 0)
-
-
-
-
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
parser = argparse.ArgumentParser(
- description = "Given a DOI, use fuzzy name matching to correlate Janelia authors who don't have ORCIDs to Janelia employees. Update ORCID records as needed.")
+ description="Given a DOI, use fuzzy name matching to correlate Janelia authors who don't have ORCIDs to Janelia employees. Update ORCID records as needed."
+ )
muexgroup = parser.add_mutually_exclusive_group(required=True)
- muexgroup.add_argument('--doi', dest='DOI', action='store',
- help='Curate janelia authors for a single DOI.')
- muexgroup.add_argument('--file', dest='FILE', action='store',
- help='Curate janelia authors for multiple DOIs in a file.')
- parser.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- parser.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
- parser.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write results to database. If --write is missing, no changes to the database will be made.')
-
+ muexgroup.add_argument(
+ "--doi",
+ dest="DOI",
+ action="store",
+ help="Curate janelia authors for a single DOI.",
+ )
+ muexgroup.add_argument(
+ "--file",
+ dest="FILE",
+ action="store",
+ help="Curate janelia authors for multiple DOIs in a file.",
+ )
+ parser.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ parser.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
+ parser.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write results to database. If --write is missing, no changes to the database will be made.",
+ )
+
arg = parser.parse_args()
- LOGGER = JRC.setup_logging(arg)
+ LOGGER = JRC.setup_logging(arg)
# Connect to the database
initialize_program()
- orcid_collection = DB['dis'].orcid
- doi_collection = DB['dis'].dois
+ orcid_collection = DB["dis"].orcid
+ doi_collection = DB["dis"].dois
dois = get_dois_from_commandline(arg.DOI, arg.FILE)
for doi in dois:
doi_record = doi_common.get_doi_record(doi, doi_collection)
if not doi_record:
- print(colored( (f'WARNING: Skipping {doi}. No record found in DOI collection.'), 'yellow' ))
+ print(
+ colored(
+ (f"WARNING: Skipping {doi}. No record found in DOI collection."),
+ "yellow",
+ )
+ )
else:
all_authors = get_author_objects(doi, doi_record, doi_collection)
revised_jrc_authors = []
- for author in all_authors:
+ for author in all_authors:
if author.check == True:
- final_choice = get_corresponding_employee(author, orcid_collection, arg.VERBOSE, arg.WRITE)
+ final_choice = get_corresponding_employee(
+ author, orcid_collection, arg.VERBOSE, arg.WRITE
+ )
if final_choice == None:
revised_jrc_authors.append(Employee(exists=False))
else:
@@ -642,30 +957,34 @@ def terminate_program(msg=None):
revised_jrc_authors.append(Employee(exists=False))
if len(revised_jrc_authors) != len(all_authors):
- sys.exit("ERROR: Length of revised_jrc_author doesn't make sense. Did you overlook an author, or add two employeeIds for one author?")
-
+ sys.exit(
+ "ERROR: Length of revised_jrc_author doesn't make sense. Did you overlook an author, or add two employeeIds for one author?"
+ )
+
print("Janelia authors are highlighted below:")
for i in range(len(revised_jrc_authors)):
if revised_jrc_authors[i].exists:
- print(colored(
- (all_authors[i].name, revised_jrc_authors[i].id), "black", "on_yellow"
- ))
+ print(
+ colored(
+ (all_authors[i].name, revised_jrc_authors[i].id),
+ "black",
+ "on_yellow",
+ )
+ )
else:
print(all_authors[i].name)
if arg.WRITE:
overwrite_jrc_author(revised_jrc_authors)
else:
- print(colored(
- ("WARNING: Dry run successful, no updates were made"), "yellow"
- ))
-
-
-
-
+ print(
+ colored(
+ ("WARNING: Dry run successful, no updates were made"), "yellow"
+ )
+ )
-#For bug testing
+# For bug testing
# import name_match as nm
# nm.initialize_program()
# orcid_collection = nm.DB['dis'].orcid
@@ -673,4 +992,3 @@ def terminate_program(msg=None):
# doi = '10.1101/2024.09.16.613338'
# doi_record = nm.doi_common.get_doi_record(doi, doi_collection)
# all_authors = nm.get_author_objects(doi, doi_record, doi_collection)
-
diff --git a/utility/bin/remove_jrc_author.py b/utility/bin/remove_jrc_author.py
index 3a9d738..185af63 100644
--- a/utility/bin/remove_jrc_author.py
+++ b/utility/bin/remove_jrc_author.py
@@ -1,8 +1,8 @@
-""" remove_jrc_author.py
- Remove a JRC author from a given DOI
+"""remove_jrc_author.py
+Remove a JRC author from a given DOI
"""
-__version__ = '1.0.0'
+__version__ = "1.0.0"
import argparse
from operator import attrgetter
@@ -15,13 +15,14 @@
# Database
DB = {}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -30,21 +31,27 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -52,51 +59,72 @@ def initialize_program():
def processing():
- ''' Process the request
- Keyword arguments:
- None
- Returns:
- None
- '''
- rec = DL.get_doi_record(ARG.DOI.lower(), DB['dis']['dois'])
+ """Process the request
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
+ rec = DL.get_doi_record(ARG.DOI.lower(), DB["dis"]["dois"])
if not rec:
terminate_program(f"DOI {ARG.DOI} not found")
- if 'jrc_author' not in rec:
+ if "jrc_author" not in rec:
terminate_program(f"DOI {ARG.DOI} does not have any JRC authors defined")
- original = list(rec['jrc_author'])
- if ARG.EMPLOYEE not in rec['jrc_author']:
+ original = list(rec["jrc_author"])
+ if ARG.EMPLOYEE not in rec["jrc_author"]:
terminate_program(f"Employee {ARG.EMPLOYEE} not found in JRC authors")
- rec['jrc_author'].remove(ARG.EMPLOYEE)
+ rec["jrc_author"].remove(ARG.EMPLOYEE)
print(f"jrc_author changed from\n{original}\n to\n{rec['jrc_author']}")
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
return
try:
- result = DB['dis']['dois'].update_one({'doi': rec['doi']},
- {'$set': {'jrc_author': rec['jrc_author']}})
+ result = DB["dis"]["dois"].update_one(
+ {"doi": rec["doi"]}, {"$set": {"jrc_author": rec["jrc_author"]}}
+ )
except Exception as err:
terminate_program(err)
- if hasattr(result, 'matched_count') and result.matched_count:
+ if hasattr(result, "matched_count") and result.matched_count:
print(f"DOI {rec['doi']} updated")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Remove JRC author from a given DOI")
- PARSER.add_argument('--doi', dest='DOI', action='store',
- required=True, help='DOI')
- PARSER.add_argument('--employee', dest='EMPLOYEE', action='store',
- help='Employee ID to remove')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, [prod])')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Remove JRC author from a given DOI")
+ PARSER.add_argument("--doi", dest="DOI", action="store", required=True, help="DOI")
+ PARSER.add_argument(
+ "--employee", dest="EMPLOYEE", action="store", help="Employee ID to remove"
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, [prod])",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/utility/bin/search_people.py b/utility/bin/search_people.py
index 89cd1c7..febf1de 100644
--- a/utility/bin/search_people.py
+++ b/utility/bin/search_people.py
@@ -1,6 +1,6 @@
-''' search_people.py
- Search the People system for a name
-'''
+"""search_people.py
+Search the People system for a name
+"""
import argparse
import json
@@ -12,13 +12,14 @@
# pylint: disable=broad-exception-caught
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -27,23 +28,23 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Initialize program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
if "PEOPLE_API_KEY" not in os.environ:
terminate_program("Missing token - set in PEOPLE_API_KEY environment variable")
def perform_search():
- ''' Search the People system
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Search the People system
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
try:
response = JRC.call_people_by_name(ARG.NAME)
except Exception as err:
@@ -53,7 +54,7 @@ def perform_search():
people = {}
for rec in response:
if ARG.JANELIA:
- if 'Janelia' not in rec['locationName']:
+ if "Janelia" not in rec["locationName"]:
continue
key = f"{rec['nameFirstPreferred']} {rec['nameLastPreferred']} {rec['employeeId']}"
else:
@@ -62,35 +63,49 @@ def perform_search():
if not people:
terminate_program(f"{ARG.NAME} was not found")
if len(people) == 1:
- ans = {'who': list(people.keys())[0]}
+ ans = {"who": list(people.keys())[0]}
else:
- quest = [inquirer.List('who',
- message='Select person',
- choices=people.keys())]
+ quest = [inquirer.List("who", message="Select person", choices=people.keys())]
ans = inquirer.prompt(quest, theme=BlueComposure())
if not ans:
terminate_program()
- print(json.dumps(people[ans['who']], indent=2))
+ print(json.dumps(people[ans["who"]], indent=2))
try:
- response = JRC.call_people_by_id(people[ans['who']]['employeeId'])
+ response = JRC.call_people_by_id(people[ans["who"]]["employeeId"])
except Exception as err:
terminate_program(err)
print(f"{'-'*79}")
print(json.dumps(response, indent=2))
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Look up a person by name")
- PARSER.add_argument('--name', dest='NAME', action='store',
- required=True, help='Name to look up')
- PARSER.add_argument('--janelia', dest='JANELIA', action='store_true',
- default=False, help='Janelia employees only')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Look up a person by name")
+ PARSER.add_argument(
+ "--name", dest="NAME", action="store", required=True, help="Name to look up"
+ )
+ PARSER.add_argument(
+ "--janelia",
+ dest="JANELIA",
+ action="store_true",
+ default=False,
+ help="Janelia employees only",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/utility/bin/set_alumni.py b/utility/bin/set_alumni.py
index 71f58c9..aef56ea 100644
--- a/utility/bin/set_alumni.py
+++ b/utility/bin/set_alumni.py
@@ -1,8 +1,8 @@
-""" set_alumni.py
- Set (or unset) the alumni tag for a given user
+"""set_alumni.py
+Set (or unset) the alumni tag for a given user
"""
-__version__ = '1.0.0'
+__version__ = "1.0.0"
import argparse
import json
@@ -16,13 +16,14 @@
# Database
DB = {}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -31,21 +32,27 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -53,19 +60,19 @@ def initialize_program():
def processing():
- ''' Set alumni tag
- Keyword arguments:
- None
- Returns:
- None
- '''
- coll = DB['dis'].orcid
+ """Set alumni tag
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
+ coll = DB["dis"].orcid
if ARG.ORCID:
lookup = ARG.ORCID
- lookup_by = 'orcid'
+ lookup_by = "orcid"
else:
lookup = ARG.EMPLOYEE
- lookup_by = 'employeeId'
+ lookup_by = "employeeId"
try:
row = DL.single_orcid_lookup(lookup, coll, lookup_by)
except Exception as err:
@@ -95,25 +102,49 @@ def processing():
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Set alumni tag")
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Set alumni tag")
UGROUP = PARSER.add_mutually_exclusive_group(required=True)
- UGROUP.add_argument('--orcid', dest='ORCID', action='store',
- help='ORCID')
- UGROUP.add_argument('--employee', dest='EMPLOYEE', action='store',
- help='Employee ID')
- PARSER.add_argument('--unset', dest='UNSET', action='store_true',
- default=False, help='Unset alumni tag')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ UGROUP.add_argument("--orcid", dest="ORCID", action="store", help="ORCID")
+ UGROUP.add_argument(
+ "--employee", dest="EMPLOYEE", action="store", help="Employee ID"
+ )
+ PARSER.add_argument(
+ "--unset",
+ dest="UNSET",
+ action="store_true",
+ default=False,
+ help="Unset alumni tag",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/utility/bin/update_load.py b/utility/bin/update_load.py
index 91721d8..1fc19aa 100644
--- a/utility/bin/update_load.py
+++ b/utility/bin/update_load.py
@@ -9,15 +9,16 @@
# pylint: disable=broad-exception-caught,logging-fstring-interpolation
DB = {}
-COUNT = {'dois': 0, 'notfound': 0, 'updated': 0}
+COUNT = {"dois": 0, "notfound": 0, "updated": 0}
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message or object
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message or object
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -26,20 +27,26 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Initialize database connection
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Initialize database connection
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
@@ -47,16 +54,16 @@ def initialize_program():
def update_load(doi):
- """ Process a list of DOIs
- Keyword arguments:
- None
- Returns:
- None
+ """Process a list of DOIs
+ Keyword arguments:
+ None
+ Returns:
+ None
"""
doi = doi.lower()
LOGGER.info(doi)
COUNT["dois"] += 1
- coll = DB['dis'].dois
+ coll = DB["dis"].dois
row = coll.find_one({"doi": doi})
if not row:
LOGGER.warning(f"{doi} was not found")
@@ -75,29 +82,29 @@ def update_authors(row):
COUNT["dois"] += 1
first = []
last = None
- if 'jrc_obtained_from' in row and row['jrc_obtained_from'] == "DataCite":
- field = 'creators'
+ if "jrc_obtained_from" in row and row["jrc_obtained_from"] == "DataCite":
+ field = "creators"
datacite = True
else:
- field = 'author'
+ field = "author"
datacite = False
if field in row:
if datacite:
- for auth in row[field]:
- if 'sequence' in auth and auth['sequence'] == 'additional':
+ for auth in row[field]:
+ if "sequence" in auth and auth["sequence"] == "additional":
break
try:
- janelian = DL.is_janelia_author(auth, DB['dis'].orcid, PROJECT)
+ janelian = DL.is_janelia_author(auth, DB["dis"].orcid, PROJECT)
except Exception as err:
LOGGER.error(f"Could not process {row['doi']}")
terminate_program(err)
if janelian:
first.append(janelian)
else:
- janelian = DL.is_janelia_author(row[field][0], DB['dis'].orcid, PROJECT)
+ janelian = DL.is_janelia_author(row[field][0], DB["dis"].orcid, PROJECT)
if janelian:
first.append(janelian)
- janelian = DL.is_janelia_author(row[field][-1], DB['dis'].orcid, PROJECT)
+ janelian = DL.is_janelia_author(row[field][-1], DB["dis"].orcid, PROJECT)
if janelian:
last = janelian
if not first and not last:
@@ -111,17 +118,17 @@ def update_authors(row):
COUNT["updated"] += 1
if ARG.WRITE:
try:
- DB['dis']['dois'].update_one({"doi": row['doi']}, {"$set": payload})
+ DB["dis"]["dois"].update_one({"doi": row["doi"]}, {"$set": payload})
except Exception as err:
terminate_program(err)
def process_dois():
- """ Process a list of DOIs
- Keyword arguments:
- None
- Returns:
- None
+ """Process a list of DOIs
+ Keyword arguments:
+ None
+ Returns:
+ None
"""
if ARG.DOI:
update_load(ARG.DOI)
@@ -135,46 +142,71 @@ def process_dois():
terminate_program(err)
else:
try:
- cnt = DB['dis'].dois.count_documents({})
- rows = DB['dis'].dois.find({})
+ cnt = DB["dis"].dois.count_documents({})
+ rows = DB["dis"].dois.find({})
except Exception as err:
terminate_program(err)
for row in tqdm(rows, desc="DOIs", total=cnt):
- #update_load(row['doi'])
+ # update_load(row['doi'])
update_authors(row)
print(f"DOIs read: {COUNT['dois']}")
- if COUNT['notfound']:
+ if COUNT["notfound"]:
print(f"DOIs not found: {COUNT['notfound']}")
print(f"DOIs updated: {COUNT['updated']}")
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description="Add a reviewed date to one or more DOIs")
+ description="Add a reviewed date to one or more DOIs"
+ )
GROUP_A = PARSER.add_mutually_exclusive_group(required=True)
- GROUP_A.add_argument('--doi', dest='DOI', action='store',
- help='Single DOI to process')
- GROUP_A.add_argument('--file', dest='FILE', action='store',
- help='File of DOIs to process')
- GROUP_A.add_argument('--all', dest='ALL', action='store_true',
- help='Process all DOIs')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+ GROUP_A.add_argument(
+ "--doi", dest="DOI", action="store", help="Single DOI to process"
+ )
+ GROUP_A.add_argument(
+ "--file", dest="FILE", action="store", help="File of DOIs to process"
+ )
+ GROUP_A.add_argument(
+ "--all", dest="ALL", action="store_true", help="Process all DOIs"
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
try:
- PROJECT = DL.get_project_map(DB['dis'].project_map)
+ PROJECT = DL.get_project_map(DB["dis"].project_map)
except Exception as err:
terminate_program(err)
process_dois()
diff --git a/utility/bin/update_tags.py b/utility/bin/update_tags.py
index 579d08d..63109ee 100644
--- a/utility/bin/update_tags.py
+++ b/utility/bin/update_tags.py
@@ -1,8 +1,8 @@
-""" update_tags.py
- Update tags for selected DOIs
+"""update_tags.py
+Update tags for selected DOIs
"""
-__version__ = '3.0.0'
+__version__ = "3.0.0"
import argparse
import collections
@@ -25,13 +25,14 @@
# Counters
COUNT = collections.defaultdict(lambda: 0, {})
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -40,31 +41,37 @@ def terminate_program(msg=None):
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
dbo = attrgetter(f"{source}.{ARG.MANIFOLD}.write")(dbconfig)
- LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
+ LOGGER.info(
+ "Connecting to %s %s on %s as %s",
+ dbo.name,
+ ARG.MANIFOLD,
+ dbo.host,
+ dbo.user,
+ )
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
try:
- rows = DB['dis'].project_map.find({})
+ rows = DB["dis"].project_map.find({})
except Exception as err:
terminate_program(err)
for row in rows:
- PROJECT[row['name']] = row['project']
+ PROJECT[row["name"]] = row["project"]
try:
orgs = DL.get_supervisory_orgs()
except Exception as err:
@@ -74,66 +81,68 @@ def initialize_program():
def get_dois():
- ''' Get a list of DOIs to process. This will be one of four things:
- - a single DOI from ARG.DOI
- - a list of DOIs from ARG.FILE
- Keyword arguments:
- None
- Returns:
- List of DOIs
- '''
+ """Get a list of DOIs to process. This will be one of four things:
+ - a single DOI from ARG.DOI
+ - a list of DOIs from ARG.FILE
+ Keyword arguments:
+ None
+ Returns:
+ List of DOIs
+ """
if ARG.DOI:
- COUNT['specified'] = 1
+ COUNT["specified"] = 1
return [ARG.DOI]
if ARG.FILE:
- COUNT['specified'] = 1
+ COUNT["specified"] = 1
return ARG.FILE.read().splitlines()
- LOGGER.info(f"Finding DOIs from the last {ARG.DAYS} day{'' if ARG.DAYS == 1 else 's'}")
- week_ago = (datetime.today() - timedelta(days=ARG.DAYS))
+ LOGGER.info(
+ f"Finding DOIs from the last {ARG.DAYS} day{'' if ARG.DAYS == 1 else 's'}"
+ )
+ week_ago = datetime.today() - timedelta(days=ARG.DAYS)
try:
- rows = DB['dis'].dois.find({"jrc_inserted": {"$gte": week_ago}})
+ rows = DB["dis"].dois.find({"jrc_inserted": {"$gte": week_ago}})
except Exception as err:
terminate_program(err)
dois = []
for row in rows:
- dois.append(row['doi'])
- COUNT['specified'] = len(dois)
+ dois.append(row["doi"])
+ COUNT["specified"] = len(dois)
return dois
def append_tags(auth, janelians, atags):
- """ Update "janelians" and "atags" lists
- Keyword arguments:
- auth: author record
- janelians: list of Janelia author names
- atags: list of tags
- Returns:
- None
+ """Update "janelians" and "atags" lists
+ Keyword arguments:
+ auth: author record
+ janelians: list of Janelia author names
+ atags: list of tags
+ Returns:
+ None
"""
- if auth['janelian']:
+ if auth["janelian"]:
janelians.append(f"{auth['given']} {auth['family']}")
- if 'group' in auth:
- if auth['group'] not in atags:
- atags.append(auth['group'])
- if 'tags' in auth:
- for tag in auth['tags']:
+ if "group" in auth:
+ if auth["group"] not in atags:
+ atags.append(auth["group"])
+ if "tags" in auth:
+ for tag in auth["tags"]:
if tag not in atags:
atags.append(tag)
- if 'name' in auth:
- if auth['name'] not in PROJECT:
+ if "name" in auth:
+ if auth["name"] not in PROJECT:
LOGGER.warning(f"Project {auth['name']} is not defined")
- elif PROJECT[auth['name']] and PROJECT[auth['name']] not in atags:
- atags.append(PROJECT[auth['name']])
+ elif PROJECT[auth["name"]] and PROJECT[auth["name"]] not in atags:
+ atags.append(PROJECT[auth["name"]])
def get_tags(authors):
- """ Get tags from a list of authors
- Keyword arguments:
- authors: list of detailed authors
- Returns:
- tags: list of tags
- janelians: list of Janelia author names
- tagauth: dict of authors by tag
+ """Get tags from a list of authors
+ Keyword arguments:
+ authors: list of detailed authors
+ Returns:
+ tags: list of tags
+ janelians: list of Janelia author names
+ tagauth: dict of authors by tag
"""
tags = []
janelians = []
@@ -146,28 +155,28 @@ def get_tags(authors):
tags.append(tag)
if tag not in tagauth:
tagauth[tag] = []
- if 'family' in auth and auth['family'] not in tagauth[tag]:
- tagauth[tag].append(auth['family'])
+ if "family" in auth and auth["family"] not in tagauth[tag]:
+ tagauth[tag].append(auth["family"])
tagauth[tag].sort()
return tags, janelians, tagauth
def get_tag_choices(tags, tagauth, rec):
- """ Get tag choices for checklist prompt
- Keyword arguments:
- tags: list of tags
- tagauth: dict of authors by tag
- rec: DOI record
- Returns:
- tagd: dict of tags by tag name
- current: list of current tags
+ """Get tag choices for checklist prompt
+ Keyword arguments:
+ tags: list of tags
+ tagauth: dict of authors by tag
+ rec: DOI record
+ Returns:
+ tagd: dict of tags by tag name
+ current: list of current tags
"""
tags.sort()
tagd = {}
current = []
tagnames = []
- if 'jrc_tag' in rec:
- tagnames = [etag['name'] for etag in rec['jrc_tag']]
+ if "jrc_tag" in rec:
+ tagnames = [etag["name"] for etag in rec["jrc_tag"]]
for tag in tags:
alert = ""
if tag not in SUPORG:
@@ -180,134 +189,158 @@ def get_tag_choices(tags, tagauth, rec):
def get_suporg_code(name):
- ''' Get the code for a supervisory organization
- Keyword arguments:
- name: name of the organization
- Returns:
- Code for the organization
- '''
+ """Get the code for a supervisory organization
+ Keyword arguments:
+ name: name of the organization
+ Returns:
+ Code for the organization
+ """
if name in SUPORG:
return SUPORG[name]
return None
def add_non_author_tags(payload):
- """ Add suporg tags to a DOI's payload
- Keyword arguments:
- payload: DOI payload
- Returns:
- None
+ """Add suporg tags to a DOI's payload
+ Keyword arguments:
+ payload: DOI payload
+ Returns:
+ None
"""
orgs = DL.get_supervisory_orgs()
- if 'jrc_tag' in payload:
- tags = [tag['name'] for tag in payload['jrc_tag']]
+ if "jrc_tag" in payload:
+ tags = [tag["name"] for tag in payload["jrc_tag"]]
for tag in tags:
if tag in orgs:
del orgs[tag]
- quest = [(inquirer.Checkbox('checklist', carousel=True,
- message='Select tags',
- choices=sorted(orgs.keys())))]
+ quest = [
+ (
+ inquirer.Checkbox(
+ "checklist",
+ carousel=True,
+ message="Select tags",
+ choices=sorted(orgs.keys()),
+ )
+ )
+ ]
ans = inquirer.prompt(quest, theme=BlueComposure())
tags = []
- for tag in ans['checklist']:
+ for tag in ans["checklist"]:
code = orgs[tag]
- tagtype = 'suporg'
+ tagtype = "suporg"
tags.append({"name": tag, "code": code, "type": tagtype})
if not tags:
return
- if 'jrc_tag' not in payload:
- payload['jrc_tag'] = []
- payload['jrc_tag'].extend(tags)
+ if "jrc_tag" not in payload:
+ payload["jrc_tag"] = []
+ payload["jrc_tag"].extend(tags)
def process_tags(ans, tagd):
- """ Process the tags from the prompt
- Keyword arguments:
- ans: prompt answers
- tagd: dict of tags by tag name
- Returns:
- payload: DOI payload
+ """Process the tags from the prompt
+ Keyword arguments:
+ ans: prompt answers
+ tagd: dict of tags by tag name
+ Returns:
+ payload: DOI payload
"""
payload = {}
- if 'checklist' in ans:
+ if "checklist" in ans:
tags = []
- for tag in ans['checklist']:
+ for tag in ans["checklist"]:
code = get_suporg_code(tagd[tag])
- tagtype = 'suporg' if code else 'affiliation'
+ tagtype = "suporg" if code else "affiliation"
tags.append({"name": tagd[tag], "code": code, "type": tagtype})
if tags:
payload["jrc_tag"] = tags
# Additional tags
- if 'additional' in ans and ans['additional'] == 'Yes':
+ if "additional" in ans and ans["additional"] == "Yes":
add_non_author_tags(payload)
return payload
def update_single_doi(rec):
- """ Update tags for a single DOI
- Keyword arguments:
- rec: DOI record
- Returns:
- None
+ """Update tags for a single DOI
+ Keyword arguments:
+ rec: DOI record
+ Returns:
+ None
"""
- authors = DL.get_author_details(rec, DB['dis'].orcid)
+ authors = DL.get_author_details(rec, DB["dis"].orcid)
tags, janelians, tagauth = get_tags(authors)
if not tags:
LOGGER.warning(f"No tags for DOI {rec['doi']}")
tagd, current = get_tag_choices(tags, tagauth, rec)
print(f"DOI: {rec['doi']}")
print(f"{DL.get_title(rec)}")
- print('Janelia authors:', ', '.join(janelians))
- if 'jrc_newsletter' in rec and rec['jrc_newsletter']:
- print(f"{Fore.LIGHTYELLOW_EX}{Back.BLACK}DOI has newsletter date of " \
- + f"{rec['jrc_newsletter']}{Style.RESET_ALL}")
- today = datetime.today().strftime('%Y-%m-%d')
+ print("Janelia authors:", ", ".join(janelians))
+ if "jrc_newsletter" in rec and rec["jrc_newsletter"]:
+ print(
+ f"{Fore.LIGHTYELLOW_EX}{Back.BLACK}DOI has newsletter date of "
+ + f"{rec['jrc_newsletter']}{Style.RESET_ALL}"
+ )
+ today = datetime.today().strftime("%Y-%m-%d")
quest = []
if tagd:
- quest.append(inquirer.Checkbox('checklist', carousel=True,
- message='Select tags',
- choices=tagd, default=current))
- quest.append(inquirer.List('additional',
- message="Do you want to add tags that are not associated " \
- + "with authors?",
- choices=['Yes', 'No'], default='No'))
- quest.append(inquirer.List('newsletter',
- message=f"Set jrc_newsletter to {today}",
- choices=['Yes', 'No']))
+ quest.append(
+ inquirer.Checkbox(
+ "checklist",
+ carousel=True,
+ message="Select tags",
+ choices=tagd,
+ default=current,
+ )
+ )
+ quest.append(
+ inquirer.List(
+ "additional",
+ message="Do you want to add tags that are not associated "
+ + "with authors?",
+ choices=["Yes", "No"],
+ default="No",
+ )
+ )
+ quest.append(
+ inquirer.List(
+ "newsletter",
+ message=f"Set jrc_newsletter to {today}",
+ choices=["Yes", "No"],
+ )
+ )
ans = inquirer.prompt(quest, theme=BlueComposure())
if not ans:
return
payload = process_tags(ans, tagd)
# Newsletter
- if 'newsletter' in ans and ans['newsletter'] == 'Yes':
- payload['jrc_newsletter'] = today
- COUNT['selected'] += 1
+ if "newsletter" in ans and ans["newsletter"] == "Yes":
+ payload["jrc_newsletter"] = today
+ COUNT["selected"] += 1
if not payload:
return
if ARG.WRITE:
- coll = DB['dis'].dois
- result = coll.update_one({"doi": rec['doi']}, {"$set": payload})
- if hasattr(result, 'matched_count') and result.matched_count:
- COUNT['updated'] += 1
+ coll = DB["dis"].dois
+ result = coll.update_one({"doi": rec["doi"]}, {"$set": payload})
+ if hasattr(result, "matched_count") and result.matched_count:
+ COUNT["updated"] += 1
if not tags:
- result = coll.update_one({"doi": rec['doi']}, {"$unset": {"jrc_tag":1}})
+ result = coll.update_one({"doi": rec["doi"]}, {"$unset": {"jrc_tag": 1}})
else:
print(f"{rec['doi']}\n{json.dumps(payload, indent=2)}")
- COUNT['updated'] += 1
+ COUNT["updated"] += 1
def update_tags():
- """ Update tags for specified DOIs
- Keyword arguments:
- None
- Returns:
- None
+ """Update tags for specified DOIs
+ Keyword arguments:
+ None
+ Returns:
+ None
"""
LOGGER.info(f"Started run (version {__version__})")
dois = get_dois()
if not dois:
terminate_program("No DOIs were found")
- coll = DB['dis'].dois
+ coll = DB["dis"].dois
for odoi in dois:
doi = odoi.lower().strip()
try:
@@ -316,7 +349,7 @@ def update_tags():
terminate_program(err)
if not rec:
LOGGER.warning(f"DOI {doi} not found")
- COUNT['notfound'] += 1
+ COUNT["notfound"] += 1
continue
update_single_doi(rec)
print(f"DOIs specified: {COUNT['specified']}")
@@ -326,27 +359,58 @@ def update_tags():
if not ARG.WRITE:
LOGGER.warning("Dry run successful, no updates were made")
+
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
- PARSER = argparse.ArgumentParser(
- description="Update tags")
- PARSER.add_argument('--doi', dest='DOI', action='store',
- help='Single DOI to process')
- PARSER.add_argument('--file', dest='FILE', action='store',
- type=argparse.FileType("r", encoding="ascii"),
- help='File of DOIs to process')
- PARSER.add_argument('--days', dest='DAYS', action='store', type=int,
- default=7, help='Number of days to go back for DOIs')
- PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
- default='prod', choices=['dev', 'prod'],
- help='MongoDB manifold (dev, prod)')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write to database/config system')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
+if __name__ == "__main__":
+ PARSER = argparse.ArgumentParser(description="Update tags")
+ PARSER.add_argument(
+ "--doi", dest="DOI", action="store", help="Single DOI to process"
+ )
+ PARSER.add_argument(
+ "--file",
+ dest="FILE",
+ action="store",
+ type=argparse.FileType("r", encoding="ascii"),
+ help="File of DOIs to process",
+ )
+ PARSER.add_argument(
+ "--days",
+ dest="DAYS",
+ action="store",
+ type=int,
+ default=7,
+ help="Number of days to go back for DOIs",
+ )
+ PARSER.add_argument(
+ "--manifold",
+ dest="MANIFOLD",
+ action="store",
+ default="prod",
+ choices=["dev", "prod"],
+ help="MongoDB manifold (dev, prod)",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write to database/config system",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
initialize_program()
diff --git a/utility/bin/weekly_pubs.py b/utility/bin/weekly_pubs.py
index 7aa9a7f..bbe1808 100644
--- a/utility/bin/weekly_pubs.py
+++ b/utility/bin/weekly_pubs.py
@@ -1,14 +1,14 @@
-''' weekly_pubs.py
- Run all the librarian's scripts for the weekly pipeline, in order.
- This script should live in utility/bin. It expects update_dois.py to be in sync/bin.
- I'm keeping the --write flag for consistency with all our other scripts,
- but it doesn't really make sense to NOT include the --write flag in this script.
- New DOIs won't be added to the database, so the downstream scripts won't work.
-
- Because this script contains a couple of checks on the validity of the DOIs,
- the librarian should always run THIS script to add DOIs to the database,
- and they should NOT run update_dois.py directly.
-'''
+"""weekly_pubs.py
+Run all the librarian's scripts for the weekly pipeline, in order.
+This script should live in utility/bin. It expects update_dois.py to be in sync/bin.
+I'm keeping the --write flag for consistency with all our other scripts,
+but it doesn't really make sense to NOT include the --write flag in this script.
+New DOIs won't be added to the database, so the downstream scripts won't work.
+
+Because this script contains a couple of checks on the validity of the DOIs,
+the librarian should always run THIS script to add DOIs to the database,
+and they should NOT run update_dois.py directly.
+"""
import os
import sys
@@ -23,33 +23,39 @@
from termcolor import colored
-
# Functions to pass command line args to subsequent python scripts
-def create_command(script, ARG): # will produce a list like, e.g. ['python3', 'update_dois.py', '--doi' '10.1038/s41593-024-01738-9', '--verbose']
- return(
- list(flatten( ['python3', script, doi_source(ARG), verbose(ARG), write(ARG)] ))
- )
+
+def create_command(
+ script, ARG
+): # will produce a list like, e.g. ['python3', 'update_dois.py', '--doi' '10.1038/s41593-024-01738-9', '--verbose']
+ return list(flatten(["python3", script, doi_source(ARG), verbose(ARG), write(ARG)]))
+
def doi_source(ARG):
if ARG.DOI:
- return( ['--doi', ARG.DOI] )
+ return ["--doi", ARG.DOI]
elif ARG.FILE:
- return(['--file', ARG.FILE])
+ return ["--file", ARG.FILE]
+
def verbose(ARG):
if ARG.VERBOSE:
- return('--verbose')
+ return "--verbose"
else:
- return([])
+ return []
+
def write(ARG):
if ARG.WRITE:
- return('--write')
+ return "--write"
else:
- return([])
+ return []
-def flatten(xs): # https://stackoverflow.com/questions/2158395/flatten-an-irregular-arbitrarily-nested-list-of-lists
+
+def flatten(
+ xs,
+): # https://stackoverflow.com/questions/2158395/flatten-an-irregular-arbitrarily-nested-list-of-lists
for x in xs:
if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
yield from flatten(x)
@@ -57,26 +63,29 @@ def flatten(xs): # https://stackoverflow.com/questions/2158395/flatten-an-irregu
yield x
-
# Functions to handle DOIs already in the database
+
def copy_arg_for_sync(ARG):
arg_copy = copy.deepcopy(ARG)
- dois = get_dois_from_user_input(ARG) # a list
- dois_to_sync = [ d for d in dois if not already_in_dis_db(d) ]
+ dois = get_dois_from_user_input(ARG) # a list
+ dois_to_sync = [d for d in dois if not already_in_dis_db(d)]
if ARG.DOI:
if dois_to_sync:
- arg_copy.DOI = next(flatten(dois_to_sync)) # unlist to produce a string
+ arg_copy.DOI = next(flatten(dois_to_sync)) # unlist to produce a string
else:
arg_copy.DOI = []
elif ARG.FILE:
- with open('to_sync.txt', 'w') as outF:
- outF.write("\n".join(dois_to_sync) )
- arg_copy.FILE = 'to_sync.txt'
- return(arg_copy)
+ with open("to_sync.txt", "w") as outF:
+ outF.write("\n".join(dois_to_sync))
+ arg_copy.FILE = "to_sync.txt"
+ return arg_copy
+
def get_dois_from_user_input(ARG):
- dois = [ARG.DOI.lower()] if ARG.DOI else [] # .lower() because our collection is case-sensitive
+ dois = (
+ [ARG.DOI.lower()] if ARG.DOI else []
+ ) # .lower() because our collection is case-sensitive
if ARG.FILE:
try:
with open(ARG.FILE, "r", encoding="ascii") as instream:
@@ -85,13 +94,14 @@ def get_dois_from_user_input(ARG):
except Exception as err:
print(f"Could not process {ARG.FILE}")
exit()
- return(dois)
+ return dois
# Functions to check whether any DOIs can't be added to the DB because the crossref metadata aren't available yet.
# As far as I can tell, biorxiv is the only publisher that ever publishes a DOI before the DOI is in crossref.
# Checking for the DOI in biorxiv ensures it's a real DOI, not just a typo.
+
def handle_dois_in_biorxiv_but_not_crossref(ARG):
dois = get_dois_from_user_input(ARG)
for doi in dois:
@@ -99,49 +109,64 @@ def handle_dois_in_biorxiv_but_not_crossref(ARG):
if in_biorxiv(doi):
if ARG.WRITE:
doi_common.add_doi_to_process(doi, dois_to_process_collection)
- print(colored(
- (f'WARNING: {doi} is in bioRxiv but not Crossref. Added to dois_to_process_collection. Will be added to DIS DB automatically as soon as possible.'), "yellow"
- ))
+ print(
+ colored(
+ (
+ f"WARNING: {doi} is in bioRxiv but not Crossref. Added to dois_to_process_collection. Will be added to DIS DB automatically as soon as possible."
+ ),
+ "yellow",
+ )
+ )
else:
- print(colored(
- (f'WARNING: {doi} is not in Crossref or bioRxiv. Might it contain a typo?'), "yellow"
- ))
+ print(
+ colored(
+ (
+ f"WARNING: {doi} is not in Crossref or bioRxiv. Might it contain a typo?"
+ ),
+ "yellow",
+ )
+ )
def in_biorxiv(doi):
- if JRC.call_biorxiv(doi)['messages'][0]['status'] == 'ok':
- return(True)
+ if JRC.call_biorxiv(doi)["messages"][0]["status"] == "ok":
+ return True
else:
- return(False)
+ return False
+
def in_crossref(doi):
if JRC.call_crossref(doi):
- return(True)
+ return True
else:
- return(False)
-
-
+ return False
# Functions to query the API
+
def already_in_dis_db(doi):
if get_rest_info(doi)["source"] == "mongo":
- return(True)
+ return True
else:
- return(False)
+ return False
+
def get_rest_info(doi):
rest = JRC.get_config("rest_services")
url_base = attrgetter("dis.url")(rest)
- url = f'{url_base}doi/{replace_slashes_in_doi(strip_doi_if_provided_as_url(doi))}'
+ url = f"{url_base}doi/{replace_slashes_in_doi(strip_doi_if_provided_as_url(doi))}"
response = get_request(url)
- return( response['rest'] )
+ return response["rest"]
+
def replace_slashes_in_doi(doi):
- return( doi.replace("/", "%2F") ) # e.g. 10.1186/s12859-024-05732-7 becomes 10.1186%2Fs12859-024-05732-7
+ return doi.replace(
+ "/", "%2F"
+ ) # e.g. 10.1186/s12859-024-05732-7 becomes 10.1186%2Fs12859-024-05732-7
+
-def strip_doi_if_provided_as_url(doi, substring=".org/10.", doi_index_in_substring = 5):
+def strip_doi_if_provided_as_url(doi, substring=".org/10.", doi_index_in_substring=5):
# Find all occurrences of the substring
occurrences = [i for i in range(len(doi)) if doi.startswith(substring, i)]
if len(occurrences) > 1:
@@ -149,61 +174,65 @@ def strip_doi_if_provided_as_url(doi, substring=".org/10.", doi_index_in_substri
exit(1) # Exit with a warning code
elif len(occurrences) == 1:
doi_index_in_string = occurrences[0]
- stripped_doi = doi[doi_index_in_string + doi_index_in_substring:]
- return(stripped_doi)
+ stripped_doi = doi[doi_index_in_string + doi_index_in_substring :]
+ return stripped_doi
else:
- return(doi)
+ return doi
+
def get_request(url):
- headers = { 'Content-Type': 'application/json' }
+ headers = {"Content-Type": "application/json"}
response = requests.get(url, headers=headers)
if response.status_code == 200:
- return(response.json())
+ return response.json()
else:
- print(f"There was an error with the API GET request. Status code: {response.status_code}.\n Error message: {response.reason}")
+ print(
+ f"There was an error with the API GET request. Status code: {response.status_code}.\n Error message: {response.reason}"
+ )
sys.exit(1)
-
# Functions and variables to connect to DIS DB
DB = {}
PROJECT = {}
+
def initialize_program():
- ''' Intialize the program
- Keyword arguments:
- None
- Returns:
- None
- '''
+ """Intialize the program
+ Keyword arguments:
+ None
+ Returns:
+ None
+ """
# Database
try:
dbconfig = JRC.get_config("databases")
except Exception as err:
terminate_program(err)
- dbs = ['dis']
+ dbs = ["dis"]
for source in dbs:
- manifold = 'prod'
+ manifold = "prod"
dbo = attrgetter(f"{source}.{manifold}.write")(dbconfig)
try:
DB[source] = JRC.connect_database(dbo)
except Exception as err:
terminate_program(err)
try:
- rows = DB['dis'].project_map.find({})
+ rows = DB["dis"].project_map.find({})
except Exception as err:
terminate_program(err)
for row in rows:
- PROJECT[row['name']] = row['project']
+ PROJECT[row["name"]] = row["project"]
+
def terminate_program(msg=None):
- ''' Terminate the program gracefully
- Keyword arguments:
- msg: error message
- Returns:
- None
- '''
+ """Terminate the program gracefully
+ Keyword arguments:
+ msg: error message
+ Returns:
+ None
+ """
if msg:
if not isinstance(msg, str):
msg = f"An exception of type {type(msg).__name__} occurred. Arguments:\n{msg.args}"
@@ -213,46 +242,75 @@ def terminate_program(msg=None):
# -----------------------------------------------------------------------------
-if __name__ == '__main__':
-
+if __name__ == "__main__":
PARSER = argparse.ArgumentParser(
- description = "Run the weekly pipeline for one or more DOIs: add DOI(s) to database, curate authors and tags, and print citation(s).")
+ description="Run the weekly pipeline for one or more DOIs: add DOI(s) to database, curate authors and tags, and print citation(s)."
+ )
MUEXGROUP = PARSER.add_mutually_exclusive_group(required=True)
- MUEXGROUP.add_argument('--doi', dest='DOI', action='store',
- help='Single DOI to process')
- MUEXGROUP.add_argument('--file', dest='FILE', action='store',
- help='File of DOIs to process')
- PARSER.add_argument('--sync_only', dest='SYNC_ONLY', action='store_true',
- default=False, help='Flag, simply add DOI(s) to database without running downstream scripts.')
- PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
- default=False, help='Flag, Chatty')
- PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
- default=False, help='Flag, Very chatty')
- PARSER.add_argument('--write', dest='WRITE', action='store_true',
- default=False, help='Write results to database. If --write is missing, no changes to the database will be made.')
+ MUEXGROUP.add_argument(
+ "--doi", dest="DOI", action="store", help="Single DOI to process"
+ )
+ MUEXGROUP.add_argument(
+ "--file", dest="FILE", action="store", help="File of DOIs to process"
+ )
+ PARSER.add_argument(
+ "--sync_only",
+ dest="SYNC_ONLY",
+ action="store_true",
+ default=False,
+ help="Flag, simply add DOI(s) to database without running downstream scripts.",
+ )
+ PARSER.add_argument(
+ "--verbose",
+ dest="VERBOSE",
+ action="store_true",
+ default=False,
+ help="Flag, Chatty",
+ )
+ PARSER.add_argument(
+ "--debug",
+ dest="DEBUG",
+ action="store_true",
+ default=False,
+ help="Flag, Very chatty",
+ )
+ PARSER.add_argument(
+ "--write",
+ dest="WRITE",
+ action="store_true",
+ default=False,
+ help="Write results to database. If --write is missing, no changes to the database will be made.",
+ )
ARG = PARSER.parse_args()
LOGGER = JRC.setup_logging(ARG)
# Connect to the database
initialize_program()
- dois_to_process_collection = DB['dis'].dois_to_process
+ dois_to_process_collection = DB["dis"].dois_to_process
-# We don't want to add a DOI to the database that is already in there. (The load source will be set to 'Manual' in the metadata, which is misleading.)
-# We can use the API to check whether the DOI is already in the database.
- arg_copy = copy_arg_for_sync(ARG) # create a copy of ARG.DOI or ARG.FILE that contains only the DOIs that are not already in the DB.
+ # We don't want to add a DOI to the database that is already in there. (The load source will be set to 'Manual' in the metadata, which is misleading.)
+ # We can use the API to check whether the DOI is already in the database.
+ arg_copy = copy_arg_for_sync(
+ ARG
+ ) # create a copy of ARG.DOI or ARG.FILE that contains only the DOIs that are not already in the DB.
# Because my create_command function is expecting an ARG object, this was easier than creating a new data structure.
if not arg_copy.DOI and not arg_copy.FILE:
- print(colored(
- ("WARNING: No DOIs to add to database. Skipping sync."), "yellow"
- ))
+ print(
+ colored(("WARNING: No DOIs to add to database. Skipping sync."), "yellow")
+ )
else:
- handle_dois_in_biorxiv_but_not_crossref(arg_copy) # Warn the user about any DOIs that aren't in Crossref.
+ handle_dois_in_biorxiv_but_not_crossref(
+ arg_copy
+ ) # Warn the user about any DOIs that aren't in Crossref.
# If the DOI is in bioRxiv but not Crossref, the DOI is added to the dois_to_process collection.
- sync_bin_path = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'sync', 'bin'))
- subprocess.call(create_command(f'{sync_bin_path}/update_dois.py', arg_copy))
-
- if not ARG.SYNC_ONLY:
- subprocess.call(create_command('name_match.py', ARG))
- subprocess.call(create_command('update_tags.py', ARG))
- subprocess.call(list(flatten( ['python3', 'get_citation.py', doi_source(ARG)] )))
+ sync_bin_path = os.path.normpath(
+ os.path.join(
+ os.path.dirname(os.path.abspath(__file__)), "..", "..", "sync", "bin"
+ )
+ )
+ subprocess.call(create_command(f"{sync_bin_path}/update_dois.py", arg_copy))
+ if not ARG.SYNC_ONLY:
+ subprocess.call(create_command("name_match.py", ARG))
+ subprocess.call(create_command("update_tags.py", ARG))
+ subprocess.call(list(flatten(["python3", "get_citation.py", doi_source(ARG)])))